From d60c459e65124fed1effa64350dba348c7396a11 Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Sun, 9 Oct 2022 23:09:28 +0200
Subject: [PATCH 01/31] Add type hints to test_xarray.py

---
 tests/test_xarray.py | 119 +++++++++++++++++++++++++++++--------------
 1 file changed, 81 insertions(+), 38 deletions(-)

diff --git a/tests/test_xarray.py b/tests/test_xarray.py
index 6669830b5..3d97092d4 100644
--- a/tests/test_xarray.py
+++ b/tests/test_xarray.py
@@ -1,3 +1,7 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
 import numpy as np
 import pandas as pd
 import pytest
@@ -23,12 +27,17 @@
 except ValueError:
     pass
 
+if TYPE_CHECKING:
+    from flox.core import T_Agg, T_Engine
+
 
 @pytest.mark.parametrize("reindex", [None, False, True])
 @pytest.mark.parametrize("min_count", [None, 1, 3])
 @pytest.mark.parametrize("add_nan", [True, False])
 @pytest.mark.parametrize("skipna", [True, False])
-def test_xarray_reduce(skipna, add_nan, min_count, engine, reindex):
+def test_xarray_reduce(
+    skipna: bool, add_nan: bool, min_count: int | None, engine: T_Engine, reindex: bool | None
+) -> None:
     arr = np.ones((4, 12))
 
     if add_nan:
@@ -76,7 +85,9 @@ def test_xarray_reduce(skipna, add_nan, min_count, engine, reindex):
 # TODO: sort
 @pytest.mark.parametrize("pass_expected_groups", [True, False])
 @pytest.mark.parametrize("chunk", (True, False))
-def test_xarray_reduce_multiple_groupers(pass_expected_groups, chunk, engine):
+def test_xarray_reduce_multiple_groupers(
+    pass_expected_groups: bool, chunk: bool, engine: T_Engine
+) -> None:
     if not has_dask and chunk:
         pytest.skip()
 
@@ -100,32 +111,43 @@ def test_xarray_reduce_multiple_groupers(pass_expected_groups, chunk, engine):
         coords={"labels": ["a", "b", "c", "f"], "labels2": [1, 2]},
     ).expand_dims(z=4)
 
-    kwargs = dict(func="count", engine=engine)
+    func = "count"
+    expected_groups = None
     if pass_expected_groups:
-        kwargs["expected_groups"] = (expected.labels.data, expected.labels2.data)
+        expected_groups = (expected.labels.data, expected.labels2.data)
 
     with raise_if_dask_computes():
-        actual = xarray_reduce(da, da.labels, da.labels2, **kwargs)
+        actual = xarray_reduce(
+            da, da.labels, da.labels2, func=func, expected_groups=expected_groups, engine=engine
+        )
     xr.testing.assert_identical(expected, actual)
 
     with raise_if_dask_computes():
-        actual = xarray_reduce(da, "labels", da.labels2, **kwargs)
+        actual = xarray_reduce(
+            da, "labels", da.labels2, func=func, expected_groups=expected_groups, engine=engine
+        )
     xr.testing.assert_identical(expected, actual)
 
     with raise_if_dask_computes():
-        actual = xarray_reduce(da, "labels", "labels2", **kwargs)
+        actual = xarray_reduce(
+            da, "labels", "labels2", func=func, expected_groups=expected_groups, engine=engine
+        )
     xr.testing.assert_identical(expected, actual)
 
     if pass_expected_groups:
-        kwargs["expected_groups"] = (expected.labels2.data, expected.labels.data)
+        expected_groups = (expected.labels2.data, expected.labels.data)
     with raise_if_dask_computes():
-        actual = xarray_reduce(da, "labels2", "labels", **kwargs)
+        actual = xarray_reduce(
+            da, "labels2", "labels", func=func, expected_groups=expected_groups, engine=engine
+        )
     xr.testing.assert_identical(expected.transpose("z", "labels2", "labels"), actual)
 
 
 @pytest.mark.parametrize("pass_expected_groups", [True, False])
 @pytest.mark.parametrize("chunk", (True, False))
-def test_xarray_reduce_multiple_groupers_2(pass_expected_groups, chunk, engine):
+def test_xarray_reduce_multiple_groupers_2(
+    pass_expected_groups: bool, chunk: bool, engine: T_Engine
+) -> None:
     if not has_dask and chunk:
         pytest.skip()
 
@@ -151,20 +173,31 @@ def test_xarray_reduce_multiple_groupers_2(pass_expected_groups, chunk, engine):
         },
     ).expand_dims(z=4, x=2)
 
-    kwargs = dict(func="count", engine=engine)
+    func = "count"
+    expected_groups = None
     if pass_expected_groups:
-        kwargs["expected_groups"] = (expected.labels.data, expected.labels.data)
+        expected_groups = (expected.labels.data, expected.labels.data)
 
     with raise_if_dask_computes():
-        actual = xarray_reduce(da, "labels", "labels2", **kwargs)
+        actual = xarray_reduce(
+            da, "labels", "labels2", func=func, expected_groups=expected_groups, engine=engine
+        )
     xr.testing.assert_identical(expected, actual)
 
     with pytest.raises(NotImplementedError):
-        xarray_reduce(da, "labels", "labels2", dim=..., **kwargs)
+        xarray_reduce(
+            da,
+            "labels",
+            "labels2",
+            dim=...,
+            func=func,
+            expected_groups=expected_groups,
+            engine=engine,
+        )
 
 
 @requires_dask
-def test_dask_groupers_error():
+def test_dask_groupers_error() -> None:
     da = xr.DataArray(
         [1.0, 2.0], dims="x", coords={"labels": ("x", [1, 2]), "labels2": ("x", [1, 2])}
     )
@@ -173,7 +206,7 @@ def test_dask_groupers_error():
 
 
 @requires_dask
-def test_xarray_reduce_single_grouper(engine):
+def test_xarray_reduce_single_grouper(engine: T_Engine) -> None:
 
     # DataArray
     ds = xr.tutorial.open_dataset("rasm", chunks={"time": 9})
@@ -218,7 +251,7 @@ def test_xarray_reduce_single_grouper(engine):
     xr.testing.assert_allclose(actual, expected)
 
 
-def test_xarray_reduce_errors():
+def test_xarray_reduce_errors() -> None:
 
     da = xr.DataArray(np.ones((12,)), dims="x")
     by = xr.DataArray(np.ones((12,)), dims="x")
@@ -238,7 +271,7 @@ def test_xarray_reduce_errors():
 @pytest.mark.parametrize("isdask", [True, False])
 @pytest.mark.parametrize("dataarray", [True, False])
 @pytest.mark.parametrize("chunklen", [27, 4 * 31 + 1, 4 * 31 + 20])
-def test_xarray_resample(chunklen, isdask, dataarray, engine):
+def test_xarray_resample(chunklen: int, isdask: bool, dataarray: bool, engine: T_Engine) -> None:
     if isdask:
         if not has_dask:
             pytest.skip()
@@ -256,7 +289,7 @@ def test_xarray_resample(chunklen, isdask, dataarray, engine):
 
 
 @requires_dask
-def test_xarray_resample_dataset_multiple_arrays(engine):
+def test_xarray_resample_dataset_multiple_arrays(engine: T_Engine) -> None:
     # regression test for #35
     times = pd.date_range("2000", periods=5)
     foo = xr.DataArray(range(5), dims=["time"], coords=[times], name="foo")
@@ -289,7 +322,7 @@ def test_xarray_resample_dataset_multiple_arrays(engine):
         [(10,), (10,)],
     ],
 )
-def test_rechunk_for_blockwise(inchunks, expected):
+def test_rechunk_for_blockwise(inchunks: tuple[int, ...], expected: tuple[int, ...]) -> None:
     labels = np.array([1, 1, 1, 2, 2, 3, 3, 5, 5, 5])
 
     da = xr.DataArray(dask.array.ones((10,), chunks=inchunks), dims="x", name="foo")
@@ -310,7 +343,7 @@ def test_rechunk_for_blockwise(inchunks, expected):
 # TODO: dim=None, dim=Ellipsis, groupby unindexed dim
 
 
-def test_groupby_duplicate_coordinate_labels(engine):
+def test_groupby_duplicate_coordinate_labels(engine: T_Engine) -> None:
     # fix for http://stackoverflow.com/questions/38065129
     array = xr.DataArray([1, 2, 3], [("x", [1, 1, 2])])
     expected = xr.DataArray([3, 3], [("x", [1, 2])])
@@ -318,7 +351,7 @@ def test_groupby_duplicate_coordinate_labels(engine):
     assert_equal(expected, actual)
 
 
-def test_multi_index_groupby_sum(engine):
+def test_multi_index_groupby_sum(engine: T_Engine) -> None:
     # regression test for xarray GH873
     ds = xr.Dataset(
         {"foo": (("x", "y", "z"), np.ones((3, 4, 2)))},
@@ -342,7 +375,7 @@ def test_multi_index_groupby_sum(engine):
 
 
 @pytest.mark.parametrize("chunks", (None, 2))
-def test_xarray_groupby_bins(chunks, engine):
+def test_xarray_groupby_bins(chunks, engine: T_Engine) -> None:
     array = xr.DataArray([1, 1, 1, 1, 1], dims="x")
     labels = xr.DataArray([1, 1.5, 1.9, 2, 3], dims="x", name="labels")
 
@@ -352,16 +385,17 @@ def test_xarray_groupby_bins(chunks, engine):
         array = array.chunk({"x": chunks})
         labels = labels.chunk({"x": chunks})
 
-    kwargs = dict(
-        dim="x",
-        func="count",
-        engine=engine,
-        expected_groups=np.array([1, 2, 4, 5]),
-        isbin=True,
-        fill_value=0,
-    )
     with raise_if_dask_computes():
-        actual = xarray_reduce(array, labels, **kwargs)
+        actual = xarray_reduce(
+            array,
+            labels,
+            dim="x",
+            func="count",
+            engine=engine,
+            expected_groups=np.array([1, 2, 4, 5]),
+            isbin=True,
+            fill_value=0,
+        )
     expected = xr.DataArray(
         np.array([3, 1, 0]),
         dims="labels_bins",
@@ -374,7 +408,16 @@ def test_xarray_groupby_bins(chunks, engine):
     labels = labels.expand_dims(y=2).copy()
     labels.data[-1, -1] = np.nan
     with raise_if_dask_computes():
-        actual = xarray_reduce(array, labels, **kwargs)
+        actual = xarray_reduce(
+            array,
+            labels,
+            dim="x",
+            func="count",
+            engine=engine,
+            expected_groups=np.array([1, 2, 4, 5]),
+            isbin=True,
+            fill_value=0,
+        )
     expected = xr.DataArray(
         np.array([[[3, 1, 0]] * 3, [[3, 0, 0]] * 3]),
         dims=("y", "z", "labels_bins"),
@@ -384,7 +427,7 @@ def test_xarray_groupby_bins(chunks, engine):
 
 
 @requires_dask
-def test_func_is_aggregation():
+def test_func_is_aggregation() -> None:
     from flox.aggregations import mean
 
     ds = xr.tutorial.open_dataset("rasm", chunks={"time": 9})
@@ -400,7 +443,7 @@ def test_func_is_aggregation():
 
 
 @requires_dask
-def test_cache():
+def test_cache() -> None:
     pytest.importorskip("cachey")
 
     from flox.cache import cache
@@ -423,7 +466,7 @@ def test_cache():
 
 @pytest.mark.parametrize("use_cftime", [True, False])
 @pytest.mark.parametrize("func", ["count", "mean"])
-def test_datetime_array_reduce(use_cftime, func, engine):
+def test_datetime_array_reduce(use_cftime: bool, func: str, engine: T_Engine) -> None:
 
     time = xr.DataArray(
         xr.date_range("2009-01-01", "2012-12-31", use_cftime=use_cftime),
@@ -436,7 +479,7 @@ def test_datetime_array_reduce(use_cftime, func, engine):
 
 
 @requires_dask
-def test_groupby_bins_indexed_coordinate():
+def test_groupby_bins_indexed_coordinate() -> None:
     ds = (
         xr.tutorial.open_dataset("air_temperature")
         .isel(time=slice(100))
@@ -457,7 +500,7 @@ def test_groupby_bins_indexed_coordinate():
 
 
 @pytest.mark.parametrize("chunk", (True, False))
-def test_mixed_grouping(chunk):
+def test_mixed_grouping(chunk: bool) -> None:
     if not has_dask and chunk:
         pytest.skip()
     # regression test for https://github.com/xarray-contrib/flox/pull/111

From 6d22bb00b692cde412505b4f1f154550f237b374 Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Sun, 9 Oct 2022 23:09:45 +0200
Subject: [PATCH 02/31] Fix typing of method and engine in xarray_reduce

---
 flox/xarray.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/flox/xarray.py b/flox/xarray.py
index 5f87bafe6..59047492d 100644
--- a/flox/xarray.py
+++ b/flox/xarray.py
@@ -19,6 +19,8 @@
 from .xrutils import _contains_cftime_datetimes, _to_pytimedelta, datetime_to_numeric
 
 if TYPE_CHECKING:
+    from .core import T_Engine, T_Method
+
     from xarray.core.resample import Resample
     from xarray.core.types import T_DataArray, T_Dataset
 
@@ -63,8 +65,8 @@ def xarray_reduce(
     dim: Dims | ellipsis = None,
     split_out: int = 1,
     fill_value=None,
-    method: str = "map-reduce",
-    engine: str = "numpy",
+    method: T_Method = "map-reduce",
+    engine: T_Engine = "numpy",
     keep_attrs: bool | None = True,
     skipna: bool | None = None,
     min_count: int | None = None,

From 42826a840fe63279b7e0ef91e98afa987c5ad0da Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 9 Oct 2022 21:10:49 +0000
Subject: [PATCH 03/31] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 flox/xarray.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/flox/xarray.py b/flox/xarray.py
index 59047492d..dddd5b363 100644
--- a/flox/xarray.py
+++ b/flox/xarray.py
@@ -19,11 +19,11 @@
 from .xrutils import _contains_cftime_datetimes, _to_pytimedelta, datetime_to_numeric
 
 if TYPE_CHECKING:
-    from .core import T_Engine, T_Method
-
     from xarray.core.resample import Resample
     from xarray.core.types import T_DataArray, T_Dataset
 
+    from .core import T_Engine, T_Method
+
     Dims = Union[str, Iterable[Hashable], None]
 
 

From 91a5bd2ad6aee74fcda522f5f2f0378854cbdf35 Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Mon, 10 Oct 2022 00:22:03 +0200
Subject: [PATCH 04/31] Add type hints to test_core.py

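---
Review notes (below the three-dash line, so not part of the commit message):

* test_groupby_agg_dask and test_numpy_reduce_axis_subset now pass every
  argument to groupby_reduce explicitly, but the `kwargs = dict(...)`
  assignments (and `kwargs["expected_groups"] = [0, 2, 1]` in
  test_groupby_agg_dask) are left in place as unchanged context. After this
  patch nothing reads `kwargs` in either test; a follow-up should delete
  those leftovers or go back to passing `**kwargs`.
* test_dtype_promotion annotates `fill_value: int`, yet it is parametrized
  with both 0 and np.nan; `float` describes the values actually passed.
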
 tests/test_core.py | 320 +++++++++++++++++++++++++++++++++------------
 1 file changed, 239 insertions(+), 81 deletions(-)

diff --git a/tests/test_core.py b/tests/test_core.py
index 25660e734..d232d0871 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from functools import reduce
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any
 
 import numpy as np
 import pandas as pd
@@ -67,7 +67,7 @@ def dask_array_ones(*args):
 )
 
 if TYPE_CHECKING:
-    from flox.core import T_Engine, T_ExpectedGroupsOpt, T_Func2
+    from flox.core import T_Engine, T_ExpectedGroupsOpt, T_Agg, T_Method
 
 
 def test_alignment_error():
@@ -108,7 +108,7 @@ def test_alignment_error():
 )
 def test_groupby_reduce(
     engine: T_Engine,
-    func: T_Func2,
+    func: T_Agg,
     array: np.ndarray,
     by: np.ndarray,
     expected: list[float],
@@ -146,10 +146,12 @@ def test_groupby_reduce(
     assert_equal(expected_result, result)
 
 
-def gen_array_by(size, func):
+def gen_array_by(
+    size: tuple[int, ...], func: str
+) -> tuple[np.ndarray[Any, Any], np.ndarray[Any, Any]]:
     by = np.ones(size[-1])
     rng = np.random.default_rng(12345)
-    array = rng.random(size)
+    array: np.ndarray[Any, Any] = rng.random(size)
     if "nan" in func and "nanarg" not in func:
         array[[1, 4, 5], ...] = np.nan
     elif "nanarg" in func and len(size) > 1:
@@ -164,7 +166,9 @@ def gen_array_by(size, func):
 @pytest.mark.parametrize("size", ((12,), (12, 9)))
 @pytest.mark.parametrize("add_nan_by", [True, False])
 @pytest.mark.parametrize("func", ALL_FUNCS)
-def test_groupby_reduce_all(nby, size, chunks, func, add_nan_by, engine):
+def test_groupby_reduce_all(
+    nby, size: tuple[int, ...], chunks, func: str, add_nan_by: bool, engine: T_Engine
+) -> None:
     if chunks is not None and not has_dask:
         pytest.skip()
     if "arg" in func and engine == "flox":
@@ -173,15 +177,14 @@ def test_groupby_reduce_all(nby, size, chunks, func, add_nan_by, engine):
     array, by = gen_array_by(size, func)
     if chunks:
         array = dask.array.from_array(array, chunks=chunks)
-    by = (by,) * nby
-    by = [b + idx for idx, b in enumerate(by)]
+    bys = [by] * nby
+    bys = [b + idx for idx, b in enumerate(bys)]
     if add_nan_by:
         for idx in range(nby):
-            by[idx][2 * idx : 2 * idx + 3] = np.nan
-    by = tuple(by)
-    nanmask = reduce(np.logical_or, (np.isnan(b) for b in by))
+            bys[idx][2 * idx : 2 * idx + 3] = np.nan
+    nanmask = reduce(np.logical_or, (np.isnan(b) for b in bys))
 
-    finalize_kwargs = [{}]
+    finalize_kwargs: list[dict[str, Any]] = [{}]
     if "var" in func or "std" in func:
         finalize_kwargs = finalize_kwargs + [{"ddof": 1}, {"ddof": 0}]
         fill_value = np.nan
@@ -189,7 +192,6 @@ def test_groupby_reduce_all(nby, size, chunks, func, add_nan_by, engine):
         fill_value = None
 
     for kwargs in finalize_kwargs:
-        flox_kwargs = dict(func=func, engine=engine, finalize_kwargs=kwargs, fill_value=fill_value)
         with np.errstate(invalid="ignore", divide="ignore"):
             if "arg" in func and add_nan_by:
                 array[..., nanmask] = np.nan
@@ -199,7 +201,9 @@ def test_groupby_reduce_all(nby, size, chunks, func, add_nan_by, engine):
         for _ in range(nby):
             expected = np.expand_dims(expected, -1)
 
-        actual, *groups = groupby_reduce(array, *by, **flox_kwargs)
+        actual, *groups = groupby_reduce(
+            array, *bys, func=func, engine=engine, finalize_kwargs=kwargs, fill_value=fill_value
+        )
         assert actual.ndim == (array.ndim + nby - 1)
         assert expected.ndim == (array.ndim + nby - 1)
         expected_groups = tuple(np.array([idx + 1.0]) for idx in range(nby))
@@ -211,10 +215,20 @@ def test_groupby_reduce_all(nby, size, chunks, func, add_nan_by, engine):
 
         if not has_dask:
             continue
-        for method in ["map-reduce", "cohorts", "split-reduce"]:
+
+        methods: list[T_Method] = ["map-reduce", "cohorts", "split-reduce"]
+        for method in methods:
             if "arg" in func and method != "map-reduce":
                 continue
-            actual, *groups = groupby_reduce(array, *by, method=method, **flox_kwargs)
+            actual, *groups = groupby_reduce(
+                array,
+                *bys,
+                method=method,
+                func=func,
+                engine=engine,
+                finalize_kwargs=kwargs,
+                fill_value=fill_value,
+            )
             for actual_group, expect in zip(groups, expected_groups):
                 assert_equal(actual_group, expect)
             if "arg" in func:
@@ -225,7 +239,7 @@ def test_groupby_reduce_all(nby, size, chunks, func, add_nan_by, engine):
 @requires_dask
 @pytest.mark.parametrize("size", ((12,), (12, 5)))
 @pytest.mark.parametrize("func", ("argmax", "nanargmax", "argmin", "nanargmin"))
-def test_arg_reduction_dtype_is_int(size, func):
+def test_arg_reduction_dtype_is_int(size: tuple[int, ...], func: str) -> None:
     """avoid bugs being hidden by the xfail in the above test."""
 
     rng = np.random.default_rng(12345)
@@ -245,14 +259,14 @@ def test_arg_reduction_dtype_is_int(size, func):
     assert actual.dtype.kind == "i"
 
 
-def test_groupby_reduce_count():
+def test_groupby_reduce_count() -> None:
     array = np.array([0, 0, np.nan, np.nan, np.nan, 1, 1])
     labels = np.array(["a", "b", "b", "b", "c", "c", "c"])
     result, _ = groupby_reduce(array, labels, func="count")
     assert_equal(result, [1, 1, 2])
 
 
-def test_func_is_aggregation():
+def test_func_is_aggregation() -> None:
     from flox.aggregations import mean
 
     array = np.array([0, 0, np.nan, np.nan, np.nan, 1, 1])
@@ -265,14 +279,14 @@ def test_func_is_aggregation():
 @requires_dask
 @pytest.mark.parametrize("func", ("sum", "prod"))
 @pytest.mark.parametrize("dtype", [np.float32, np.float64, np.int32, np.int64])
-def test_groupby_reduce_preserves_dtype(dtype, func):
+def test_groupby_reduce_preserves_dtype(dtype, func: str) -> None:
     array = np.ones((2, 12), dtype=dtype)
     by = np.array([labels] * 2)
     result, _ = groupby_reduce(from_array(array, chunks=(-1, 4)), by, func=func)
     assert result.dtype == array.dtype
 
 
-def test_numpy_reduce_nd_md():
+def test_numpy_reduce_nd_md() -> None:
     array = np.ones((2, 12))
     by = np.array([labels] * 2)
 
@@ -319,7 +333,16 @@ def test_numpy_reduce_nd_md():
         ((10, 12), (3, 3), 3),  # form 3
     ],
 )
-def test_groupby_agg_dask(func, shape, array_chunks, group_chunks, add_nan, dtype, engine, reindex):
+def test_groupby_agg_dask(
+    func: str,
+    shape: tuple[int, ...],
+    array_chunks: tuple[int, ...],
+    group_chunks,
+    add_nan: bool,
+    dtype,
+    engine: T_Engine,
+    reindex: bool | None,
+) -> None:
     """Tests groupby_reduce with dask arrays against groupby_reduce with numpy arrays"""
 
     rng = np.random.default_rng(12345)
@@ -341,67 +364,115 @@ def test_groupby_agg_dask(func, shape, array_chunks, group_chunks, add_nan, dtyp
     kwargs = dict(
         func=func, expected_groups=[0, 1, 2], fill_value=False if func in ["all", "any"] else 123
     )
+    expected_groups = [0, 1, 2]
+    fill_value = False if func in ["all", "any"] else 123
 
-    expected, _ = groupby_reduce(array.compute(), labels, engine="numpy", **kwargs)
-    actual, _ = groupby_reduce(array.compute(), labels, engine=engine, **kwargs)
+    expected, _ = groupby_reduce(
+        array.compute(),
+        labels,
+        engine="numpy",
+        func=func,
+        expected_groups=expected_groups,
+        fill_value=fill_value,
+    )
+    actual, _ = groupby_reduce(
+        array.compute(),
+        labels,
+        engine=engine,
+        func=func,
+        expected_groups=expected_groups,
+        fill_value=fill_value,
+    )
     assert_equal(actual, expected)
 
     with raise_if_dask_computes():
-        actual, _ = groupby_reduce(array, labels, engine=engine, **kwargs)
+        actual, _ = groupby_reduce(
+            array,
+            labels,
+            engine=engine,
+            func=func,
+            expected_groups=expected_groups,
+            fill_value=fill_value,
+        )
     assert_equal(actual, expected)
 
     by = from_array(labels, group_chunks)
     with raise_if_dask_computes():
-        actual, _ = groupby_reduce(array, by, engine=engine, **kwargs)
+        actual, _ = groupby_reduce(
+            array,
+            by,
+            engine=engine,
+            func=func,
+            expected_groups=expected_groups,
+            fill_value=fill_value,
+        )
     assert_equal(expected, actual)
 
-    kwargs["expected_groups"] = [0, 2, 1]
+    expected_groups = [0, 2, 1]
     with raise_if_dask_computes():
-        actual, groups = groupby_reduce(array, by, engine=engine, **kwargs, sort=False)
+        actual, groups = groupby_reduce(
+            array,
+            by,
+            engine=engine,
+            func=func,
+            expected_groups=expected_groups,
+            fill_value=fill_value,
+            sort=False,
+        )
     assert_equal(groups, [0, 2, 1])
     assert_equal(expected, actual[..., [0, 2, 1]])
 
     kwargs["expected_groups"] = [0, 2, 1]
     with raise_if_dask_computes():
-        actual, groups = groupby_reduce(array, by, engine=engine, **kwargs, sort=True)
+        actual, groups = groupby_reduce(
+            array,
+            by,
+            engine=engine,
+            func=func,
+            expected_groups=expected_groups,
+            fill_value=fill_value,
+            sort=True,
+        )
     assert_equal(groups, [0, 1, 2])
     assert_equal(expected, actual)
 
 
-def test_numpy_reduce_axis_subset(engine):
+def test_numpy_reduce_axis_subset(engine: T_Engine) -> None:
     # TODO: add NaNs
     by = labels2d
     array = np.ones_like(by)
     kwargs = dict(func="count", engine=engine, fill_value=0)
-    result, _ = groupby_reduce(array, by, **kwargs, axis=1)
+    result, _ = groupby_reduce(array, by, func="count", engine=engine, fill_value=0, axis=1)
     assert_equal(result, [[2, 3], [2, 3]])
 
     by = np.broadcast_to(labels2d, (3, *labels2d.shape))
     array = np.ones_like(by)
-    result, _ = groupby_reduce(array, by, **kwargs, axis=1)
+    result, _ = groupby_reduce(array, by, func="count", engine=engine, fill_value=0, axis=1)
     subarr = np.array([[1, 1], [1, 1], [0, 2], [1, 1], [1, 1]])
     expected = np.tile(subarr, (3, 1, 1))
     assert_equal(result, expected)
 
-    result, _ = groupby_reduce(array, by, **kwargs, axis=2)
+    result, _ = groupby_reduce(array, by, func="count", engine=engine, fill_value=0, axis=2)
     subarr = np.array([[2, 3], [2, 3]])
     expected = np.tile(subarr, (3, 1, 1))
     assert_equal(result, expected)
 
-    result, _ = groupby_reduce(array, by, **kwargs, axis=(1, 2))
+    result, _ = groupby_reduce(array, by, func="count", engine=engine, fill_value=0, axis=(1, 2))
     expected = np.array([[4, 6], [4, 6], [4, 6]])
     assert_equal(result, expected)
 
-    result, _ = groupby_reduce(array, by, **kwargs, axis=(2, 1))
+    result, _ = groupby_reduce(array, by, func="count", engine=engine, fill_value=0, axis=(2, 1))
     assert_equal(result, expected)
 
-    result, _ = groupby_reduce(array, by[0, ...], **kwargs, axis=(1, 2))
+    result, _ = groupby_reduce(
+        array, by[0, ...], func="count", engine=engine, fill_value=0, axis=(1, 2)
+    )
     expected = np.array([[4, 6], [4, 6], [4, 6]])
     assert_equal(result, expected)
 
 
 @requires_dask
-def test_dask_reduce_axis_subset():
+def test_dask_reduce_axis_subset() -> None:
 
     by = labels2d
     array = np.ones_like(by)
@@ -456,12 +527,13 @@ def test_dask_reduce_axis_subset():
 @pytest.mark.parametrize(
     "axis", [None, (0, 1, 2), (0, 1), (0, 2), (1, 2), 0, 1, 2, (0,), (1,), (2,)]
 )
-def test_groupby_reduce_axis_subset_against_numpy(func, axis, engine):
+def test_groupby_reduce_axis_subset_against_numpy(func: str, axis, engine: T_Engine) -> None:
     if "arg" in func and engine == "flox":
         pytest.skip()
 
     if not isinstance(axis, int) and "arg" in func and (axis is None or len(axis) > 1):
         pytest.skip()
+    fill_value: bool | float
     if func in ["all", "any"]:
         fill_value = False
     else:
@@ -496,7 +568,9 @@ def test_groupby_reduce_axis_subset_against_numpy(func, axis, engine):
         (None, [0], (1,)),  # global reduction; 0 shaped group axis; 1 group
     ],
 )
-def test_groupby_reduce_nans(chunks, axis, groups, expected_shape, engine):
+def test_groupby_reduce_nans(
+    chunks, axis, groups, expected_shape: tuple[int, ...], engine: T_Engine
+) -> None:
     def _maybe_chunk(arr):
         if chunks:
             if not has_dask:
@@ -531,7 +605,7 @@ def _maybe_chunk(arr):
 
 
 @requires_dask
-def test_groupby_all_nan_blocks(engine):
+def test_groupby_all_nan_blocks(engine: T_Engine) -> None:
     labels = np.array([0, 0, 2, 2, 2, 1, 1, 2, 2, 1, 1, 0])
     nan_labels = labels.astype(float)  # copy
     nan_labels[:5] = np.nan
@@ -553,7 +627,7 @@ def test_groupby_all_nan_blocks(engine):
 
 
 @pytest.mark.parametrize("axis", (0, 1, 2, -1))
-def test_reindex(axis):
+def test_reindex(axis: int) -> None:
     shape = [2, 2, 2]
     fill_value = 0
 
@@ -573,7 +647,7 @@ def test_reindex(axis):
 
 
 @pytest.mark.xfail
-def test_bad_npg_behaviour():
+def test_bad_npg_behaviour() -> None:
     labels = np.array([0, 0, 2, 2, 2, 1, 1, 2, 2, 1, 1, 0], dtype=int)
     # fmt: off
     array = np.array([[1] * 12, [1] * 12])
@@ -590,7 +664,7 @@ def test_bad_npg_behaviour():
 
 @pytest.mark.xfail
 @pytest.mark.parametrize("func", ("nanargmax", "nanargmin"))
-def test_npg_nanarg_bug(func):
+def test_npg_nanarg_bug(func: str) -> None:
     array = np.array([1, 1, 2, 1, 1, np.nan, 6, 1])
     labels = np.array([1, 1, 1, 1, 1, 1, 1, 1]) - 1
 
@@ -602,7 +676,9 @@ def test_npg_nanarg_bug(func):
 @pytest.mark.parametrize("method", ["split-reduce", "cohorts", "map-reduce"])
 @pytest.mark.parametrize("chunk_labels", [False, True])
 @pytest.mark.parametrize("chunks", ((), (1,), (2,)))
-def test_groupby_bins(chunk_labels, chunks, engine, method) -> None:
+def test_groupby_bins(
+    chunk_labels: bool, chunks: tuple[int, ...], engine: T_Engine, method: T_Method
+) -> None:
     array = [1, 1, 1, 1, 1, 1]
     labels = [0.2, 1.5, 1.9, 2, 3, 20]
 
@@ -650,7 +726,7 @@ def test_groupby_bins(chunk_labels, chunks, engine, method) -> None:
         [(10,), (10,)],
     ],
 )
-def test_rechunk_for_blockwise(inchunks, expected):
+def test_rechunk_for_blockwise(inchunks: tuple[int, ...], expected: tuple[int, ...]) -> None:
     labels = np.array([1, 1, 1, 2, 2, 3, 3, 5, 5, 5])
     assert _get_optimal_chunks_for_groups(inchunks, labels) == expected
 
@@ -673,7 +749,7 @@ def test_rechunk_for_blockwise(inchunks, expected):
         ],
     ],
 )
-def test_find_group_cohorts(expected, labels, chunks, merge):
+def test_find_group_cohorts(expected, labels, chunks: tuple[int, ...], merge: bool) -> None:
     actual = list(find_group_cohorts(labels, (chunks,), merge, method="cohorts"))
     assert actual == expected, (actual, expected)
 
@@ -691,7 +767,7 @@ def test_find_group_cohorts(expected, labels, chunks, merge):
         [3, ((3, 4, 3, 4, 3, 4, 3, 4, 2),)],
     ],
 )
-def test_rechunk_for_cohorts(chunk_at, expected):
+def test_rechunk_for_cohorts(chunk_at: int, expected) -> None:
     array = dask.array.ones((30,), chunks=7)
     labels = np.arange(0, 30) % 7
     rechunked = rechunk_for_cohorts(array, axis=-1, force_new_chunk_at=chunk_at, labels=labels)
@@ -701,7 +777,7 @@ def test_rechunk_for_cohorts(chunk_at, expected):
 @pytest.mark.parametrize("chunks", [None, 3])
 @pytest.mark.parametrize("fill_value", [123, np.nan])
 @pytest.mark.parametrize("func", ALL_FUNCS)
-def test_fill_value_behaviour(func, chunks, fill_value, engine):
+def test_fill_value_behaviour(func: str, chunks, fill_value: float, engine: T_Engine) -> None:
     # fill_value = np.nan tests promotion of int counts to float
     # This is used by xarray
     if func in ["all", "any"] or "arg" in func:
@@ -732,7 +808,8 @@ def npfunc(x):
 @requires_dask
 @pytest.mark.parametrize("func", ["mean", "sum"])
 @pytest.mark.parametrize("dtype", ["float32", "float64", "int32", "int64"])
-def test_dtype_preservation(dtype, func, engine):
+def test_dtype_preservation(dtype: str, func: str, engine: T_Engine) -> None:
+    expected: np.typing.DTypeLike
     if func == "sum" or (func == "mean" and "float" in dtype):
         expected = np.dtype(dtype)
     elif func == "mean" and "int" in dtype:
@@ -749,7 +826,7 @@ def test_dtype_preservation(dtype, func, engine):
 
 @requires_dask
 @pytest.mark.parametrize("method", ["split-reduce", "map-reduce", "cohorts"])
-def test_cohorts(method):
+def test_cohorts(method: T_Method) -> None:
     repeats = [4, 4, 12, 2, 3, 4]
     labels = np.repeat(np.arange(6), repeats)
     array = dask.array.from_array(labels, chunks=(4, 8, 4, 9, 4))
@@ -763,7 +840,7 @@ def test_cohorts(method):
 @pytest.mark.parametrize("func", ALL_FUNCS)
 @pytest.mark.parametrize("axis", (-1, None))
 @pytest.mark.parametrize("method", ["blockwise", "cohorts", "map-reduce", "split-reduce"])
-def test_cohorts_nd_by(func, method, axis, engine):
+def test_cohorts_nd_by(func: str, method: T_Method, axis: int | None, engine: T_Engine) -> None:
     o = dask.array.ones((3,), chunks=-1)
     o2 = dask.array.ones((2, 3), chunks=-1)
 
@@ -777,6 +854,7 @@ def test_cohorts_nd_by(func, method, axis, engine):
     if "arg" in func and (axis is None or engine == "flox"):
         pytest.skip()
 
+    fill_value: bool | int
     if func in ["any", "all"]:
         fill_value = False
     else:
@@ -785,13 +863,31 @@ def test_cohorts_nd_by(func, method, axis, engine):
     if axis is not None and method != "map-reduce":
         pytest.xfail()
 
-    kwargs = dict(func=func, engine=engine, method=method, axis=axis, fill_value=fill_value)
-    actual, groups = groupby_reduce(array, by, **kwargs)
-    expected, sorted_groups = groupby_reduce(array.compute(), by, **kwargs)
+    actual, groups = groupby_reduce(
+        array, by, func=func, engine=engine, method=method, axis=axis, fill_value=fill_value
+    )
+    expected, sorted_groups = groupby_reduce(
+        array.compute(),
+        by,
+        func=func,
+        engine=engine,
+        method=method,
+        axis=axis,
+        fill_value=fill_value,
+    )
     assert_equal(groups, sorted_groups)
     assert_equal(actual, expected)
 
-    actual, groups = groupby_reduce(array, by, sort=False, **kwargs)
+    actual, groups = groupby_reduce(
+        array,
+        by,
+        sort=False,
+        func=func,
+        engine=engine,
+        method=method,
+        axis=axis,
+        fill_value=fill_value,
+    )
     if method == "cohorts":
         assert_equal(groups, [4, 3, 40, 2, 31, 1, 30])
     elif method in ("split-reduce", "map-reduce"):
@@ -804,7 +900,9 @@ def test_cohorts_nd_by(func, method, axis, engine):
 
 @pytest.mark.parametrize("func", ["sum", "count"])
 @pytest.mark.parametrize("fill_value, expected", ((0, np.integer), (np.nan, np.floating)))
-def test_dtype_promotion(func, fill_value, expected, engine):
+def test_dtype_promotion(
+    func: str, fill_value: int, expected: np.typing.DTypeLike, engine: T_Engine
+) -> None:
     array = np.array([1, 1])
     by = [0, 1]
 
@@ -815,7 +913,7 @@ def test_dtype_promotion(func, fill_value, expected, engine):
 
 
 @pytest.mark.parametrize("func", ["mean", "nanmean"])
-def test_empty_bins(func, engine):
+def test_empty_bins(func: str, engine: T_Engine) -> None:
     array = np.ones((2, 3, 2))
     by = np.broadcast_to([0, 1], array.shape)
 
@@ -832,7 +930,7 @@ def test_empty_bins(func, engine):
     assert_equal(actual, expected)
 
 
-def test_datetime_binning():
+def test_datetime_binning() -> None:
     time_bins = pd.date_range(start="2010-08-01", end="2010-08-15", freq="24H")
     by = pd.date_range("2010-08-01", "2010-08-15", freq="15min")
 
@@ -848,7 +946,7 @@ def test_datetime_binning():
 
 
 @pytest.mark.parametrize("func", ALL_FUNCS)
-def test_bool_reductions(func, engine):
+def test_bool_reductions(func: str, engine: T_Engine) -> None:
     if "arg" in func and engine == "flox":
         pytest.skip()
     groups = np.array([1, 1, 1])
@@ -874,17 +972,18 @@ def test_map_reduce_blockwise_mixed() -> None:
 
 @requires_dask
 @pytest.mark.parametrize("method", ["split-reduce", "blockwise", "map-reduce", "cohorts"])
-def test_group_by_datetime(engine, method):
-    kwargs = dict(
-        func="mean",
-        method=method,
-        engine=engine,
-    )
+def test_group_by_datetime(engine: T_Engine, method: T_Method) -> None:
     t = pd.date_range("2000-01-01", "2000-12-31", freq="D").to_series()
     data = t.dt.dayofyear
     daskarray = dask.array.from_array(data.values, chunks=30)
 
-    actual, _ = groupby_reduce(daskarray, t, **kwargs)
+    actual, _ = groupby_reduce(
+        daskarray,
+        t,
+        func="mean",
+        method=method,
+        engine=engine,
+    )
     expected = data.to_numpy().astype(float)
     assert_equal(expected, actual)
 
@@ -892,7 +991,15 @@ def test_group_by_datetime(engine, method):
         return None
 
     edges = pd.date_range("1999-12-31", "2000-12-31", freq="M").to_series().to_numpy()
-    actual, _ = groupby_reduce(daskarray, t.to_numpy(), isbin=True, expected_groups=edges, **kwargs)
+    actual, _ = groupby_reduce(
+        daskarray,
+        t.to_numpy(),
+        isbin=True,
+        expected_groups=edges,
+        func="mean",
+        method=method,
+        engine=engine,
+    )
     expected = data.resample("M").mean().to_numpy()
     assert_equal(expected, actual)
 
@@ -901,13 +1008,15 @@ def test_group_by_datetime(engine, method):
         t.to_numpy(),
         isbin=True,
         expected_groups=edges,
-        **kwargs,
+        func="mean",
+        method=method,
+        engine=engine,
     )
     expected = np.broadcast_to(expected, (2, 3, expected.shape[-1]))
     assert_equal(expected, actual)
 
 
-def test_factorize_values_outside_bins():
+def test_factorize_values_outside_bins() -> None:
 
     vals = factorize_(
         (np.arange(10).reshape(5, 2), np.arange(10).reshape(5, 2)),
@@ -941,50 +1050,98 @@ def test_multiple_groupers() -> None:
     assert_equal(expected, actual)
 
 
-def test_factorize_reindex_sorting_strings():
+def test_factorize_reindex_sorting_strings() -> None:
     kwargs = dict(
         by=(np.array(["El-Nino", "La-Nina", "boo", "Neutral"]),),
         axis=-1,
         expected_groups=(np.array(["El-Nino", "Neutral", "foo", "La-Nina"]),),
     )
 
-    expected = factorize_(**kwargs, reindex=True, sort=True)[0]
+    expected = factorize_(
+        by=(np.array(["El-Nino", "La-Nina", "boo", "Neutral"]),),
+        axis=-1,
+        expected_groups=(np.array(["El-Nino", "Neutral", "foo", "La-Nina"]),),
+        reindex=True,
+        sort=True,
+    )[0]
     assert_equal(expected, [0, 1, 4, 2])
 
-    expected = factorize_(**kwargs, reindex=True, sort=False)[0]
+    expected = factorize_(
+        by=(np.array(["El-Nino", "La-Nina", "boo", "Neutral"]),),
+        axis=-1,
+        expected_groups=(np.array(["El-Nino", "Neutral", "foo", "La-Nina"]),),
+        reindex=True,
+        sort=False,
+    )[0]
     assert_equal(expected, [0, 3, 4, 1])
 
-    expected = factorize_(**kwargs, reindex=False, sort=False)[0]
+    expected = factorize_(
+        by=(np.array(["El-Nino", "La-Nina", "boo", "Neutral"]),),
+        axis=-1,
+        expected_groups=(np.array(["El-Nino", "Neutral", "foo", "La-Nina"]),),
+        reindex=False,
+        sort=False,
+    )[0]
     assert_equal(expected, [0, 1, 2, 3])
 
-    expected = factorize_(**kwargs, reindex=False, sort=True)[0]
+    expected = factorize_(
+        by=(np.array(["El-Nino", "La-Nina", "boo", "Neutral"]),),
+        axis=-1,
+        expected_groups=(np.array(["El-Nino", "Neutral", "foo", "La-Nina"]),),
+        reindex=False,
+        sort=True,
+    )[0]
     assert_equal(expected, [0, 1, 3, 2])
 
 
-def test_factorize_reindex_sorting_ints():
+def test_factorize_reindex_sorting_ints() -> None:
     kwargs = dict(
         by=(np.array([-10, 1, 10, 2, 3, 5]),),
         axis=-1,
         expected_groups=(np.array([0, 1, 2, 3, 4, 5]),),
     )
 
-    expected = factorize_(**kwargs, reindex=True, sort=True)[0]
+    expected = factorize_(
+        by=(np.array([-10, 1, 10, 2, 3, 5]),),
+        axis=-1,
+        expected_groups=(np.array([0, 1, 2, 3, 4, 5]),),
+        reindex=True,
+        sort=True,
+    )[0]
     assert_equal(expected, [6, 1, 6, 2, 3, 5])
 
-    expected = factorize_(**kwargs, reindex=True, sort=False)[0]
+    expected = factorize_(
+        by=(np.array([-10, 1, 10, 2, 3, 5]),),
+        axis=-1,
+        expected_groups=(np.array([0, 1, 2, 3, 4, 5]),),
+        reindex=True,
+        sort=False,
+    )[0]
     assert_equal(expected, [6, 1, 6, 2, 3, 5])
 
     kwargs["expected_groups"] = (np.arange(5, -1, -1),)
 
-    expected = factorize_(**kwargs, reindex=True, sort=True)[0]
+    expected = factorize_(
+        by=(np.array([-10, 1, 10, 2, 3, 5]),),
+        axis=-1,
+        expected_groups=(np.array([0, 1, 2, 3, 4, 5]),),
+        reindex=True,
+        sort=True,
+    )[0]
     assert_equal(expected, [6, 1, 6, 2, 3, 5])
 
-    expected = factorize_(**kwargs, reindex=True, sort=False)[0]
+    expected = factorize_(
+        by=(np.array([-10, 1, 10, 2, 3, 5]),),
+        axis=-1,
+        expected_groups=(np.array([0, 1, 2, 3, 4, 5]),),
+        reindex=True,
+        sort=False,
+    )[0]
     assert_equal(expected, [6, 4, 6, 3, 2, 0])
 
 
 @requires_dask
-def test_custom_aggregation_blockwise():
+def test_custom_aggregation_blockwise() -> None:
     def grouped_median(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None):
         return aggregate(
             group_idx,
@@ -1007,7 +1164,8 @@ def grouped_median(group_idx, array, *, axis=-1, size=None, fill_value=None, dty
     expected = np.median(array, axis=-1, keepdims=True)
     assert_equal(expected, actual)
 
-    for method in ["map-reduce", "cohorts", "split-reduce"]:
+    methods: list[T_Method] = ["map-reduce", "cohorts", "split-reduce"]
+    for method in methods:
         with pytest.raises(NotImplementedError):
             groupby_reduce(
                 dask.array.from_array(array, chunks=(1, -1)),

From 203adb09e5c3eb5ca77089b452f407738d79e8d6 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 9 Oct 2022 22:22:31 +0000
Subject: [PATCH 05/31] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 tests/test_core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_core.py b/tests/test_core.py
index d232d0871..5c62f1814 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -67,7 +67,7 @@ def dask_array_ones(*args):
 )
 
 if TYPE_CHECKING:
-    from flox.core import T_Engine, T_ExpectedGroupsOpt, T_Agg, T_Method
+    from flox.core import T_Agg, T_Engine, T_ExpectedGroupsOpt, T_Method
 
 
 def test_alignment_error():

From 492963ad04390692e77e1d1a391782be7522cf14 Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Mon, 10 Oct 2022 00:24:32 +0200
Subject: [PATCH 06/31] Update test_core.py

---
 tests/test_core.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/tests/test_core.py b/tests/test_core.py
index d232d0871..b9066864b 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -441,7 +441,6 @@ def test_numpy_reduce_axis_subset(engine: T_Engine) -> None:
     # TODO: add NaNs
     by = labels2d
     array = np.ones_like(by)
-    kwargs = dict(func="count", engine=engine, fill_value=0)
     result, _ = groupby_reduce(array, by, func="count", engine=engine, fill_value=0, axis=1)
     assert_equal(result, [[2, 3], [2, 3]])
 
@@ -1051,12 +1050,6 @@ def test_multiple_groupers() -> None:
 
 
 def test_factorize_reindex_sorting_strings() -> None:
-    kwargs = dict(
-        by=(np.array(["El-Nino", "La-Nina", "boo", "Neutral"]),),
-        axis=-1,
-        expected_groups=(np.array(["El-Nino", "Neutral", "foo", "La-Nina"]),),
-    )
-
     expected = factorize_(
         by=(np.array(["El-Nino", "La-Nina", "boo", "Neutral"]),),
         axis=-1,

From 071c8694510662f5dbd7a66a7280380e5d8333fe Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Mon, 10 Oct 2022 00:31:43 +0200
Subject: [PATCH 07/31] Update test_core.py

---
 tests/test_core.py | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/tests/test_core.py b/tests/test_core.py
index 827e70332..00d513a8e 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -1088,16 +1088,11 @@ def test_factorize_reindex_sorting_strings() -> None:
 
 
 def test_factorize_reindex_sorting_ints() -> None:
-    kwargs = dict(
-        by=(np.array([-10, 1, 10, 2, 3, 5]),),
-        axis=-1,
-        expected_groups=(np.array([0, 1, 2, 3, 4, 5]),),
-    )
-
+    expected_groups = (np.array([0, 1, 2, 3, 4, 5]),)
     expected = factorize_(
         by=(np.array([-10, 1, 10, 2, 3, 5]),),
         axis=-1,
-        expected_groups=(np.array([0, 1, 2, 3, 4, 5]),),
+        expected_groups=expected_groups,
         reindex=True,
         sort=True,
     )[0]
@@ -1106,18 +1101,18 @@ def test_factorize_reindex_sorting_ints() -> None:
     expected = factorize_(
         by=(np.array([-10, 1, 10, 2, 3, 5]),),
         axis=-1,
-        expected_groups=(np.array([0, 1, 2, 3, 4, 5]),),
+        expected_groups=expected_groups,
         reindex=True,
         sort=False,
     )[0]
     assert_equal(expected, [6, 1, 6, 2, 3, 5])
 
-    kwargs["expected_groups"] = (np.arange(5, -1, -1),)
+    expected_groups = (np.arange(5, -1, -1),)
 
     expected = factorize_(
         by=(np.array([-10, 1, 10, 2, 3, 5]),),
         axis=-1,
-        expected_groups=(np.array([0, 1, 2, 3, 4, 5]),),
+        expected_groups=expected_groups,
         reindex=True,
         sort=True,
     )[0]
@@ -1126,7 +1121,7 @@ def test_factorize_reindex_sorting_ints() -> None:
     expected = factorize_(
         by=(np.array([-10, 1, 10, 2, 3, 5]),),
         axis=-1,
-        expected_groups=(np.array([0, 1, 2, 3, 4, 5]),),
+        expected_groups=expected_groups,
         reindex=True,
         sort=False,
     )[0]

From 4e9db600efe56dd251a77a18d5eacd155b802272 Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Sat, 5 Nov 2022 09:09:18 +0100
Subject: [PATCH 08/31] Don't add type hints to kwargs for readability

---
 tests/test_core.py | 128 ++++++++++-----------------------------------
 1 file changed, 27 insertions(+), 101 deletions(-)

diff --git a/tests/test_core.py b/tests/test_core.py
index 163b54193..5827ab29b 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -361,78 +361,32 @@ def test_groupby_agg_dask(
         labels[:3] = np.nan  # entire block is NaN when group_chunks=3
         labels[-2:] = np.nan
 
-    kwargs = dict(
+    kwargs: dict[str, Any] = dict(
         func=func, expected_groups=[0, 1, 2], fill_value=False if func in ["all", "any"] else 123
     )
-    expected_groups = [0, 1, 2]
-    fill_value = False if func in ["all", "any"] else 123
 
-    expected, _ = groupby_reduce(
-        array.compute(),
-        labels,
-        engine="numpy",
-        func=func,
-        expected_groups=expected_groups,
-        fill_value=fill_value,
-    )
-    actual, _ = groupby_reduce(
-        array.compute(),
-        labels,
-        engine=engine,
-        func=func,
-        expected_groups=expected_groups,
-        fill_value=fill_value,
-    )
+    expected, _ = groupby_reduce(array.compute(), labels, engine="numpy", **kwargs)
+    actual, _ = groupby_reduce(array.compute(), labels, engine=engine, **kwargs)
     assert_equal(actual, expected)
 
     with raise_if_dask_computes():
-        actual, _ = groupby_reduce(
-            array,
-            labels,
-            engine=engine,
-            func=func,
-            expected_groups=expected_groups,
-            fill_value=fill_value,
-        )
+        actual, _ = groupby_reduce(array, labels, engine=engine, **kwargs)
     assert_equal(actual, expected)
 
     by = from_array(labels, group_chunks)
     with raise_if_dask_computes():
-        actual, _ = groupby_reduce(
-            array,
-            by,
-            engine=engine,
-            func=func,
-            expected_groups=expected_groups,
-            fill_value=fill_value,
-        )
+        actual, _ = groupby_reduce(array, by, engine=engine, **kwargs)
     assert_equal(expected, actual)
 
     expected_groups = [0, 2, 1]
     with raise_if_dask_computes():
-        actual, groups = groupby_reduce(
-            array,
-            by,
-            engine=engine,
-            func=func,
-            expected_groups=expected_groups,
-            fill_value=fill_value,
-            sort=False,
-        )
+        actual, groups = groupby_reduce(array, by, **kwargs, sort=False)
     assert_equal(groups, [0, 2, 1])
     assert_equal(expected, actual[..., [0, 2, 1]])
 
     kwargs["expected_groups"] = [0, 2, 1]
     with raise_if_dask_computes():
-        actual, groups = groupby_reduce(
-            array,
-            by,
-            engine=engine,
-            func=func,
-            expected_groups=expected_groups,
-            fill_value=fill_value,
-            sort=True,
-        )
+        actual, groups = groupby_reduce(array, by, engine=engine, **kwargs, sort=True)
     assert_equal(groups, [0, 1, 2])
     assert_equal(expected, actual)
 
@@ -441,31 +395,30 @@ def test_numpy_reduce_axis_subset(engine: T_Engine) -> None:
     # TODO: add NaNs
     by = labels2d
     array = np.ones_like(by)
-    result, _ = groupby_reduce(array, by, func="count", engine=engine, fill_value=0, axis=1)
+    kwargs: dict[str, Any] = dict(func="count", engine=engine, fill_value=0)
+    result, _ = groupby_reduce(array, by, *[], axis=1, **kwargs)
     assert_equal(result, [[2, 3], [2, 3]])
 
     by = np.broadcast_to(labels2d, (3, *labels2d.shape))
     array = np.ones_like(by)
-    result, _ = groupby_reduce(array, by, func="count", engine=engine, fill_value=0, axis=1)
+    result, _ = groupby_reduce(array, by, **kwargs, axis=1)
     subarr = np.array([[1, 1], [1, 1], [0, 2], [1, 1], [1, 1]])
     expected = np.tile(subarr, (3, 1, 1))
     assert_equal(result, expected)
 
-    result, _ = groupby_reduce(array, by, func="count", engine=engine, fill_value=0, axis=2)
+    result, _ = groupby_reduce(array, by, **kwargs, axis=2)
     subarr = np.array([[2, 3], [2, 3]])
     expected = np.tile(subarr, (3, 1, 1))
     assert_equal(result, expected)
 
-    result, _ = groupby_reduce(array, by, func="count", engine=engine, fill_value=0, axis=(1, 2))
+    result, _ = groupby_reduce(array, by, **kwargs, axis=(1, 2))
     expected = np.array([[4, 6], [4, 6], [4, 6]])
     assert_equal(result, expected)
 
-    result, _ = groupby_reduce(array, by, func="count", engine=engine, fill_value=0, axis=(2, 1))
+    result, _ = groupby_reduce(array, by, **kwargs, axis=(2, 1))
     assert_equal(result, expected)
 
-    result, _ = groupby_reduce(
-        array, by[0, ...], func="count", engine=engine, fill_value=0, axis=(1, 2)
-    )
+    result, _ = groupby_reduce(array, by[0, ...], **kwargs, axis=(1, 2))
     expected = np.array([[4, 6], [4, 6], [4, 6]])
     assert_equal(result, expected)
 
@@ -863,31 +816,15 @@ def test_cohorts_nd_by(func: str, method: T_Method, axis: int | None, engine: T_
     if axis is not None and method != "map-reduce":
         pytest.xfail()
 
-    actual, groups = groupby_reduce(
-        array, by, func=func, engine=engine, method=method, axis=axis, fill_value=fill_value
-    )
-    expected, sorted_groups = groupby_reduce(
-        array.compute(),
-        by,
-        func=func,
-        engine=engine,
-        method=method,
-        axis=axis,
-        fill_value=fill_value,
+    kwargs: dict[str, Any] = dict(
+        func=func, engine=engine, method=method, axis=axis, fill_value=fill_value
     )
+    actual, groups = groupby_reduce(array, by, **kwargs)
+    expected, sorted_groups = groupby_reduce(array.compute(), by, **kwargs)
     assert_equal(groups, sorted_groups)
     assert_equal(actual, expected)
 
-    actual, groups = groupby_reduce(
-        array,
-        by,
-        sort=False,
-        func=func,
-        engine=engine,
-        method=method,
-        axis=axis,
-        fill_value=fill_value,
-    )
+    actual, groups = groupby_reduce(array, by, sort=False, **kwargs)
     if method == "map-reduce":
         assert_equal(groups, [1, 30, 2, 31, 3, 4, 40])
     else:
@@ -971,17 +908,16 @@ def test_map_reduce_blockwise_mixed() -> None:
 @requires_dask
 @pytest.mark.parametrize("method", ["split-reduce", "blockwise", "map-reduce", "cohorts"])
 def test_group_by_datetime(engine: T_Engine, method: T_Method) -> None:
-    t = pd.date_range("2000-01-01", "2000-12-31", freq="D").to_series()
-    data = t.dt.dayofyear
-    daskarray = dask.array.from_array(data.values, chunks=30)
-
-    actual, _ = groupby_reduce(
-        daskarray,
-        t,
+    kwargs: dict[str, Any] = dict(
         func="mean",
         method=method,
         engine=engine,
     )
+    t = pd.date_range("2000-01-01", "2000-12-31", freq="D").to_series()
+    data = t.dt.dayofyear
+    daskarray = dask.array.from_array(data.values, chunks=30)
+
+    actual, _ = groupby_reduce(daskarray, t, **kwargs)
     expected = data.to_numpy().astype(float)
     assert_equal(expected, actual)
 
@@ -989,15 +925,7 @@ def test_group_by_datetime(engine: T_Engine, method: T_Method) -> None:
         return None
 
     edges = pd.date_range("1999-12-31", "2000-12-31", freq="M").to_series().to_numpy()
-    actual, _ = groupby_reduce(
-        daskarray,
-        t.to_numpy(),
-        isbin=True,
-        expected_groups=edges,
-        func="mean",
-        method=method,
-        engine=engine,
-    )
+    actual, _ = groupby_reduce(daskarray, t.to_numpy(), isbin=True, expected_groups=edges, **kwargs)
     expected = data.resample("M").mean().to_numpy()
     assert_equal(expected, actual)
 
@@ -1006,9 +934,7 @@ def test_group_by_datetime(engine: T_Engine, method: T_Method) -> None:
         t.to_numpy(),
         isbin=True,
         expected_groups=edges,
-        func="mean",
-        method=method,
-        engine=engine,
+        **kwargs,
     )
     expected = np.broadcast_to(expected, (2, 3, expected.shape[-1]))
     assert_equal(expected, actual)

From 4b134fc3972b0f3db2ce4d80674717ba8d31ca48 Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Sat, 5 Nov 2022 09:43:42 +0100
Subject: [PATCH 09/31] fix merge errors

---
 tests/test_core.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/tests/test_core.py b/tests/test_core.py
index 788d217bf..272198564 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -232,6 +232,9 @@ def test_groupby_reduce_all(
         params = list(itertools.product(["map-reduce"], [True, False, None]))
         params.extend(itertools.product(["cohorts"], [False, None]))
         for method, reindex in params:
+            flox_kwargs = dict(
+                func=func, engine=engine, finalize_kwargs=kwargs, fill_value=fill_value
+            )
             call = partial(
                 groupby_reduce, array, *by, method=method, reindex=reindex, **flox_kwargs
             )
@@ -395,7 +398,7 @@ def test_groupby_agg_dask(
         actual, _ = groupby_reduce(array, by, engine=engine, **kwargs)
     assert_equal(expected, actual)
 
-    expected_groups = [0, 2, 1]
+    kwargs["expected_groups"] = [0, 2, 1]
     with raise_if_dask_computes():
         actual, groups = groupby_reduce(array, by, engine=engine, **kwargs, sort=False)
     assert_equal(groups, np.array([0, 2, 1], dtype=np.intp))
@@ -1005,7 +1008,7 @@ def test_multiple_groupers() -> None:
 
 
 def test_factorize_reindex_sorting_strings() -> None:
-    expected = factorize_(
+    kwargs = dict(
         by=(np.array(["El-Nino", "La-Nina", "boo", "Neutral"]),),
         axis=-1,
         expected_groups=(np.array(["El-Nino", "Neutral", "foo", "La-Nina"]),),
@@ -1025,8 +1028,7 @@ def test_factorize_reindex_sorting_strings() -> None:
 
 
 def test_factorize_reindex_sorting_ints() -> None:
-    expected_groups = (np.array([0, 1, 2, 3, 4, 5]),)
-    expected = factorize_(
+    kwargs = dict(
         by=(np.array([-10, 1, 10, 2, 3, 5]),),
         axis=-1,
         expected_groups=(np.array([0, 1, 2, 3, 4, 5], np.int64),),
@@ -1038,7 +1040,7 @@ def test_factorize_reindex_sorting_ints() -> None:
     expected = factorize_(**kwargs, reindex=True, sort=False)[0]
     assert_equal(expected, np.array([6, 1, 6, 2, 3, 5], dtype=np.int64))
 
-    expected_groups = (np.arange(5, -1, -1),)
+    kwargs["expected_groups"] = (np.arange(5, -1, -1),)
 
     expected = factorize_(**kwargs, reindex=True, sort=True)[0]
     assert_equal(expected, np.array([6, 1, 6, 2, 3, 5], dtype=np.int64))

From af5c912000d45b94cfddd6d300af052bd30bd7f6 Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Sat, 5 Nov 2022 09:54:40 +0100
Subject: [PATCH 10/31] Update test_core.py

---
 tests/test_core.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/test_core.py b/tests/test_core.py
index 272198564..ed982422d 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -205,6 +205,7 @@ def test_groupby_reduce_all(
         tolerance = None
 
     for kwargs in finalize_kwargs:
+        flox_kwargs = dict(func=func, engine=engine, finalize_kwargs=kwargs, fill_value=fill_value)
         with np.errstate(invalid="ignore", divide="ignore"):
             if "arg" in func and add_nan_by:
                 array[..., nanmask] = np.nan
@@ -214,9 +215,7 @@ def test_groupby_reduce_all(
         for _ in range(nby):
             expected = np.expand_dims(expected, -1)
 
-        actual, *groups = groupby_reduce(
-            array, *bys, func=func, engine=engine, finalize_kwargs=kwargs, fill_value=fill_value
-        )
+        actual, *groups = groupby_reduce(array, *by, **flox_kwargs)
         assert actual.ndim == (array.ndim + nby - 1)
         assert expected.ndim == (array.ndim + nby - 1)
         expected_groups = tuple(np.array([idx + 1.0]) for idx in range(nby))

From 6656b7fd8ee90fdd2ebe0750f2b421c72328f14a Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Sat, 5 Nov 2022 09:56:27 +0100
Subject: [PATCH 11/31] Update test_core.py

---
 tests/test_core.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tests/test_core.py b/tests/test_core.py
index ed982422d..ec168665f 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -231,9 +231,6 @@ def test_groupby_reduce_all(
         params = list(itertools.product(["map-reduce"], [True, False, None]))
         params.extend(itertools.product(["cohorts"], [False, None]))
         for method, reindex in params:
-            flox_kwargs = dict(
-                func=func, engine=engine, finalize_kwargs=kwargs, fill_value=fill_value
-            )
             call = partial(
                 groupby_reduce, array, *by, method=method, reindex=reindex, **flox_kwargs
             )

From 51f448c560bc8a2c2dc8389d3ecd0d2e99ddb8ed Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Sat, 5 Nov 2022 10:11:18 +0100
Subject: [PATCH 12/31] Update test_core.py

---
 tests/test_core.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_core.py b/tests/test_core.py
index ec168665f..3d0eea885 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -215,7 +215,7 @@ def test_groupby_reduce_all(
         for _ in range(nby):
             expected = np.expand_dims(expected, -1)
 
-        actual, *groups = groupby_reduce(array, *by, **flox_kwargs)
+        actual, *groups = groupby_reduce(array, *bys, **flox_kwargs)
         assert actual.ndim == (array.ndim + nby - 1)
         assert expected.ndim == (array.ndim + nby - 1)
         expected_groups = tuple(np.array([idx + 1.0]) for idx in range(nby))
@@ -232,7 +232,7 @@ def test_groupby_reduce_all(
         params.extend(itertools.product(["cohorts"], [False, None]))
         for method, reindex in params:
             call = partial(
-                groupby_reduce, array, *by, method=method, reindex=reindex, **flox_kwargs
+                groupby_reduce, array, *bys, method=method, reindex=reindex, **flox_kwargs
             )
             if "arg" in func and reindex is True:
                 # simple_combine with argreductions not supported right now

From 83e3cec010a4d5f78ea62847b8b1463d9ea8d7c4 Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Sat, 5 Nov 2022 10:58:02 +0100
Subject: [PATCH 13/31] Update mypy ci

---
 .github/workflows/ci-additional.yaml | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml
index fd7cc8242..c7d7be20e 100644
--- a/.github/workflows/ci-additional.yaml
+++ b/.github/workflows/ci-additional.yaml
@@ -100,6 +100,7 @@ jobs:
           environment-name: xarray-tests
           extra-specs: |
             python=${{env.PYTHON_VERSION}}
+            conda
           cache-env: true
           cache-env-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}"
       - name: Install xarray
@@ -115,4 +116,13 @@ jobs:
 
       - name: Run mypy
         run: |
-          python -m mypy --install-types --non-interactive
+          python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report
+      - name: Upload mypy coverage to Codecov
+        uses: codecov/codecov-action@v3.1.1
+        with:
+          file: mypy_report/cobertura.xml
+          flags: mypy
+          env_vars: PYTHON_VERSION
+          name: codecov-umbrella
+          fail_ci_if_error: false
+

From 1698ed225a5a219ca9d99d8308a4cbe9a9fdf8b0 Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Sat, 5 Nov 2022 11:09:09 +0100
Subject: [PATCH 14/31] Update ci-additional.yaml

---
 .github/workflows/ci-additional.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml
index c7d7be20e..fc1608773 100644
--- a/.github/workflows/ci-additional.yaml
+++ b/.github/workflows/ci-additional.yaml
@@ -59,6 +59,7 @@ jobs:
           environment-name: flox-tests
           extra-specs: |
             python=${{env.PYTHON_VERSION}}
+            lxml
           cache-env: true
           cache-env-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}"
 

From 87c81169b35f4d25e01e6339c02c3248ff69eb18 Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Sat, 5 Nov 2022 11:11:38 +0100
Subject: [PATCH 15/31] Update ci-additional.yaml

---
 .github/workflows/ci-additional.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml
index fc1608773..818ce5b40 100644
--- a/.github/workflows/ci-additional.yaml
+++ b/.github/workflows/ci-additional.yaml
@@ -59,7 +59,6 @@ jobs:
           environment-name: flox-tests
           extra-specs: |
             python=${{env.PYTHON_VERSION}}
-            lxml
           cache-env: true
           cache-env-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}"
 
@@ -102,6 +101,7 @@ jobs:
           extra-specs: |
             python=${{env.PYTHON_VERSION}}
             conda
+            lxml
           cache-env: true
           cache-env-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}"
       - name: Install xarray

From 7d619ea9ca6b8865a933d84549e61955b1d2f412 Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Sat, 5 Nov 2022 11:22:14 +0100
Subject: [PATCH 16/31] Move tests to /flox so mypy finds them

---
 {tests => flox/tests}/__init__.py    | 0
 {tests => flox/tests}/test_core.py   | 0
 {tests => flox/tests}/test_xarray.py | 0
 3 files changed, 0 insertions(+), 0 deletions(-)
 rename {tests => flox/tests}/__init__.py (100%)
 rename {tests => flox/tests}/test_core.py (100%)
 rename {tests => flox/tests}/test_xarray.py (100%)

diff --git a/tests/__init__.py b/flox/tests/__init__.py
similarity index 100%
rename from tests/__init__.py
rename to flox/tests/__init__.py
diff --git a/tests/test_core.py b/flox/tests/test_core.py
similarity index 100%
rename from tests/test_core.py
rename to flox/tests/test_core.py
diff --git a/tests/test_xarray.py b/flox/tests/test_xarray.py
similarity index 100%
rename from tests/test_xarray.py
rename to flox/tests/test_xarray.py

From 6117bc2632eaf403c86b6ab48b65048639bee8d3 Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Sat, 5 Nov 2022 11:31:10 +0100
Subject: [PATCH 17/31] Update ci-additional.yaml

---
 .github/workflows/ci-additional.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml
index 818ce5b40..cdbe8c410 100644
--- a/.github/workflows/ci-additional.yaml
+++ b/.github/workflows/ci-additional.yaml
@@ -97,14 +97,14 @@ jobs:
         uses: mamba-org/provision-with-micromamba@v14
         with:
           environment-file: ${{env.CONDA_ENV_FILE}}
-          environment-name: xarray-tests
+          environment-name: flox-tests
           extra-specs: |
             python=${{env.PYTHON_VERSION}}
             conda
             lxml
           cache-env: true
           cache-env-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}"
-      - name: Install xarray
+      - name: Install flox
         run: |
           python -m pip install --no-deps -e .
       - name: Version info

From 391c7b5269b1782d3dfc7fae27556cf0583f9afe Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Sat, 5 Nov 2022 11:46:01 +0100
Subject: [PATCH 18/31] Include more files in mypy check

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 32e55d712..b10f1a26e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,8 +33,8 @@ known_third_party = [
 
 [tool.mypy]
 allow_redefinition = true
-exclude = "properties|asv_bench|doc|tests|flycheck"
-files = "flox/*.py"
+exclude = "properties|doc|flycheck"
+files = "flox"
 show_error_codes = true
 
 [[tool.mypy.overrides]]

From 90bf14f09b596f73d1a408d5ca3823331420dc66 Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Sat, 5 Nov 2022 11:46:14 +0100
Subject: [PATCH 19/31] move back tests

---
 {flox/tests => tests}/__init__.py    | 0
 {flox/tests => tests}/test_core.py   | 0
 {flox/tests => tests}/test_xarray.py | 0
 3 files changed, 0 insertions(+), 0 deletions(-)
 rename {flox/tests => tests}/__init__.py (100%)
 rename {flox/tests => tests}/test_core.py (100%)
 rename {flox/tests => tests}/test_xarray.py (100%)

diff --git a/flox/tests/__init__.py b/tests/__init__.py
similarity index 100%
rename from flox/tests/__init__.py
rename to tests/__init__.py
diff --git a/flox/tests/test_core.py b/tests/test_core.py
similarity index 100%
rename from flox/tests/test_core.py
rename to tests/test_core.py
diff --git a/flox/tests/test_xarray.py b/tests/test_xarray.py
similarity index 100%
rename from flox/tests/test_xarray.py
rename to tests/test_xarray.py

From 3bab2f084bcf77eeeb9beb217af156e7651fca43 Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Sat, 5 Nov 2022 11:48:23 +0100
Subject: [PATCH 20/31] Revert "move back tests"

This reverts commit 90bf14f09b596f73d1a408d5ca3823331420dc66.
---
 {tests => flox/tests}/__init__.py    | 0
 {tests => flox/tests}/test_core.py   | 0
 {tests => flox/tests}/test_xarray.py | 0
 3 files changed, 0 insertions(+), 0 deletions(-)
 rename {tests => flox/tests}/__init__.py (100%)
 rename {tests => flox/tests}/test_core.py (100%)
 rename {tests => flox/tests}/test_xarray.py (100%)

diff --git a/tests/__init__.py b/flox/tests/__init__.py
similarity index 100%
rename from tests/__init__.py
rename to flox/tests/__init__.py
diff --git a/tests/test_core.py b/flox/tests/test_core.py
similarity index 100%
rename from tests/test_core.py
rename to flox/tests/test_core.py
diff --git a/tests/test_xarray.py b/flox/tests/test_xarray.py
similarity index 100%
rename from tests/test_xarray.py
rename to flox/tests/test_xarray.py

From 3e4831917dfa81d7621dda06fa2119d31fb25d3b Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Sat, 5 Nov 2022 12:11:30 +0100
Subject: [PATCH 21/31] fix type errors

---
 flox/tests/test_core.py   | 26 ++++++++++++++++----------
 flox/tests/test_xarray.py |  2 +-
 2 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/flox/tests/test_core.py b/flox/tests/test_core.py
index 3d0eea885..c657fa31b 100644
--- a/flox/tests/test_core.py
+++ b/flox/tests/test_core.py
@@ -80,6 +80,10 @@ def dask_array_ones(*args):
 if TYPE_CHECKING:
     from flox.core import T_Agg, T_Engine, T_ExpectedGroupsOpt, T_Method
 
+    # Accept arbitrary values in kwargs dicts to keep the tests readable. The trade-off
+    # is that type checking will likely miss a lot of type errors within these dicts.
+    T_Kwargs = dict[str, Any]
+
 
 def test_alignment_error():
     da = np.ones((12,))
@@ -195,7 +199,7 @@ def test_groupby_reduce_all(
             bys[idx][2 * idx : 2 * idx + 3] = np.nan
     nanmask = reduce(np.logical_or, (np.isnan(b) for b in bys))
 
-    finalize_kwargs: list[dict[str, Any]] = [{}]
+    finalize_kwargs: list[T_Kwargs] = [{}]
     if "var" in func or "std" in func:
         finalize_kwargs = finalize_kwargs + [{"ddof": 1}, {"ddof": 0}]
         fill_value = np.nan
@@ -205,7 +209,9 @@ def test_groupby_reduce_all(
         tolerance = None
 
     for kwargs in finalize_kwargs:
-        flox_kwargs = dict(func=func, engine=engine, finalize_kwargs=kwargs, fill_value=fill_value)
+        flox_kwargs: T_Kwargs = dict(
+            func=func, engine=engine, finalize_kwargs=kwargs, fill_value=fill_value
+        )
         with np.errstate(invalid="ignore", divide="ignore"):
             if "arg" in func and add_nan_by:
                 array[..., nanmask] = np.nan
@@ -377,7 +383,7 @@ def test_groupby_agg_dask(
         labels[:3] = np.nan  # entire block is NaN when group_chunks=3
         labels[-2:] = np.nan
 
-    kwargs: dict[str, Any] = dict(
+    kwargs: T_Kwargs = dict(
         func=func, expected_groups=[0, 1, 2], fill_value=False if func in ["all", "any"] else 123
     )
 
@@ -410,7 +416,7 @@ def test_numpy_reduce_axis_subset(engine: T_Engine) -> None:
     # TODO: add NaNs
     by = labels2d
     array = np.ones_like(by, dtype=np.int64)
-    kwargs = dict(func="count", engine=engine, fill_value=0)
+    kwargs: T_Kwargs = dict(func="count", engine=engine, fill_value=0)
     result, _ = groupby_reduce(array, by, **kwargs, axis=1)
     assert_equal(result, np.array([[2, 3], [2, 3]], dtype=np.int64))
 
@@ -845,7 +851,7 @@ def test_cohorts_nd_by(func: str, method: T_Method, axis: int | None, engine: T_
     if axis is not None and method != "map-reduce":
         pytest.xfail()
 
-    kwargs: dict[str, Any] = dict(
+    kwargs: T_Kwargs = dict(
         func=func, engine=engine, method=method, axis=axis, fill_value=fill_value
     )
     actual, groups = groupby_reduce(array, by, **kwargs)
@@ -928,7 +934,7 @@ def test_map_reduce_blockwise_mixed() -> None:
         dask.array.from_array(data.values, chunks=365),
         t.dt.month,
         func="mean",
-        method="split-reduce",
+        method="cohorts",
     )
     expected, _ = groupby_reduce(data, t.dt.month, func="mean")
     assert_equal(expected, actual)
@@ -937,7 +943,7 @@ def test_map_reduce_blockwise_mixed() -> None:
 @requires_dask
 @pytest.mark.parametrize("method", ["split-reduce", "blockwise", "map-reduce", "cohorts"])
 def test_group_by_datetime(engine: T_Engine, method: T_Method) -> None:
-    kwargs: dict[str, Any] = dict(
+    kwargs: T_Kwargs = dict(
         func="mean",
         method=method,
         engine=engine,
@@ -1004,7 +1010,7 @@ def test_multiple_groupers() -> None:
 
 
 def test_factorize_reindex_sorting_strings() -> None:
-    kwargs = dict(
+    kwargs: T_Kwargs = dict(
         by=(np.array(["El-Nino", "La-Nina", "boo", "Neutral"]),),
         axis=-1,
         expected_groups=(np.array(["El-Nino", "Neutral", "foo", "La-Nina"]),),
@@ -1024,7 +1030,7 @@ def test_factorize_reindex_sorting_strings() -> None:
 
 
 def test_factorize_reindex_sorting_ints() -> None:
-    kwargs = dict(
+    kwargs: T_Kwargs = dict(
         by=(np.array([-10, 1, 10, 2, 3, 5]),),
         axis=-1,
         expected_groups=(np.array([0, 1, 2, 3, 4, 5], np.int64),),
@@ -1069,7 +1075,7 @@ def grouped_median(group_idx, array, *, axis=-1, size=None, fill_value=None, dty
     expected = np.median(array, axis=-1, keepdims=True)
     assert_equal(expected, actual)
 
-    methods: list[T_Method] = ["map-reduce", "cohorts", "split-reduce"]
+    methods: list[T_Method] = ["map-reduce", "cohorts"]
     for method in methods:
         with pytest.raises(NotImplementedError):
             groupby_reduce(
diff --git a/flox/tests/test_xarray.py b/flox/tests/test_xarray.py
index 9fce115c7..5ba94af16 100644
--- a/flox/tests/test_xarray.py
+++ b/flox/tests/test_xarray.py
@@ -503,7 +503,7 @@ def test_groupby_bins_indexed_coordinate() -> None:
         expected_groups=([40, 50, 60, 70],),
         isbin=(True,),
         func="mean",
-        method="split-reduce",
+        method="cohorts",
     )
     xr.testing.assert_allclose(expected, actual)
 

From 797a50e372416ec31c6bc60a7486d97560134ca3 Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Sat, 5 Nov 2022 12:16:49 +0100
Subject: [PATCH 22/31] numba numpy_groupies to ignore

---
 pyproject.toml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index b10f1a26e..885626285 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -43,7 +43,8 @@ module=[
     "cftime",
     "dask.*",
     "importlib_metadata",
-    "numpy_groupies",
+    "numba.*",
+    "numpy_groupies.*",
     "matplotlib.*",
     "pandas",
     "setuptools",

From 3026276aaed518ae648bece08cf7acb8b4508a6c Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Sat, 5 Nov 2022 12:31:57 +0100
Subject: [PATCH 23/31] Update test_core.py

---
 flox/tests/test_core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flox/tests/test_core.py b/flox/tests/test_core.py
index c657fa31b..0a2f7c9b1 100644
--- a/flox/tests/test_core.py
+++ b/flox/tests/test_core.py
@@ -837,7 +837,7 @@ def test_cohorts_nd_by(func: str, method: T_Method, axis: int | None, engine: T_
     by[0, 1] = 30
     by[2, 1] = 40
     by[0, 4] = 31
-    array = np.broadcast_to(array, (2, 3) + array.shape)
+    array = dask.array.broadcast_to(array, (2, 3) + array.shape)
 
     if "arg" in func and (axis is None or engine == "flox"):
         pytest.skip()

From d38239363e3c8076a2ddc8f3918604bebcfc64d6 Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Sat, 5 Nov 2022 12:56:20 +0100
Subject: [PATCH 24/31] move engine fixture to conftest

---
 conftest.py             | 49 +++++++++++++++++++++++++++++++++++++++++
 flox/tests/__init__.py  | 10 ---------
 flox/tests/test_core.py |  1 -
 3 files changed, 49 insertions(+), 11 deletions(-)
 create mode 100644 conftest.py

diff --git a/conftest.py b/conftest.py
new file mode 100644
index 000000000..d427ecc64
--- /dev/null
+++ b/conftest.py
@@ -0,0 +1,49 @@
+"""Configuration for pytest."""
+
+import pytest
+
+
+def pytest_addoption(parser):
+    """Add command-line flags for pytest."""
+    parser.addoption("--run-flaky", action="store_true", help="runs flaky tests")
+    parser.addoption(
+        "--run-network-tests",
+        action="store_true",
+        help="runs tests requiring a network connection",
+    )
+
+
+def pytest_runtest_setup(item):
+    # based on https://stackoverflow.com/questions/47559524
+    if "flaky" in item.keywords and not item.config.getoption("--run-flaky"):
+        pytest.skip("set --run-flaky option to run flaky tests")
+    if "network" in item.keywords and not item.config.getoption("--run-network-tests"):
+        pytest.skip("set --run-network-tests to run test requiring an internet connection")
+
+
+@pytest.fixture(autouse=True)
+def add_standard_imports(doctest_namespace, tmpdir):
+    import numpy as np
+    import pandas as pd
+
+    import xarray as xr
+
+    doctest_namespace["np"] = np
+    doctest_namespace["pd"] = pd
+    doctest_namespace["xr"] = xr
+
+    # always seed numpy.random to make the examples deterministic
+    np.random.seed(0)
+
+    # always switch to the temporary directory, so files get written there
+    tmpdir.chdir()
+
+
+@pytest.fixture(scope="module", params=["flox", "numpy", "numba"])
+def engine(request):
+    if request.param == "numba":
+        try:
+            import numba  # noqa: F401
+        except ImportError:
+            pytest.xfail()
+    return request.param
diff --git a/flox/tests/__init__.py b/flox/tests/__init__.py
index b1a266652..e2b8d8584 100644
--- a/flox/tests/__init__.py
+++ b/flox/tests/__init__.py
@@ -125,13 +125,3 @@ def assert_equal_tuple(a, b):
             np.testing.assert_array_equal(a_, b_)
         else:
             assert a_ == b_
-
-
-@pytest.fixture(scope="module", params=["flox", "numpy", "numba"])
-def engine(request):
-    if request.param == "numba":
-        try:
-            import numba
-        except ImportError:
-            pytest.xfail()
-    return request.param
diff --git a/flox/tests/test_core.py b/flox/tests/test_core.py
index 0a2f7c9b1..48fd7365c 100644
--- a/flox/tests/test_core.py
+++ b/flox/tests/test_core.py
@@ -26,7 +26,6 @@
 from . import (
     assert_equal,
     assert_equal_tuple,
-    engine,
     has_dask,
     raise_if_dask_computes,
     requires_dask,

From 8a52b4f756a524f0893b27dd3fddfe16f2334445 Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Sat, 5 Nov 2022 12:59:22 +0100
Subject: [PATCH 25/31] Update test_xarray.py

---
 flox/tests/test_xarray.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flox/tests/test_xarray.py b/flox/tests/test_xarray.py
index 5ba94af16..fcbf4f22c 100644
--- a/flox/tests/test_xarray.py
+++ b/flox/tests/test_xarray.py
@@ -12,7 +12,7 @@
 
 from flox.xarray import rechunk_for_blockwise, resample_reduce, xarray_reduce
 
-from . import assert_equal, engine, has_dask, raise_if_dask_computes, requires_dask
+from . import assert_equal, has_dask, raise_if_dask_computes, requires_dask
 
 # isort: off
 if has_dask:

From 1a02d23ec258c0f7cef6729bfae5f69dbe80d1b0 Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Sat, 5 Nov 2022 13:02:55 +0100
Subject: [PATCH 26/31] comment out for now

---
 conftest.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/conftest.py b/conftest.py
index d427ecc64..906a70949 100644
--- a/conftest.py
+++ b/conftest.py
@@ -21,22 +21,22 @@ def pytest_runtest_setup(item):
         pytest.skip("set --run-network-tests to run test requiring an internet connection")
 
 
-@pytest.fixture(autouse=True)
-def add_standard_imports(doctest_namespace, tmpdir):
-    import numpy as np
-    import pandas as pd
+# @pytest.fixture(autouse=True)
+# def add_standard_imports(doctest_namespace, tmpdir):
+#     import numpy as np
+#     import pandas as pd
 
-    import xarray as xr
+#     import xarray as xr
 
-    doctest_namespace["np"] = np
-    doctest_namespace["pd"] = pd
-    doctest_namespace["xr"] = xr
+#     doctest_namespace["np"] = np
+#     doctest_namespace["pd"] = pd
+#     doctest_namespace["xr"] = xr
 
-    # always seed numpy.random to make the examples deterministic
-    np.random.seed(0)
+#     # always seed numpy.random to make the examples deterministic
+#     np.random.seed(0)
 
-    # always switch to the temporary directory, so files get written there
-    tmpdir.chdir()
+#     # always switch to the temporary directory, so files get written there
+#     tmpdir.chdir()
 
 
 @pytest.fixture(scope="module", params=["flox", "numpy", "numba"])

From 988956f1f28a3c3f4896000101eb9594608999b6 Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Sat, 5 Nov 2022 13:07:17 +0100
Subject: [PATCH 27/31] Update test_xarray.py

---
 flox/tests/test_xarray.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flox/tests/test_xarray.py b/flox/tests/test_xarray.py
index fcbf4f22c..98375b5c0 100644
--- a/flox/tests/test_xarray.py
+++ b/flox/tests/test_xarray.py
@@ -475,7 +475,7 @@ def test_cache() -> None:
 
 @pytest.mark.parametrize("use_cftime", [True, False])
 @pytest.mark.parametrize("func", ["count", "mean"])
-def test_datetime_array_reduce(use_cftime: bool, func: str, engine: T_Engine) -> None:
+def test_datetime_array_reduce(use_cftime: bool, func: T_Agg, engine: T_Engine) -> None:
 
     time = xr.DataArray(
         xr.date_range("2009-01-01", "2012-12-31", use_cftime=use_cftime),

From a4d5a9e02f4929b76cb43226a944cfcc37b7e1b4 Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Sat, 5 Nov 2022 13:09:32 +0100
Subject: [PATCH 28/31] Update ci-additional.yaml

---
 .github/workflows/ci-additional.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml
index cdbe8c410..4a1f397a2 100644
--- a/.github/workflows/ci-additional.yaml
+++ b/.github/workflows/ci-additional.yaml
@@ -126,4 +126,3 @@ jobs:
           env_vars: PYTHON_VERSION
           name: codecov-umbrella
           fail_ci_if_error: false
-

From e0308b8935757d06db96a86401a57a475cf135dc Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Sat, 5 Nov 2022 13:12:52 +0100
Subject: [PATCH 29/31] Update test_xarray.py

---
 flox/tests/test_xarray.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/flox/tests/test_xarray.py b/flox/tests/test_xarray.py
index 98375b5c0..963fe9c1c 100644
--- a/flox/tests/test_xarray.py
+++ b/flox/tests/test_xarray.py
@@ -28,7 +28,7 @@
     pass
 
 if TYPE_CHECKING:
-    from flox.core import T_Agg, T_Engine
+    from flox.core import T_Engine
 
 
 tolerance64 = {"rtol": 1e-15, "atol": 1e-18}
@@ -475,7 +475,7 @@ def test_cache() -> None:
 
 @pytest.mark.parametrize("use_cftime", [True, False])
 @pytest.mark.parametrize("func", ["count", "mean"])
-def test_datetime_array_reduce(use_cftime: bool, func: T_Agg, engine: T_Engine) -> None:
+def test_datetime_array_reduce(use_cftime: bool, func, engine: T_Engine) -> None:
 
     time = xr.DataArray(
         xr.date_range("2009-01-01", "2012-12-31", use_cftime=use_cftime),

From 472db7823bd4d9df1e3e88cf9bc88b3f8f87abfe Mon Sep 17 00:00:00 2001
From: dcherian <deepak@cherian.net>
Date: Sat, 26 Nov 2022 20:50:53 -0700
Subject: [PATCH 30/31] Clean up conftest

---
 conftest.py | 36 ------------------------------------
 1 file changed, 36 deletions(-)

diff --git a/conftest.py b/conftest.py
index 906a70949..c5c121d32 100644
--- a/conftest.py
+++ b/conftest.py
@@ -3,42 +3,6 @@
 import pytest
 
 
-def pytest_addoption(parser):
-    """Add command-line flags for pytest."""
-    parser.addoption("--run-flaky", action="store_true", help="runs flaky tests")
-    parser.addoption(
-        "--run-network-tests",
-        action="store_true",
-        help="runs tests requiring a network connection",
-    )
-
-
-def pytest_runtest_setup(item):
-    # based on https://stackoverflow.com/questions/47559524
-    if "flaky" in item.keywords and not item.config.getoption("--run-flaky"):
-        pytest.skip("set --run-flaky option to run flaky tests")
-    if "network" in item.keywords and not item.config.getoption("--run-network-tests"):
-        pytest.skip("set --run-network-tests to run test requiring an internet connection")
-
-
-# @pytest.fixture(autouse=True)
-# def add_standard_imports(doctest_namespace, tmpdir):
-#     import numpy as np
-#     import pandas as pd
-
-#     import xarray as xr
-
-#     doctest_namespace["np"] = np
-#     doctest_namespace["pd"] = pd
-#     doctest_namespace["xr"] = xr
-
-#     # always seed numpy.random to make the examples deterministic
-#     np.random.seed(0)
-
-#     # always switch to the temporary directory, so files get written there
-#     tmpdir.chdir()
-
-
 @pytest.fixture(scope="module", params=["flox", "numpy", "numba"])
 def engine(request):
     if request.param == "numba":

From c5596b5a415484b34ab8e3af9e66789ce72edea6 Mon Sep 17 00:00:00 2001
From: dcherian <deepak@cherian.net>
Date: Sat, 26 Nov 2022 20:59:53 -0700
Subject: [PATCH 31/31] Cleanup ci-additional

---
 .github/workflows/ci-additional.yaml | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml
index 4a1f397a2..a097c606c 100644
--- a/.github/workflows/ci-additional.yaml
+++ b/.github/workflows/ci-additional.yaml
@@ -65,10 +65,6 @@ jobs:
       - name: Install flox
         run: |
           python -m pip install --no-deps -e .
-      - name: Version info
-        run: |
-          conda info -a
-          conda list
       - name: Run doctests
         run: |
           python -m pytest --doctest-modules flox --ignore flox/tests
@@ -100,17 +96,12 @@ jobs:
           environment-name: flox-tests
           extra-specs: |
             python=${{env.PYTHON_VERSION}}
-            conda
             lxml
           cache-env: true
           cache-env-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}"
       - name: Install flox
         run: |
           python -m pip install --no-deps -e .
-      - name: Version info
-        run: |
-          conda info -a
-          conda list
       - name: Install mypy
         run: |
           python -m pip install mypy