From 587ebb812ab9fa5160146195fe38859c7d183697 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 11 Aug 2022 03:11:52 -0400 Subject: [PATCH 01/95] copied files defining strategies over to this branch --- xarray/tests/duckarrays/base/strategies.py | 161 +++++++++++++++++++++ xarray/tests/duckarrays/base/utils.py | 36 +++++ 2 files changed, 197 insertions(+) create mode 100644 xarray/tests/duckarrays/base/strategies.py create mode 100644 xarray/tests/duckarrays/base/utils.py diff --git a/xarray/tests/duckarrays/base/strategies.py b/xarray/tests/duckarrays/base/strategies.py new file mode 100644 index 00000000000..42eee29b554 --- /dev/null +++ b/xarray/tests/duckarrays/base/strategies.py @@ -0,0 +1,161 @@ +import hypothesis.extra.numpy as npst +import hypothesis.strategies as st + +import xarray as xr +from xarray.core.utils import is_dict_like + +from . import utils + +all_dtypes = ( + npst.integer_dtypes() + | npst.unsigned_integer_dtypes() + | npst.floating_dtypes() + | npst.complex_number_dtypes() +) + + +def numpy_array(shape, dtypes=None): + if dtypes is None: + dtypes = all_dtypes + + def elements(dtype): + max_value = 100 + min_value = 0 if dtype.kind == "u" else -max_value + + return npst.from_dtype( + dtype, allow_infinity=False, min_value=min_value, max_value=max_value + ) + + return dtypes.flatmap( + lambda dtype: npst.arrays(dtype=dtype, shape=shape, elements=elements(dtype)) + ) + + +def dimension_sizes(min_dims, max_dims, min_size, max_size): + sizes = st.lists( + elements=st.tuples(st.text(min_size=1), st.integers(min_size, max_size)), + min_size=min_dims, + max_size=max_dims, + unique_by=lambda x: x[0], + ) + return sizes + + +@st.composite +def variable( + draw, + create_data, + *, + sizes=None, + min_size=1, + max_size=3, + min_dims=1, + max_dims=3, + dtypes=None, +): + if sizes is None: + sizes = draw( + dimension_sizes( + min_size=min_size, + max_size=max_size, + min_dims=min_dims, + max_dims=max_dims, + ) + ) + + if not sizes: + dims = () 
+ shape = () + else: + dims, shape = zip(*sizes) + data = create_data(shape, dtypes) + + return xr.Variable(dims, draw(data)) + + +@st.composite +def data_array( + draw, create_data, *, min_dims=1, max_dims=3, min_size=1, max_size=3, dtypes=None +): + name = draw(st.none() | st.text(min_size=1)) + if dtypes is None: + dtypes = all_dtypes + + sizes = st.lists( + elements=st.tuples(st.text(min_size=1), st.integers(min_size, max_size)), + min_size=min_dims, + max_size=max_dims, + unique_by=lambda x: x[0], + ) + drawn_sizes = draw(sizes) + dims, shape = zip(*drawn_sizes) + + data = draw(create_data(shape, dtypes)) + + return xr.DataArray( + data=data, + name=name, + dims=dims, + ) + + +@st.composite +def dataset( + draw, + create_data, + *, + min_dims=1, + max_dims=3, + min_size=1, + max_size=3, + min_vars=1, + max_vars=3, +): + dtypes = st.just(draw(all_dtypes)) + names = st.text(min_size=1) + sizes = dimension_sizes( + min_size=min_size, max_size=max_size, min_dims=min_dims, max_dims=max_dims + ) + + data_vars = sizes.flatmap( + lambda s: st.dictionaries( + keys=names.filter(lambda n: n not in dict(s)), + values=variable(create_data, sizes=s, dtypes=dtypes), + min_size=min_vars, + max_size=max_vars, + ) + ) + + return xr.Dataset(data_vars=draw(data_vars)) + + +def valid_axis(ndim): + if ndim == 0: + return st.none() | st.just(0) + return st.none() | st.integers(-ndim, ndim - 1) + + +def valid_axes(ndim): + return valid_axis(ndim) | npst.valid_tuple_axes(ndim, min_size=1) + + +def valid_dim(dims): + if not isinstance(dims, list): + dims = [dims] + + ndim = len(dims) + axis = valid_axis(ndim) + return axis.map(lambda axes: utils.valid_dims_from_axes(dims, axes)) + + +def valid_dims(dims): + if is_dict_like(dims): + dims = list(dims.keys()) + elif isinstance(dims, tuple): + dims = list(dims) + elif not isinstance(dims, list): + dims = [dims] + + ndim = len(dims) + axes = valid_axes(ndim) + return axes.map(lambda axes: utils.valid_dims_from_axes(dims, axes)) diff --git 
a/xarray/tests/duckarrays/base/utils.py b/xarray/tests/duckarrays/base/utils.py new file mode 100644 index 00000000000..2bd353e2116 --- /dev/null +++ b/xarray/tests/duckarrays/base/utils.py @@ -0,0 +1,36 @@ +import warnings +from contextlib import contextmanager + + +@contextmanager +def suppress_warning(category, message=""): + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=category, message=message) + + yield + + +def create_dimension_names(ndim): + return [f"dim_{n}" for n in range(ndim)] + + +def valid_dims_from_axes(dims, axes): + if axes is None: + return None + + if axes == 0 and len(dims) == 0: + return None + + if isinstance(axes, int): + return dims[axes] + + return [dims[axis] for axis in axes] + + +def valid_axes_from_dims(all_dims, dims): + if dims is None: + return None + elif isinstance(dims, list): + return [all_dims.index(dim) for dim in dims] + else: + return all_dims.index(dims) From acbfa69e9a7d49ce8379d912775f0b7e3619cdc0 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 11 Aug 2022 03:26:57 -0400 Subject: [PATCH 02/95] placed testing functions in their own directory --- xarray/testing/__init__.py | 23 +++++++++++++++++++++++ xarray/{ => testing}/testing.py | 9 --------- 2 files changed, 23 insertions(+), 9 deletions(-) create mode 100644 xarray/testing/__init__.py rename xarray/{ => testing}/testing.py (98%) diff --git a/xarray/testing/__init__.py b/xarray/testing/__init__.py new file mode 100644 index 00000000000..c9d1f9b1790 --- /dev/null +++ b/xarray/testing/__init__.py @@ -0,0 +1,23 @@ +from .testing import ( # noqa: F401 + _assert_dataarray_invariants, + _assert_dataset_invariants, + _assert_indexes_invariants_checks, + _assert_internal_invariants, + _assert_variable_invariants, + _data_allclose_or_equiv, + assert_allclose, + assert_chunks_equal, + assert_duckarray_allclose, + assert_duckarray_equal, + assert_equal, + assert_identical, +) + +__all__ = [ + "assert_allclose", + "assert_chunks_equal", + 
"assert_duckarray_equal", + "assert_duckarray_allclose", + "assert_equal", + "assert_identical", +] diff --git a/xarray/testing.py b/xarray/testing/testing.py similarity index 98% rename from xarray/testing.py rename to xarray/testing/testing.py index 59737e1d23e..89f701e2ba4 100644 --- a/xarray/testing.py +++ b/xarray/testing/testing.py @@ -12,15 +12,6 @@ from xarray.core.indexes import Index, PandasIndex, PandasMultiIndex, default_indexes from xarray.core.variable import IndexVariable, Variable -__all__ = ( - "assert_allclose", - "assert_chunks_equal", - "assert_duckarray_equal", - "assert_duckarray_allclose", - "assert_equal", - "assert_identical", -) - def ensure_warnings(func): # sometimes tests elevate warnings to errors From 73d763f520ee168c02a75c4129929ac2fe152337 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 11 Aug 2022 03:35:47 -0400 Subject: [PATCH 03/95] moved hypothesis strategies into new testing directory --- xarray/{tests/duckarrays/base => testing}/strategies.py | 0 xarray/{tests/duckarrays/base => testing}/utils.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename xarray/{tests/duckarrays/base => testing}/strategies.py (100%) rename xarray/{tests/duckarrays/base => testing}/utils.py (100%) diff --git a/xarray/tests/duckarrays/base/strategies.py b/xarray/testing/strategies.py similarity index 100% rename from xarray/tests/duckarrays/base/strategies.py rename to xarray/testing/strategies.py diff --git a/xarray/tests/duckarrays/base/utils.py b/xarray/testing/utils.py similarity index 100% rename from xarray/tests/duckarrays/base/utils.py rename to xarray/testing/utils.py From db2deff82202caf3346b13a64a1c44ba558115f8 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 11 Aug 2022 04:09:10 -0400 Subject: [PATCH 04/95] begin type hinting strategies --- xarray/testing/strategies.py | 45 ++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/xarray/testing/strategies.py 
b/xarray/testing/strategies.py index 42eee29b554..009dc788ccd 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,12 +1,15 @@ +from typing import Callable, List, Tuple, Union + import hypothesis.extra.numpy as npst import hypothesis.strategies as st +import numpy as np import xarray as xr from xarray.core.utils import is_dict_like from . import utils -all_dtypes = ( +all_dtypes: st.SearchStrategy[np.dtype] = ( npst.integer_dtypes() | npst.unsigned_integer_dtypes() | npst.floating_dtypes() @@ -14,7 +17,7 @@ ) -def numpy_array(shape, dtypes=None): +def numpy_array(shape, dtypes=None) -> st.SearchStrategy[np.ndarray]: if dtypes is None: dtypes = all_dtypes @@ -31,7 +34,9 @@ def elements(dtype): ) -def dimension_sizes(min_dims, max_dims, min_size, max_size): +def dimension_sizes( + min_dims, max_dims, min_size, max_size +) -> st.SearchStrategy[List[Tuple[str, int]]]: sizes = st.lists( elements=st.tuples(st.text(min_size=1), st.integers(min_size, max_size)), min_size=min_dims, @@ -43,8 +48,8 @@ def dimension_sizes(min_dims, max_dims, min_size, max_size): @st.composite def variable( - draw, - create_data, + draw: st.DrawFn, + create_data: Callable, *, sizes=None, min_size=1, @@ -52,7 +57,8 @@ def variable( min_dims=1, max_dims=3, dtypes=None, -): +) -> st.SearchStrategy[xr.Variable]: + if sizes is None: sizes = draw( dimension_sizes( @@ -75,8 +81,16 @@ def variable( @st.composite def data_array( - draw, create_data, *, min_dims=1, max_dims=3, min_size=1, max_size=3, dtypes=None -): + draw: st.DrawFn, + create_data: Callable, + *, + min_dims=1, + max_dims=3, + min_size=1, + max_size=3, + dtypes=None, +) -> st.SearchStrategy[xr.DataArray]: + name = draw(st.none() | st.text(min_size=1)) if dtypes is None: dtypes = all_dtypes @@ -101,8 +115,8 @@ def data_array( @st.composite def dataset( - draw, - create_data, + draw: st.DrawFn, + create_data: Callable, *, min_dims=1, max_dims=3, @@ -110,7 +124,8 @@ def dataset( max_size=3, min_vars=1, 
max_vars=3, -): +) -> st.SearchStrategy[xr.Dataset]: + dtypes = st.just(draw(all_dtypes)) names = st.text(min_size=1) sizes = dimension_sizes( @@ -129,17 +144,17 @@ def dataset( return xr.Dataset(data_vars=draw(data_vars)) -def valid_axis(ndim): +def valid_axis(ndim) -> st.SearchStrategy[Union[None, int]]: if ndim == 0: return st.none() | st.just(0) return st.none() | st.integers(-ndim, ndim - 1) -def valid_axes(ndim): +def valid_axes(ndim) -> st.SearchStrategy[Union[None, int, Tuple[int, ...]]]: return valid_axis(ndim) | npst.valid_tuple_axes(ndim, min_size=1) -def valid_dim(dims): +def valid_dim(dims) -> st.SearchStrategy[str]: if not isinstance(dims, list): dims = [dims] @@ -148,7 +163,7 @@ def valid_dim(dims): return axis.map(lambda axes: utils.valid_dims_from_axes(dims, axes)) -def valid_dims(dims): +def valid_dims(dims) -> st.SearchStrategy[xr.DataArray]: if is_dict_like(dims): dims = list(dims.keys()) elif isinstance(dims, tuple): From 746cfc8ed2f45bfdf2f3b4d1bc8b39b3601f0879 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 11 Aug 2022 04:26:21 -0400 Subject: [PATCH 05/95] renamed strategies for consistency with hypothesis conventions --- xarray/testing/strategies.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 009dc788ccd..d84c84d81a8 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,4 +1,4 @@ -from typing import Callable, List, Tuple, Union +from typing import Any, Callable, List, Tuple, Union import hypothesis.extra.numpy as npst import hypothesis.strategies as st @@ -17,18 +17,19 @@ ) +def elements(dtype) -> st.SearchStrategy[Any]: + max_value = 100 + min_value = 0 if dtype.kind == "u" else -max_value + + return npst.from_dtype( + dtype, allow_infinity=False, min_value=min_value, max_value=max_value + ) + + def numpy_array(shape, dtypes=None) -> st.SearchStrategy[np.ndarray]: if dtypes is None: 
dtypes = all_dtypes - def elements(dtype): - max_value = 100 - min_value = 0 if dtype.kind == "u" else -max_value - - return npst.from_dtype( - dtype, allow_infinity=False, min_value=min_value, max_value=max_value - ) - return dtypes.flatmap( lambda dtype: npst.arrays(dtype=dtype, shape=shape, elements=elements(dtype)) ) @@ -47,7 +48,7 @@ def dimension_sizes( @st.composite -def variable( +def variables( draw: st.DrawFn, create_data: Callable, *, @@ -80,7 +81,7 @@ def variable( @st.composite -def data_array( +def dataarrays( draw: st.DrawFn, create_data: Callable, *, @@ -114,7 +115,7 @@ def data_array( @st.composite -def dataset( +def datasets( draw: st.DrawFn, create_data: Callable, *, @@ -135,7 +136,7 @@ def dataset( data_vars = sizes.flatmap( lambda s: st.dictionaries( keys=names.filter(lambda n: n not in dict(s)), - values=variable(create_data, sizes=s, dtypes=dtypes), + values=variables(create_data, sizes=s, dtypes=dtypes), min_size=min_vars, max_size=max_vars, ) From 03cd9debdb94b88257815e1e682d1845781219bf Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 11 Aug 2022 04:26:57 -0400 Subject: [PATCH 06/95] added strategies to public API (with experimental warning) --- doc/api.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/doc/api.rst b/doc/api.rst index 11ae5de8531..86399891af3 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1060,6 +1060,19 @@ Testing testing.assert_allclose testing.assert_chunks_equal +Hypothesis Testing Strategies +============================= + +.. warning:: + These strategies should be considered highly experimental, and liable to change at any time. + +.. 
autosummary:: + :toctree: generated/ + + testing.strategies.variables + testing.strategies.dataarrays + testing.strategies.datasets + Exceptions ========== From 2fe358392b226866a5e2e0acca56dc9f37e37baa Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 11 Aug 2022 04:45:00 -0400 Subject: [PATCH 07/95] strategies for chunking patterns --- doc/api.rst | 2 + xarray/testing/strategies.py | 187 ++++++++++++++++++++++++++++++++++- 2 files changed, 188 insertions(+), 1 deletion(-) diff --git a/doc/api.rst b/doc/api.rst index 86399891af3..6fc2587b253 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1072,6 +1072,8 @@ Hypothesis Testing Strategies testing.strategies.variables testing.strategies.dataarrays testing.strategies.datasets + testing.strategies.chunks + testing.strategies.chunksizes Exceptions ========== diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index d84c84d81a8..68ac140f379 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,4 +1,4 @@ -from typing import Any, Callable, List, Tuple, Union +from typing import Any, Callable, List, Mapping, Optional, Set, Tuple, Union import hypothesis.extra.numpy as npst import hypothesis.strategies as st @@ -175,3 +175,188 @@ def valid_dims(dims) -> st.SearchStrategy[xr.DataArray]: ndim = len(dims) axes = valid_axes(ndim) return axes.map(lambda axes: utils.valid_dims_from_axes(dims, axes)) + + +@st.composite +def block_lengths( + draw: st.DrawFn, + ax_length: int, + min_chunk_length: int = 1, + max_chunk_length: Optional[int] = None, +) -> st.SearchStrategy[Tuple[int, ...]]: + """Generate different chunking patterns along one dimension of an array.""" + + chunks = [] + remaining_length = ax_length + while remaining_length > 0: + _max_chunk_length = ( + min(remaining_length, max_chunk_length) + if max_chunk_length + else remaining_length + ) + + if min_chunk_length > _max_chunk_length: + # if we are at the end of the array we have no choice but to use a smaller chunk 
+ chunk = remaining_length + else: + chunk = draw( + st.integers(min_value=min_chunk_length, max_value=_max_chunk_length) + ) + + chunks.append(chunk) + remaining_length = remaining_length - chunk + + return tuple(chunks) + + +# TODO we could remove this once dask/9374 is merged upstream +@st.composite +def chunks( + draw: st.DrawFn, + shape: Tuple[int, ...], + axes: Optional[Union[int, Tuple[int, ...]]] = None, + min_chunk_length: int = 1, + max_chunk_length: Optional[int] = None, +) -> st.SearchStrategy[Tuple[Tuple[int, ...], ...]]: + """ + Generates different chunking patterns for an N-D array with a given shape. + + Returns chunking structure as a tuple of tuples of ints, with each inner tuple containing + the block lengths along one dimension of the array. + + You can limit chunking to specific axes using the `axes` kwarg, and specify minimum and + maximum block lengths. + + Requires the hypothesis package to be installed. + + Parameters + ---------- + shape : tuple of ints + Shape of the array for which you want to generate a chunking pattern. + axes : None or int or tuple of ints, optional + ... + min_chunk_length : int, default is 1 + Minimum chunk length to use along all axes. + max_chunk_length: int, optional + Maximum chunk length to use along all axes. + Default is that the chunk can be as long as the length of the array along that axis. 
+ + Examples + -------- + Chunking along all axes by default + + >>> chunks(shape=(2, 3)).example() + ((1, 1), (1, 2)) + + Chunking only along the second axis + + >>> chunks(shape=(2, 3), axis=1).example() + ((2,), (1, 1, 1)) + + Minimum size chunks of length 2 along all axes + + >>> chunks(shape=(2, 3), min_chunk_length=2).example() + ((2,), (2, 1)) + + Smallest possible chunks along all axes + + >>> chunks(shape=(2, 3), max_chunk_length=1).example() + ((1, 1), (1, 1, 1)) + + Maximum size chunks along all axes + + >>> chunks(shape=(2, 3), axes=()).example() + ((2,), (3,)) + + See Also + -------- + testing.strategies.chunks + DataArray.chunk + DataArray.chunks + """ + + if min_chunk_length < 1 or not isinstance(min_chunk_length, int): + raise ValueError("min_chunk_length must be an integer >= 1") + + if max_chunk_length: + if max_chunk_length < 1 or not isinstance(min_chunk_length, int): + raise ValueError("max_chunk_length must be an integer >= 1") + + if axes is None: + axes = tuple(range(len(shape))) + elif isinstance(axes, int): + axes = (axes,) + + chunks = [] + for axis, ax_length in enumerate(shape): + + _max_chunk_length = ( + min(max_chunk_length, ax_length) if max_chunk_length else ax_length + ) + + if axes is not None and axis in axes: + block_lengths_along_ax = draw( + block_lengths( + ax_length, + min_chunk_length=min_chunk_length, + max_chunk_length=_max_chunk_length, + ) + ) + else: + # don't chunk along this dimension + block_lengths_along_ax = (ax_length,) + + chunks.append(block_lengths_along_ax) + + return tuple(chunks) + + +@st.composite +def chunksizes( + draw: st.DrawFn, + sizes: Mapping[str, int], + dims: Set[str] = None, + min_chunk_length: int = 1, + max_chunk_length: int = None, +) -> st.SearchStrategy[Mapping[str, Tuple[int, ...]]]: + """ + Generate different chunking patterns for an xarray object with given sizes. 
+ + Returns chunking structure as a mapping of dimension names to tuples of ints, + with each tuple containing the block lengths along one dimension of the object. + + You can limit chunking to specific dimensions given by the `dim` kwarg. + + Requires the hypothesis package to be installed. + + Parameters + ---------- + sizes : mapping of dimension names to ints + Size of the object for which you want to generate a chunking pattern. + dims : set of str, optional + Dimensions to chunk along. Default is to chunk along all dimensions. + min_chunk_length : int, default is 1 + Minimum chunk length to use along all dimensions. + max_chunk_length: int, optional + Maximum chunk length to use along all dimensions. + Default is that the chunk can be as long as the length of the array along that dimension. + + See Also + -------- + testing.strategies.chunks + DataArray.chunk + DataArray.chunksizes + DataArray.sizes + """ + shape = tuple(sizes.values()) + axes = tuple(list(sizes.keys()).index(d) for d in dims) if dims else None + _chunks = draw( + chunks( + shape=shape, + axes=axes, + min_chunk_length=min_chunk_length, + max_chunk_length=max_chunk_length, + ) + ) + + return {d: c for d, c in zip(list(sizes.keys()), _chunks)} From 4db36290bd61813c50168aef8c22a36b3b029843 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 12 Aug 2022 21:31:27 -0400 Subject: [PATCH 08/95] rewrote variables strategy to have same signature as Variable constructor --- xarray/testing/strategies.py | 145 +++++++++++++++++++++++++++++------ 1 file changed, 121 insertions(+), 24 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 68ac140f379..45079e56df8 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,15 +1,18 @@ -from typing import Any, Callable, List, Mapping, Optional, Set, Tuple, Union +from typing import Any, Callable, List, Mapping, Optional, Sequence, Set, Tuple, Union +import string import hypothesis.extra.numpy 
as npst import hypothesis.strategies as st import numpy as np +from hypothesis import assume import xarray as xr from xarray.core.utils import is_dict_like from . import utils -all_dtypes: st.SearchStrategy[np.dtype] = ( +# required to exclude weirder dtypes e.g. unicode, byte_string, array, or nested dtypes. +valid_dtypes: st.SearchStrategy[np.dtype] = ( npst.integer_dtypes() | npst.unsigned_integer_dtypes() | npst.floating_dtypes() @@ -47,37 +50,131 @@ def dimension_sizes( return sizes +@st.composite +def np_arrays( + draw: st.DrawFn, + shape: Union[Tuple[int], st.SearchStrategy[Tuple[int]]] = None, + dtype: Union[np.dtype, st.SearchStrategy[np.dtype]] = None, +) -> st.SearchStrategy[np.ndarray]: + """ + Generates arbitrary numpy arrays with xarray-compatible dtypes. + + Parameters + ---------- + shape + dtype + Default is to use any of the valid_dtypes defined for xarray. + """ + if shape is None: + shape = draw(npst.array_shapes()) + elif isinstance(shape, st.SearchStrategy): + shape = draw(shape) + + if dtype is None: + dtype = draw(valid_dtypes) + elif isinstance(dtype, st.SearchStrategy): + dtype = draw(dtype) + + return draw(npst.arrays(dtype=dtype, shape=shape, elements=elements(dtype))) + + +def dimension_names( + min_ndims: int = 0, + max_ndims: int = 3, +) -> st.SearchStrategy[List[str]]: + """ + Generates arbitrary lists of valid dimension names. + """ + + return st.lists( + elements=st.text(alphabet=string.ascii_lowercase, min_size=1, max_size=5), + min_size=min_ndims, + max_size=max_ndims, + unique=True, + ) + + +# Is there a way to do this in general? +# Could make a Protocol... 
+T_Array = Any + + @st.composite def variables( draw: st.DrawFn, - create_data: Callable, - *, - sizes=None, - min_size=1, - max_size=3, - min_dims=1, - max_dims=3, - dtypes=None, + dims: Union[Sequence[str], st.SearchStrategy[str]] = None, + data: Union[T_Array, st.SearchStrategy[T_Array], None] = None, + attrs=None, + convert: Callable[[np.ndarray], T_Array] = lambda a: a, ) -> st.SearchStrategy[xr.Variable]: + """ + Generates arbitrary xarray.Variable objects. - if sizes is None: - sizes = draw( - dimension_sizes( - min_size=min_size, - max_size=max_size, - min_dims=min_dims, - max_dims=max_dims, - ) + Follows the signature of the xarray.Variable constructor, but you can also pass alternative strategies to generate + either numpy-like array data or dimension names. Passing both at once is forbidden. + + Passing nothing will generate a completely arbitrary Variable (backed by a numpy array). + + Parameters + ---------- + data: array-like, strategy which generates array-likes, or None + Default is to generate numpy data of arbitrary shape, values and dtype. + dims: Sequence of str, strategy which generates sequence of str, or None + Default is to generate arbitrary dimension names for each axis in data. + attrs: None + convert: Callable + Function which accepts one numpy array and returns one numpy-like array. + Default is a no-op. + """ + + if isinstance(data, st.SearchStrategy) and isinstance(dims, st.SearchStrategy): + # TODO could we relax this by adding a constraint? 
+ raise TypeError( + "Passing strategies for both dims and data could generate inconsistent contents for Variable" ) - if not sizes: - dims = () - shape = () + if data is not None and isinstance(data, st.SearchStrategy): + data = draw(data) + if dims is not None and isinstance(dims, st.SearchStrategy): + dims = draw(dims) + + print(dims) + print(data) + + if data is not None and not dims: + # no dims -> generate dims to match data + dims = draw(dimension_names(min_ndims=data.ndim, max_ndims=data.ndim)) + + elif dims is not None and data is None: + # no data -> generate data to match dims + valid_shapes = npst.array_shapes(min_dims=len(dims), max_dims=len(dims)) + data = draw(np_arrays(shape=draw(valid_shapes))) + + elif data is not None and dims is not None: + # both data and dims provided -> check both are compatible + # TODO is this pointless because the xr.Variable constructor will check this anyway? + if len(dims) != data.ndim: + raise ValueError( + "Explicitly provided data must match explicitly provided dims, " + f"but len(dims) = {len(dims)} vs len(data.ndim) = {data.ndim}" + ) + else: - dims, shape = zip(*sizes) - data = create_data(shape, dtypes) + # nothing provided, so generate everything, but consistently + data = np_arrays() + # TODO this should be possible with flatmap + print(draw(data).ndim) + dims = data.flatmap( + lambda arr: dimension_names(min_ndims=arr.ndim, max_ndims=arr.ndim) + ) + # dims = draw(dimension_names()) + # assume(len(dims) == data.ndim) + + # duckarray = convert(data) - return xr.Variable(dims, draw(data)) + # print(data) + # print(dims) + return xr.Variable(dims=dims, data=data, attrs=attrs) @st.composite From 14d11aaa7cffd62f5e6fa12e0aa5bd9ce37890c8 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 12 Aug 2022 21:31:42 -0400 Subject: [PATCH 09/95] test variables strategy --- xarray/tests/test_strategies.py | 115 ++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 
xarray/tests/test_strategies.py diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py new file mode 100644 index 00000000000..2e8e926a869 --- /dev/null +++ b/xarray/tests/test_strategies.py @@ -0,0 +1,115 @@ +import pytest +import numpy as np +import numpy.testing as npt + +from hypothesis import given +import hypothesis.strategies as st +import hypothesis.extra.numpy as npst + +from xarray.testing.strategies import valid_dtypes, np_arrays, dimension_names, variables +from xarray import Dataset, DataArray +from xarray.core.variable import Variable + + +class TestNumpyArraysStrategy: + @given(np_arrays()) + def test_given_nothing(self, arr): + assert isinstance(arr, np.ndarray) + + @given(np_arrays(dtype=np.dtype("int32"))) + def test_fixed_dtype(self, arr): + assert arr.dtype == np.dtype("int32") + + @given(st.data()) + def test_arbitrary_valid_dtype(self, data): + valid_dtype = data.draw(valid_dtypes) + arr = data.draw(np_arrays(dtype=valid_dtype)) + assert arr.dtype == valid_dtype + + @given(np_arrays(shape=(2, 3))) + def test_fixed_shape(self, arr): + assert arr.shape == (2, 3) + + @given(st.data()) + def test_arbitrary_shape(self, data): + shape = data.draw(npst.array_shapes()) + arr = data.draw(np_arrays(shape=shape)) + assert arr.shape == shape + + +class TestDimensionNamesStrategy: + @given(dimension_names()) + def test_types(self, dims): + assert isinstance(dims, list) + for d in dims: + assert isinstance(d, str) + + @given(dimension_names()) + def test_unique(self, dims): + assert len(set(dims)) == len(dims) + + @given(dimension_names(min_ndims=3, max_ndims=3)) + def test_fixed_number_of_dims(self, dims): + assert isinstance(dims, list) + assert len(dims) == 3 + + +class TestVariablesStrategy: + @given(variables()) + def test_given_nothing(self, var): + assert isinstance(var, Variable) + + @given(st.data()) + def test_given_fixed_dims_and_fixed_data(self, data): + dims = ["x", "y"] + arr = np.asarray([[1, 2], [3, 4]]) + var = 
data.draw(variables(dims=dims, data=arr)) + + assert isinstance(var, Variable) + assert list(var.dims) == dims + npt.assert_equal(var.data, arr) + + with pytest.raises(ValueError): + data.draw(variables(dims=["x"], data=arr)) + + @given(st.data()) + def test_given_arbitrary_dims_and_arbitrary_data(self, data): + arr = data.draw(np_arrays()) + dims = data.draw(dimension_names()) + var = data.draw(variables(data=arr, dims=dims)) + + assert isinstance(var, Variable) + npt.assert_equal(var.data, arr) + assert var.dims == dims + + @given(st.data()) + def test_given_fixed_data(self, data): + arr = np.asarray([[1, 2], [3, 4]]) + var = data.draw(variables(data=arr)) + + assert isinstance(var, Variable) + npt.assert_equal(arr.data, arr) + + @given(st.data()) + def test_given_arbitrary_data(self, data): + arr = data.draw(np_arrays()) + var = data.draw(variables(data=arr)) + + assert isinstance(var, Variable) + npt.assert_equal(var.data, arr) + + @given(st.data()) + def test_given_fixed_dims(self, data): + dims = ["x", "y"] + var = data.draw(variables(dims=dims)) + assert isinstance(var, Variable) + assert list(var.dims) == dims + + @given(st.data()) + def test_given_arbitrary_dims(self, data): + dims = data.draw(dimension_names()) + var = data.draw(variables(dims=dims)) + + assert isinstance(var, Variable) + assert list(var.dims) == dims + From 418a359b148abf0965d3f80e9321f0f46d24c25a Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 12 Aug 2022 23:52:20 -0400 Subject: [PATCH 10/95] fixed most tests --- xarray/testing/strategies.py | 42 ++++++++++++++++----------------- xarray/tests/test_strategies.py | 27 ++++++++++++++------- 2 files changed, 39 insertions(+), 30 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 45079e56df8..344441da6fa 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,10 +1,9 @@ -from typing import Any, Callable, List, Mapping, Optional, Sequence, Set, Tuple, Union import 
string +from typing import Any, Callable, List, Mapping, Optional, Sequence, Set, Tuple, Union import hypothesis.extra.numpy as npst import hypothesis.strategies as st import numpy as np -from hypothesis import assume import xarray as xr from xarray.core.utils import is_dict_like @@ -18,9 +17,13 @@ | npst.floating_dtypes() | npst.complex_number_dtypes() ) +valid_dtypes.__doc__ = """Generates only numpy dtypes which xarray can handle.""" def elements(dtype) -> st.SearchStrategy[Any]: + """ + Generates scalar elements to go in a numpy-like array. + """ max_value = 100 min_value = 0 if dtype.kind == "u" else -max_value @@ -84,10 +87,18 @@ def dimension_names( ) -> st.SearchStrategy[List[str]]: """ Generates arbitrary lists of valid dimension names. - """ + Parameters + ---------- + min_ndims + Minimum number of dimensions in generated list. + max_ndims + Maximum number of dimensions in generated list. + """ return st.lists( - elements=st.text(alphabet=string.ascii_lowercase, min_size=1, max_size=5), + elements=st.text( + alphabet=string.ascii_lowercase, min_size=min_ndims, max_size=max_ndims + ), min_size=min_ndims, max_size=max_ndims, unique=True, @@ -123,7 +134,7 @@ def variables( Default is to generate arbitrary dimension names for each axis in data. attrs: None convert: Callable - Function which accepts one numpy array and returns one numpy-like array. + Function which accepts one numpy array and returns one numpy-like array of the same shape. Default is a no-op. 
""" @@ -138,9 +149,6 @@ def variables( if dims is not None and isinstance(dims, st.SearchStrategy): dims = draw(dims) - print(dims) - print(data) - if data is not None and not dims: # no dims -> generate dims to match data dims = draw(dimension_names(min_ndims=data.ndim, max_ndims=data.ndim)) @@ -152,7 +160,7 @@ def variables( elif data is not None and dims is not None: # both data and dims provided -> check both are compatible - # TODO is this pointless because the xr.Variable constructor will check this anyway? + # sort of pointless because the xr.Variable constructor will check this anyway if len(dims) != data.ndim: raise ValueError( "Explicitly provided data must match explicitly provided dims, " @@ -161,20 +169,10 @@ def variables( else: # nothing provided, so generate everything, but consistently - data = np_arrays() - # TODO this should be possible with flatmap - print(draw(data).ndim) - dims = data.flatmap( - lambda arr: dimension_names(min_ndims=arr.ndim, max_ndims=arr.ndim) - ) - # dims = draw(dimension_names()) - # assume(len(dims) == data.ndim) - - # duckarray = convert(data) + data = draw(np_arrays()) + dims = draw(dimension_names(min_ndims=data.ndim, max_ndims=data.ndim)) - # print(data) - # print(dims) - return xr.Variable(dims=dims, data=data, attrs=attrs) + return xr.Variable(dims=dims, data=convert(data), attrs=attrs) @st.composite diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 2e8e926a869..56b0176608a 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -1,14 +1,18 @@ -import pytest +import hypothesis.extra.numpy as npst +import hypothesis.strategies as st import numpy as np import numpy.testing as npt +import pytest +from hypothesis import given, note -from hypothesis import given -import hypothesis.strategies as st -import hypothesis.extra.numpy as npst - -from xarray.testing.strategies import valid_dtypes, np_arrays, dimension_names, variables -from xarray import Dataset, 
DataArray +from xarray import DataArray, Dataset from xarray.core.variable import Variable +from xarray.testing.strategies import ( + dimension_names, + np_arrays, + valid_dtypes, + variables, +) class TestNumpyArraysStrategy: @@ -69,9 +73,10 @@ def test_given_fixed_dims_and_fixed_data(self, data): assert list(var.dims) == dims npt.assert_equal(var.data, arr) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="data must match"): data.draw(variables(dims=["x"], data=arr)) + @pytest.mark.xfail(reason="I don't understand why") @given(st.data()) def test_given_arbitrary_dims_and_arbitrary_data(self, data): arr = data.draw(np_arrays()) @@ -113,3 +118,9 @@ def test_given_arbitrary_dims(self, data): assert isinstance(var, Variable) assert list(var.dims) == dims + @given(st.data()) + def test_convert(self, data): + arr = data.draw(np_arrays()) + var = data.draw(variables(data=arr, convert=lambda x: x + 1)) + + npt.assert_equal(var.data, arr + 1) From c8a7d0e3741d0a289f918fd56716958fa1ef8471 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 12 Aug 2022 23:52:40 -0400 Subject: [PATCH 11/95] added helpers so far to API docs --- doc/api.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/api.rst b/doc/api.rst index 6fc2587b253..3bae86a5f9c 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1069,6 +1069,9 @@ Hypothesis Testing Strategies .. 
autosummary:: :toctree: generated/ + testing.strategies.valid_dtypes + testing.strategies.np_arrays + testing.strategies.dimension_names testing.strategies.variables testing.strategies.dataarrays testing.strategies.datasets From d48acebce1ff62eb125d81893308cde9c0f2f395 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sat, 13 Aug 2022 00:11:09 -0400 Subject: [PATCH 12/95] add hypothesis to docs CI env --- ci/requirements/doc.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml index 437c493c92c..249c91a256e 100644 --- a/ci/requirements/doc.yml +++ b/ci/requirements/doc.yml @@ -10,6 +10,7 @@ dependencies: - cfgrib>=0.9 - dask-core>=2.30 - h5netcdf>=0.7.4 + - hypothesis - ipykernel - ipython - iris>=2.3 From a20e3410844c2e05e83df51183d0d84d626f7466 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sat, 13 Aug 2022 00:11:35 -0400 Subject: [PATCH 13/95] add todo about attrs --- xarray/testing/strategies.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 344441da6fa..9225ae0e956 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -17,7 +17,7 @@ | npst.floating_dtypes() | npst.complex_number_dtypes() ) -valid_dtypes.__doc__ = """Generates only numpy dtypes which xarray can handle.""" +valid_dtypes.__doc__ = """Generates only those numpy dtypes which xarray can handle.""" def elements(dtype) -> st.SearchStrategy[Any]: @@ -95,6 +95,7 @@ def dimension_names( max_ndims Maximum number of dimensions in generated list. 
""" + return st.lists( elements=st.text( alphabet=string.ascii_lowercase, min_size=min_ndims, max_size=max_ndims @@ -115,7 +116,7 @@ def variables( draw: st.DrawFn, dims: Union[Sequence[str], st.SearchStrategy[str]] = None, data: Union[T_Array, st.SearchStrategy[T_Array], None] = None, - attrs=None, + attrs: Union[Mapping, st.SearchStrategy[Mapping], None] = None, convert: Callable[[np.ndarray], T_Array] = lambda a: a, ) -> st.SearchStrategy[xr.Variable]: """ @@ -132,7 +133,7 @@ def variables( Default is to generate numpy data of arbitrary shape, values and dtype. dims: Sequence of str, strategy which generates sequence of str, or None Default is to generate arbitrary dimension names for each axis in data. - attrs: None + attrs: dict_like or strategy which generates dicts, or None, optional convert: Callable Function which accepts one numpy array and returns one numpy-like array of the same shape. Default is a no-op. @@ -172,6 +173,12 @@ def variables( data = draw(np_arrays()) dims = draw(dimension_names(min_ndims=data.ndim, max_ndims=data.ndim)) + if isinstance(attrs, st.SearchStrategy): + attrs = draw(attrs) + elif attrs is None: + # TODO autogenerate some attributes + ... + return xr.Variable(dims=dims, data=convert(data), attrs=attrs) From 3a4816f77e70bc6a2979bf139b04ced8ba227b84 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sat, 13 Aug 2022 12:24:26 -0400 Subject: [PATCH 14/95] draft of new user guide page on testing --- doc/api.rst | 2 + doc/user-guide/index.rst | 1 + doc/user-guide/testing.rst | 121 +++++++++++++++++++++++++++++++++++++ 3 files changed, 124 insertions(+) create mode 100644 doc/user-guide/testing.rst diff --git a/doc/api.rst b/doc/api.rst index 3bae86a5f9c..38717bd964a 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1063,6 +1063,8 @@ Testing Hypothesis Testing Strategies ============================= +.. currentmodule:: xarray + .. warning:: These strategies should be considered highly experimental, and liable to change at any time. 
diff --git a/doc/user-guide/index.rst b/doc/user-guide/index.rst index 0ac25d68930..45f0ce352de 100644 --- a/doc/user-guide/index.rst +++ b/doc/user-guide/index.rst @@ -25,4 +25,5 @@ examples that describe many common tasks that you can accomplish with xarray. dask plotting options + testing duckarrays diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst new file mode 100644 index 00000000000..7b947a5b151 --- /dev/null +++ b/doc/user-guide/testing.rst @@ -0,0 +1,121 @@ +.. _testing: + +Testing your code +================= + +.. ipython:: python + :suppress: + + import numpy as np + import pandas as pd + import xarray as xr + + np.random.seed(123456) + +.. _asserts: + +Asserts +------- + +TODO + +.. _hypothesis: + +Hypothesis testing +------------------ + +.. note:: + + Testing with hypothesis is a fairly advanced topic. Before reading this section it is recommended that you take a look + at our guide to xarray's data structures, are familiar with conventional unit testing in pytest, and have seen the + hypothesis library documentation. + +``Hypothesis`` is a powerful library for property-based testing. +Instead of writing tests for one example at a time, it allows you to write tests parameterized by a source of many +dynamically generated examples. For example you might have written a test which you wish to be parameterized by the set +of all possible ``integers()``. + +Property-based testing is extremely powerful, because (unlike more conventional example-based testing) it can find bugs +that you did not even think to look for! + +Strategies +~~~~~~~~~~ + +Each source of examples is called a "strategy", and xarray provides a range of custom strategies which produce xarray +data structures containing arbitrary data. You can use these to efficiently test downstream code, +quickly ensuring that your code can handle xarray objects of all possible structures and contents. 
+ +These strategies are accessible in the :py:module::`xarray.testing.strategies` module, which provides + +.. currentmodule:: xarray + +.. autosummary:: + :toctree: generated/ + + testing.strategies.valid_dtypes + testing.strategies.np_arrays + testing.strategies.dimension_names + testing.strategies.variables + testing.strategies.dataarrays + testing.strategies.datasets + testing.strategies.chunks + testing.strategies.chunksizes + +Generating Examples +~~~~~~~~~~~~~~~~~~~ + +To see an example of what each of these strategies might produce, you can call one followed by the `.example()` method, +which is a general hypothesis method valid for all strategies + +.. ipython:: python + + import xarray.testing.strategies as xrst + + # TODO change this to dataarray once written + xrst.variables().example() + xrst.variables().example() + xrst.variables().example() + +You can see that calling `.example()` multiple times will generate different examples, giving you an idea of the wide +range of data that the xarray strategies can generate. + +# TODO simple test example + +.. ipython:: python + + import hypothesis.strategies as st + +Chaining Strategies +~~~~~~~~~~~~~~~~~~~ + +Xarray's strategies can accept other strategies as arguments, allowing you to customise the contents of the generated +examples. + +.. ipython:: python + + xrst.variables(data=xrst.np_arrays(shape=(3, 4))) + +This also works with strategies defined in other packages, for example the ``chunks`` strategy defined in +``dask.array.strategies``. + + +Fixing Arguments +~~~~~~~~~~~~~~~~ + +If you want to fix one aspect of the data structure, whilst allowing variation in the generated examples +over all other aspects, then use ``st.just()``. + +.. 
ipython:: python + :okexcept: + + # Generates only dataarrays with dimensions ["x", "y"] + xrst.dataarrays(dims=st.just(["x", "y"]))).example() + +(This is technically another example of chaining strategies - ``hypothesis.strategies.just`` is simply a special +strategy that just contains a single example.) + + +Duck-type Conversion +~~~~~~~~~~~~~~~~~~~~ + +# TODO converting to duckarrays \ No newline at end of file From d0406a2a0178c860cd8a85fae09cd0ac64674f10 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sat, 13 Aug 2022 12:25:01 -0400 Subject: [PATCH 15/95] types for dataarrays strategy --- xarray/testing/strategies.py | 41 +++++++++++++++--------------------- 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 9225ae0e956..6f44002ac16 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,3 +1,5 @@ +import pandas as pd + import string from typing import Any, Callable, List, Mapping, Optional, Sequence, Set, Tuple, Union @@ -114,8 +116,8 @@ def dimension_names( @st.composite def variables( draw: st.DrawFn, - dims: Union[Sequence[str], st.SearchStrategy[str]] = None, data: Union[T_Array, st.SearchStrategy[T_Array], None] = None, + dims: Union[Sequence[str], st.SearchStrategy[str]] = None, attrs: Union[Mapping, st.SearchStrategy[Mapping], None] = None, convert: Callable[[np.ndarray], T_Array] = lambda a: a, ) -> st.SearchStrategy[xr.Variable]: @@ -145,6 +147,8 @@ def variables( "Passing strategies for both dims and data could generate inconsistent contents for Variable" ) + # TODO remove this handling of non-strategies in favour of passing `st.just(value)` + if data is not None and isinstance(data, st.SearchStrategy): data = draw(data) if dims is not None and isinstance(dims, st.SearchStrategy): @@ -185,34 +189,23 @@ def variables( @st.composite def dataarrays( draw: st.DrawFn, - create_data: Callable, - *, - min_dims=1, - max_dims=3, - min_size=1, - 
max_size=3, - dtypes=None, + data: Union[T_Array, st.SearchStrategy[T_Array], None] = None, + coords: Union[Sequence[Union[xr.DataArray, pd.Index]], Mapping[str, xr.Variable]] = None, + dims: Union[Sequence[str], st.SearchStrategy[str]] = None, + name: str = None, + attrs: Union[Mapping, st.SearchStrategy[Mapping], None] = None, + convert: Callable[[np.ndarray], T_Array] = lambda a: a, ) -> st.SearchStrategy[xr.DataArray]: - name = draw(st.none() | st.text(min_size=1)) - if dtypes is None: - dtypes = all_dtypes - - sizes = st.lists( - elements=st.tuples(st.text(min_size=1), st.integers(min_size, max_size)), - min_size=min_dims, - max_size=max_dims, - unique_by=lambda x: x[0], - ) - drawn_sizes = draw(sizes) - dims, shape = zip(*drawn_sizes) - - data = draw(create_data(shape, dtypes)) + if name is None: + name = draw(st.none() | st.text(min_size=1)) return xr.DataArray( - data=data, + data=convert(data), + coords=coords, name=name, dims=dims, + attrs=attrs, ) @@ -229,7 +222,7 @@ def datasets( max_vars=3, ) -> st.SearchStrategy[xr.Dataset]: - dtypes = st.just(draw(all_dtypes)) + dtypes = st.just(draw(valid_dtypes)) names = st.text(min_size=1) sizes = dimension_sizes( min_size=min_size, max_size=max_size, min_dims=min_dims, max_dims=max_dims From 65a222d8a2b01f4b2781e9452068e2848b614f6e Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sat, 13 Aug 2022 12:25:50 -0400 Subject: [PATCH 16/95] draft for chained chunking example --- xarray/tests/test_strategies.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 56b0176608a..6ce36cb64f6 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -3,7 +3,7 @@ import numpy as np import numpy.testing as npt import pytest -from hypothesis import given, note +from hypothesis import given from xarray import DataArray, Dataset from xarray.core.variable import Variable @@ -124,3 +124,18 @@ def 
test_convert(self, data): var = data.draw(variables(data=arr, convert=lambda x: x + 1)) npt.assert_equal(var.data, arr + 1) + + +@pytest.mark.xfail +@given(st.data()) +def test_chained_chunking_example(data): + import dask.array.strategies as dast + + def chunk(da): + return da.chunk(dast.chunks(da.shape)) + + chunked_dataarrays = xrst.dataarrays().flatmap(chunk) + + chunked_da = data.draw(chunked_dataarrays()) + + assert ... From e1d718a2d9da53cba823690addfb89c7395baffc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 13 Aug 2022 16:39:04 +0000 Subject: [PATCH 17/95] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- doc/user-guide/testing.rst | 2 +- xarray/testing/strategies.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 7b947a5b151..d561f9b1692 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -118,4 +118,4 @@ strategy that just contains a single example.) 
Duck-type Conversion ~~~~~~~~~~~~~~~~~~~~ -# TODO converting to duckarrays \ No newline at end of file +# TODO converting to duckarrays diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 6f44002ac16..c70a1751ee7 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,11 +1,10 @@ -import pandas as pd - import string from typing import Any, Callable, List, Mapping, Optional, Sequence, Set, Tuple, Union import hypothesis.extra.numpy as npst import hypothesis.strategies as st import numpy as np +import pandas as pd import xarray as xr from xarray.core.utils import is_dict_like @@ -190,7 +189,9 @@ def variables( def dataarrays( draw: st.DrawFn, data: Union[T_Array, st.SearchStrategy[T_Array], None] = None, - coords: Union[Sequence[Union[xr.DataArray, pd.Index]], Mapping[str, xr.Variable]] = None, + coords: Union[ + Sequence[Union[xr.DataArray, pd.Index]], Mapping[str, xr.Variable] + ] = None, dims: Union[Sequence[str], st.SearchStrategy[str]] = None, name: str = None, attrs: Union[Mapping, st.SearchStrategy[Mapping], None] = None, From 57d0f5b672a4cbe9b3f9557f78897b18cf806d38 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sun, 14 Aug 2022 08:12:21 -0400 Subject: [PATCH 18/95] only accept strategy objects --- xarray/testing/strategies.py | 50 +++++++++++++++++---------------- xarray/tests/test_strategies.py | 34 ++++++++++------------ 2 files changed, 41 insertions(+), 43 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 6f44002ac16..9ed2f83bfc1 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,11 +1,10 @@ -import pandas as pd - import string from typing import Any, Callable, List, Mapping, Optional, Sequence, Set, Tuple, Union import hypothesis.extra.numpy as npst import hypothesis.strategies as st import numpy as np +import pandas as pd import xarray as xr from xarray.core.utils import is_dict_like @@ -83,6 +82,10 @@ def np_arrays( 
return draw(npst.arrays(dtype=dtype, shape=shape, elements=elements(dtype))) +names = st.text(alphabet=string.ascii_lowercase) +names.__doc__ = """Generates arbitrary string names for dimensions / variables.""" + + def dimension_names( min_ndims: int = 0, max_ndims: int = 3, @@ -99,9 +102,7 @@ def dimension_names( """ return st.lists( - elements=st.text( - alphabet=string.ascii_lowercase, min_size=min_ndims, max_size=max_ndims - ), + elements=names, min_size=min_ndims, max_size=max_ndims, unique=True, @@ -116,9 +117,9 @@ def dimension_names( @st.composite def variables( draw: st.DrawFn, - data: Union[T_Array, st.SearchStrategy[T_Array], None] = None, - dims: Union[Sequence[str], st.SearchStrategy[str]] = None, - attrs: Union[Mapping, st.SearchStrategy[Mapping], None] = None, + data: st.SearchStrategy[T_Array] = None, + dims: st.SearchStrategy[str] = None, + attrs: st.SearchStrategy[Mapping] = None, convert: Callable[[np.ndarray], T_Array] = lambda a: a, ) -> st.SearchStrategy[xr.Variable]: """ @@ -131,41 +132,40 @@ def variables( Parameters ---------- - data: array-like, strategy which generates array-likes, or None + data: strategy which generates array-likes, optional Default is to generate numpy data of arbitrary shape, values and dtype. - dims: Sequence of str, strategy which generates sequence of str, or None + dims: Strategy which generates sequence of strings, optional Default is to generate arbitrary dimension names for each axis in data. - attrs: dict_like or strategy which generates dicts, or None, optional + attrs: Strategy which generates dicts, optional convert: Callable Function which accepts one numpy array and returns one numpy-like array of the same shape. Default is a no-op. """ - if isinstance(data, st.SearchStrategy) and isinstance(dims, st.SearchStrategy): - # TODO could we relax this by adding a constraint? 
+ if any( + not isinstance(arg, st.SearchStrategy) and arg is not None + for arg in [data, dims, attrs] + ): raise TypeError( - "Passing strategies for both dims and data could generate inconsistent contents for Variable" + "Contents must be provided as a hypothesis.strategies.SearchStrategy object (or None)." + "To specify fixed contents, use hypothesis.strategies.just()." ) - # TODO remove this handling of non-strategies in favour of passing `st.just(value)` - - if data is not None and isinstance(data, st.SearchStrategy): - data = draw(data) - if dims is not None and isinstance(dims, st.SearchStrategy): - dims = draw(dims) - - if data is not None and not dims: + if data is not None and dims is None: # no dims -> generate dims to match data + data = draw(data) dims = draw(dimension_names(min_ndims=data.ndim, max_ndims=data.ndim)) elif dims is not None and data is None: # no data -> generate data to match dims + dims = draw(dims) valid_shapes = npst.array_shapes(min_dims=len(dims), max_dims=len(dims)) data = draw(np_arrays(shape=draw(valid_shapes))) elif data is not None and dims is not None: # both data and dims provided -> check both are compatible # sort of pointless because the xr.Variable constructor will check this anyway + data, dims = draw(data), draw(dims) if len(dims) != data.ndim: raise ValueError( "Explicitly provided data must match explicitly provided dims, " @@ -173,7 +173,7 @@ def variables( ) else: - # nothing provided, so generate everything, but consistently + # nothing provided, so generate everything consistently by drawing dims to match data data = draw(np_arrays()) dims = draw(dimension_names(min_ndims=data.ndim, max_ndims=data.ndim)) @@ -190,7 +190,9 @@ def variables( def dataarrays( draw: st.DrawFn, data: Union[T_Array, st.SearchStrategy[T_Array], None] = None, - coords: Union[Sequence[Union[xr.DataArray, pd.Index]], Mapping[str, xr.Variable]] = None, + coords: Union[ + Sequence[Union[xr.DataArray, pd.Index]], Mapping[str, xr.Variable] + ] 
= None, dims: Union[Sequence[str], st.SearchStrategy[str]] = None, name: str = None, attrs: Union[Mapping, st.SearchStrategy[Mapping], None] = None, diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 6ce36cb64f6..feef19c25e9 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -67,14 +67,13 @@ def test_given_nothing(self, var): def test_given_fixed_dims_and_fixed_data(self, data): dims = ["x", "y"] arr = np.asarray([[1, 2], [3, 4]]) - var = data.draw(variables(dims=dims, data=arr)) + var = data.draw(variables(dims=st.just(dims), data=st.just(arr))) - assert isinstance(var, Variable) assert list(var.dims) == dims npt.assert_equal(var.data, arr) with pytest.raises(ValueError, match="data must match"): - data.draw(variables(dims=["x"], data=arr)) + data.draw(variables(dims=st.just(["x"]), data=st.just(arr))) @pytest.mark.xfail(reason="I don't understand why") @given(st.data()) @@ -83,47 +82,44 @@ def test_given_arbitrary_dims_and_arbitrary_data(self, data): dims = data.draw(dimension_names()) var = data.draw(variables(data=arr, dims=dims)) - assert isinstance(var, Variable) npt.assert_equal(var.data, arr) assert var.dims == dims @given(st.data()) def test_given_fixed_data(self, data): arr = np.asarray([[1, 2], [3, 4]]) - var = data.draw(variables(data=arr)) + var = data.draw(variables(data=st.just(arr))) - assert isinstance(var, Variable) - npt.assert_equal(arr.data, arr) + npt.assert_equal(var.data, arr) @given(st.data()) def test_given_arbitrary_data(self, data): - arr = data.draw(np_arrays()) - var = data.draw(variables(data=arr)) + shape = (2, 3) + arrs = np_arrays(shape=shape) + var = data.draw(variables(data=arrs)) - assert isinstance(var, Variable) - npt.assert_equal(var.data, arr) + assert var.data.shape == shape @given(st.data()) def test_given_fixed_dims(self, data): dims = ["x", "y"] - var = data.draw(variables(dims=dims)) - assert isinstance(var, Variable) + var = 
data.draw(variables(dims=st.just(dims))) + assert list(var.dims) == dims @given(st.data()) def test_given_arbitrary_dims(self, data): - dims = data.draw(dimension_names()) + dims = dimension_names(min_ndims=1, max_ndims=1) var = data.draw(variables(dims=dims)) - assert isinstance(var, Variable) - assert list(var.dims) == dims + assert len(list(var.dims)) == 1 @given(st.data()) def test_convert(self, data): - arr = data.draw(np_arrays()) - var = data.draw(variables(data=arr, convert=lambda x: x + 1)) + arr = st.just(np.asarray([1, 2, 3])) + var = data.draw(variables(data=arr, convert=lambda x: x * 2)) - npt.assert_equal(var.data, arr + 1) + npt.assert_equal(var.data, np.asarray([2, 4, 6])) @pytest.mark.xfail From 82c734cc9712fe6c5404676206452115e58ebcc0 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sun, 14 Aug 2022 08:26:52 -0400 Subject: [PATCH 19/95] fixed failure with passing in two custom strategies that must be compatible --- xarray/testing/strategies.py | 12 +++++------- xarray/tests/test_strategies.py | 13 ++++--------- 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 9ed2f83bfc1..60cca9cbbd6 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -5,6 +5,7 @@ import hypothesis.strategies as st import numpy as np import pandas as pd +from hypothesis import assume import xarray as xr from xarray.core.utils import is_dict_like @@ -163,14 +164,11 @@ def variables( data = draw(np_arrays(shape=draw(valid_shapes))) elif data is not None and dims is not None: - # both data and dims provided -> check both are compatible - # sort of pointless because the xr.Variable constructor will check this anyway + # both data and dims provided -> check drawn examples are compatible data, dims = draw(data), draw(dims) - if len(dims) != data.ndim: - raise ValueError( - "Explicitly provided data must match explicitly provided dims, " - f"but len(dims) = {len(dims)} vs 
len(data.ndim) = {data.ndim}" - ) + # TODO is there another way to enforce this assumption? + # TODO how do I write a test that checks that the hypothesis Unsatisfiable error will be raised? + assume(data.ndim == len(dims)) else: # nothing provided, so generate everything consistently by drawing dims to match data diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index feef19c25e9..601315f9b29 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -72,18 +72,13 @@ def test_given_fixed_dims_and_fixed_data(self, data): assert list(var.dims) == dims npt.assert_equal(var.data, arr) - with pytest.raises(ValueError, match="data must match"): - data.draw(variables(dims=st.just(["x"]), data=st.just(arr))) - - @pytest.mark.xfail(reason="I don't understand why") @given(st.data()) def test_given_arbitrary_dims_and_arbitrary_data(self, data): - arr = data.draw(np_arrays()) - dims = data.draw(dimension_names()) - var = data.draw(variables(data=arr, dims=dims)) + arrs = np_arrays(shape=(2, 3)) + dims = dimension_names(min_ndims=2) + var = data.draw(variables(data=arrs, dims=dims)) - npt.assert_equal(var.data, arr) - assert var.dims == dims + assert var.shape == (2, 3) @given(st.data()) def test_given_fixed_data(self, data): From 029f19a309c54ee64a20a86135aa8bd0468f61bd Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sun, 14 Aug 2022 11:54:11 -0400 Subject: [PATCH 20/95] syntax error in example --- doc/user-guide/testing.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 7b947a5b151..85eda5efd01 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -109,7 +109,7 @@ over all other aspects, then use ``st.just()``. 
:okexcept: # Generates only dataarrays with dimensions ["x", "y"] - xrst.dataarrays(dims=st.just(["x", "y"]))).example() + xrst.dataarrays(dims=st.just(["x", "y"])).example() (This is technically another example of chaining strategies - ``hypothesis.strategies.just`` is simply a special strategy that just contains a single example.) @@ -118,4 +118,4 @@ strategy that just contains a single example.) Duck-type Conversion ~~~~~~~~~~~~~~~~~~~~ -# TODO converting to duckarrays \ No newline at end of file +# TODO converting to duckarrays From 46895fe3ea3bcd251f68a428ef78c7d80dc6098d Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sun, 14 Aug 2022 21:58:23 -0400 Subject: [PATCH 21/95] allow sizes dict as argument to variables --- doc/api.rst | 4 + xarray/testing/strategies.py | 153 ++++++++++++++++++++++++-------- xarray/tests/test_strategies.py | 48 +++++++++- 3 files changed, 166 insertions(+), 39 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 38717bd964a..581ab74a6f9 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1073,9 +1073,13 @@ Hypothesis Testing Strategies testing.strategies.valid_dtypes testing.strategies.np_arrays + testing.strategies.names testing.strategies.dimension_names + testing.strategies.dimension_sizes testing.strategies.variables + testing.strategies.coordinate_variables testing.strategies.dataarrays + testing.strategies.data_variables testing.strategies.datasets testing.strategies.chunks testing.strategies.chunksizes diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 60cca9cbbd6..01e62c61ecf 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -12,6 +12,19 @@ from . import utils +__all__ = [ + "valid_dtypes", + "np_arrays", + "names", + "dimension_names", + "dimension_sizes", + "variables", + "coordinate_variables", + "dataarrays", + "data_variables", + "datasets", +] + # required to exclude weirder dtypes e.g. unicode, byte_string, array, or nested dtypes. 
valid_dtypes: st.SearchStrategy[np.dtype] = ( npst.integer_dtypes() @@ -34,27 +47,6 @@ def elements(dtype) -> st.SearchStrategy[Any]: ) -def numpy_array(shape, dtypes=None) -> st.SearchStrategy[np.ndarray]: - if dtypes is None: - dtypes = all_dtypes - - return dtypes.flatmap( - lambda dtype: npst.arrays(dtype=dtype, shape=shape, elements=elements(dtype)) - ) - - -def dimension_sizes( - min_dims, max_dims, min_size, max_size -) -> st.SearchStrategy[List[Tuple[str, int]]]: - sizes = st.lists( - elements=st.tuples(st.text(min_size=1), st.integers(min_size, max_size)), - min_size=min_dims, - max_size=max_dims, - unique_by=lambda x: x[0], - ) - return sizes - - @st.composite def np_arrays( draw: st.DrawFn, @@ -83,7 +75,7 @@ def np_arrays( return draw(npst.arrays(dtype=dtype, shape=shape, elements=elements(dtype))) -names = st.text(alphabet=string.ascii_lowercase) +names = st.text(alphabet=string.ascii_lowercase, min_size=1) names.__doc__ = """Generates arbitrary string names for dimensions / variables.""" @@ -92,7 +84,7 @@ def dimension_names( max_ndims: int = 3, ) -> st.SearchStrategy[List[str]]: """ - Generates arbitrary lists of valid dimension names. + Generates an arbitrary list of valid dimension names. Parameters ---------- @@ -110,6 +102,42 @@ def dimension_names( ) +def dimension_sizes( + min_ndims: int = 0, + max_ndims: int = 3, + min_length=1, + max_length=None, +) -> st.SearchStrategy[Mapping[str, int]]: + """ + Generates an arbitrary mapping from dimension names to lengths. + + Parameters + ---------- + min_ndims: int, optional + Minimum number of dimensions in generated list. + Default is 1. + max_ndims: int, optional + Maximum number of dimensions in generated list. + Default is 3 + min_length: int, optional + Minimum size of a dimension. + Default is 1. + max_length: int, optional + Minimum size of a dimension. 
+ Default is `min_size + 5` + """ + + if max_length is None: + max_length = min_length + 5 + + return st.dictionaries( + keys=names, + values=st.integers(min_value=min_length, max_value=max_length), + min_size=min_ndims, + max_size=max_ndims, + ) + + # Is there a way to do this in general? # Could make a Protocol... T_Array = Any @@ -119,7 +147,9 @@ def dimension_names( def variables( draw: st.DrawFn, data: st.SearchStrategy[T_Array] = None, - dims: st.SearchStrategy[str] = None, + dims: Union[ + st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] + ] = None, attrs: st.SearchStrategy[Mapping] = None, convert: Callable[[np.ndarray], T_Array] = lambda a: a, ) -> st.SearchStrategy[xr.Variable]: @@ -133,7 +163,7 @@ def variables( Parameters ---------- - data: strategy which generates array-likes, optional + data: Strategy generating array-likes, optional Default is to generate numpy data of arbitrary shape, values and dtype. dims: Strategy which generates sequence of strings, optional Default is to generate arbitrary dimension names for each axis in data. @@ -160,15 +190,29 @@ def variables( elif dims is not None and data is None: # no data -> generate data to match dims dims = draw(dims) - valid_shapes = npst.array_shapes(min_dims=len(dims), max_dims=len(dims)) - data = draw(np_arrays(shape=draw(valid_shapes))) + if isinstance(dims, List): + valid_shapes = npst.array_shapes(min_dims=len(dims), max_dims=len(dims)) + data = draw(np_arrays(shape=draw(valid_shapes))) + else: + # should be a mapping of form {dim_names: lengths} + shape = tuple(dims.values()) + data = draw(np_arrays(shape=shape)) elif data is not None and dims is not None: # both data and dims provided -> check drawn examples are compatible - data, dims = draw(data), draw(dims) - # TODO is there another way to enforce this assumption? + dims = draw(dims) + + # TODO is there another way to enforce these assumptions? 
This is very like to fail hypothesis' health checks # TODO how do I write a test that checks that the hypothesis Unsatisfiable error will be raised? - assume(data.ndim == len(dims)) + # TODO or we could just raise in this case? + if isinstance(dims, List): + data = draw(data) + assume(data.ndim == len(dims)) + else: + # should be a mapping of form {dim_names: lengths} + data = draw(data) + shape = tuple(dims.values()) + assume(data.shape == shape) else: # nothing provided, so generate everything consistently by drawing dims to match data @@ -184,21 +228,54 @@ def variables( return xr.Variable(dims=dims, data=convert(data), attrs=attrs) +def subsets_of(l: st.SearchStrategy[List[Any]]) -> st.SearchStrategy[List[Any]]: + + return st.lists(elements=st.sampled_from(l), unique=True) + + +@st.composite +def _alignable_variables( + draw: st.DrawFn, + dims: st.SearchStrategy[List[str]], +) -> st.SearchStrategy[List[xr.Variable]]: + dims = draw(subsets_of(dims)) + sizes = ... + return st.lists(variables(dims=dims)) + + +def coordinate_variables( + dims: st.SearchStrategy[List[str]], +) -> st.SearchStrategy[List[xr.Variable]]: + # TODO specifically generate dimension coordinates + return _alignable_variables(dims) + + @st.composite def dataarrays( draw: st.DrawFn, - data: Union[T_Array, st.SearchStrategy[T_Array], None] = None, + data: st.SearchStrategy[T_Array] = None, coords: Union[ Sequence[Union[xr.DataArray, pd.Index]], Mapping[str, xr.Variable] ] = None, - dims: Union[Sequence[str], st.SearchStrategy[str]] = None, - name: str = None, - attrs: Union[Mapping, st.SearchStrategy[Mapping], None] = None, + dims: st.SearchStrategy[List[str]] = None, + name: st.SearchStrategy[Union[str, None]] = None, + attrs: st.SearchStrategy[Mapping] = None, convert: Callable[[np.ndarray], T_Array] = lambda a: a, ) -> st.SearchStrategy[xr.DataArray]: if name is None: - name = draw(st.none() | st.text(min_size=1)) + name = draw(st.none() | names) + + if data is not None and dims is None: + 
raise NotImplementedError() + elif data is None and dims is not None: + raise NotImplementedError() + elif data is not None and dims is None: + raise NotImplementedError() + else: + data = draw(np_arrays()) + dims = draw(dimension_names(min_ndims=data.ndim, max_ndims=data.ndim)) + coords = draw(coordinate_variables(dims=dims)) return xr.DataArray( data=convert(data), @@ -209,6 +286,12 @@ def dataarrays( ) +def data_variables( + dims: st.SearchStrategy[List[str]], +) -> st.SearchStrategy[List[xr.Variable]]: + return _alignable_variables(dims) + + @st.composite def datasets( draw: st.DrawFn, diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 601315f9b29..d3c3266e831 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -8,7 +8,9 @@ from xarray import DataArray, Dataset from xarray.core.variable import Variable from xarray.testing.strategies import ( + dataarrays, dimension_names, + dimension_sizes, np_arrays, valid_dtypes, variables, @@ -58,13 +60,27 @@ def test_fixed_number_of_dims(self, dims): assert len(dims) == 3 +class TestDimensionSizesStrategy: + @given(dimension_sizes()) + def test_types(self, dims): + assert isinstance(dims, dict) + for d, n in dims.items(): + assert isinstance(d, str) + assert isinstance(n, int) + + @given(dimension_sizes(min_ndims=3, max_ndims=3)) + def test_fixed_number_of_dims(self, dims): + assert isinstance(dims, dict) + assert len(dims) == 3 + + class TestVariablesStrategy: @given(variables()) def test_given_nothing(self, var): assert isinstance(var, Variable) @given(st.data()) - def test_given_fixed_dims_and_fixed_data(self, data): + def test_given_fixed_dims_list_and_fixed_data(self, data): dims = ["x", "y"] arr = np.asarray([[1, 2], [3, 4]]) var = data.draw(variables(dims=st.just(dims), data=st.just(arr))) @@ -73,7 +89,7 @@ def test_given_fixed_dims_and_fixed_data(self, data): npt.assert_equal(var.data, arr) @given(st.data()) - def 
test_given_arbitrary_dims_and_arbitrary_data(self, data): + def test_given_arbitrary_dims_list_and_arbitrary_data(self, data): arrs = np_arrays(shape=(2, 3)) dims = dimension_names(min_ndims=2) var = data.draw(variables(data=arrs, dims=dims)) @@ -96,19 +112,34 @@ def test_given_arbitrary_data(self, data): assert var.data.shape == shape @given(st.data()) - def test_given_fixed_dims(self, data): + def test_given_fixed_dims_list(self, data): dims = ["x", "y"] var = data.draw(variables(dims=st.just(dims))) assert list(var.dims) == dims @given(st.data()) - def test_given_arbitrary_dims(self, data): + def test_given_arbitrary_dims_list(self, data): dims = dimension_names(min_ndims=1, max_ndims=1) var = data.draw(variables(dims=dims)) assert len(list(var.dims)) == 1 + @given(st.data()) + def test_given_fixed_sizes(self, data): + dims = {"x": 3, "y": 4} + var = data.draw(variables(dims=st.just(dims))) + + assert var.dims == ("x", "y") + assert var.shape == (3, 4) + + @given(st.data()) + def test_given_fixed_sizes_and_arbitrary_data(self, data): + arrs = np_arrays(shape=(2, 3)) + var = data.draw(variables(data=arrs, dims=st.just({"x": 2, "y": 3}))) + + assert var.shape == (2, 3) + @given(st.data()) def test_convert(self, data): arr = st.just(np.asarray([1, 2, 3])) @@ -117,6 +148,15 @@ def test_convert(self, data): npt.assert_equal(var.data, np.asarray([2, 4, 6])) +@pytest.mark.xfail +class TestDataArraysStrategy: + @given(dataarrays()) + def test_given_nothing(self, da): + print(da) + assert isinstance(da, DataArray) + assert False + + @pytest.mark.xfail @given(st.data()) def test_chained_chunking_example(data): From 50c62e9ffdf9f6f03db05b54caf60259b5ef91c3 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sun, 14 Aug 2022 23:12:20 -0400 Subject: [PATCH 22/95] copied subsequences_of strategy --- xarray/testing/strategies.py | 64 ++++++++++++++++++++++--- xarray/tests/test_strategies.py | 83 +++++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+), 7 
deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 01e62c61ecf..fcebd06f75b 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,11 +1,23 @@ import string -from typing import Any, Callable, List, Mapping, Optional, Sequence, Set, Tuple, Union +from typing import ( + Any, + Callable, + List, + Mapping, + Optional, + Sequence, + Set, + Tuple, + TypeVar, + Union, +) import hypothesis.extra.numpy as npst import hypothesis.strategies as st import numpy as np import pandas as pd from hypothesis import assume +from hypothesis.internal.validation import check_valid_sizes import xarray as xr from xarray.core.utils import is_dict_like @@ -228,18 +240,54 @@ def variables( return xr.Variable(dims=dims, data=convert(data), attrs=attrs) -def subsets_of(l: st.SearchStrategy[List[Any]]) -> st.SearchStrategy[List[Any]]: +El = TypeVar("El") + + +# All from the unfinished PR https://github.com/HypothesisWorks/hypothesis/pull/1533 +# TODO Should move this function upstream by opening new PR +@st.composite +def subsequences_of( + draw: st.DrawFn, + elements: Sequence[El], + min_size: int = 0, + max_size: int = None, +) -> st.SearchStrategy[Sequence[El]]: + """ + Returns a strategy which generates sub-sequences of the input sequence. + + Order is guaranteed to be preserved in the result. + + Parameters + ---------- + elements: Elements from which to construct the subsequence + min_size: int + Minimum size of the returned subsequences. + Default is 0. + max_size: int, optional + Maximum size of the returned subsequences. + Default is the full size of the input sequence. 
+ """ + if max_size is None: + max_size = len(elements) + check_valid_sizes(min_size, max_size) + + def element_mask() -> List[bool]: + num_include = draw(st.integers(min_size, max_size)) + num_exclude = len(elements) - num_include + choices = [True] * num_include + [False] * num_exclude + assert len(elements) == len(choices) + return draw(st.permutations(choices)) - return st.lists(elements=st.sampled_from(l), unique=True) + element_includes = zip(elements, element_mask()) + return sorted(element for element, include in element_includes if include) @st.composite def _alignable_variables( draw: st.DrawFn, - dims: st.SearchStrategy[List[str]], + dim_sizes: st.SearchStrategy[Mapping[str, int]], ) -> st.SearchStrategy[List[xr.Variable]]: - dims = draw(subsets_of(dims)) - sizes = ... + dims = draw(subsequences_of(dim_sizes)) return st.lists(variables(dims=dims)) @@ -257,7 +305,9 @@ def dataarrays( coords: Union[ Sequence[Union[xr.DataArray, pd.Index]], Mapping[str, xr.Variable] ] = None, - dims: st.SearchStrategy[List[str]] = None, + dims: Union[ + st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] + ] = None, name: st.SearchStrategy[Union[str, None]] = None, attrs: st.SearchStrategy[Mapping] = None, convert: Callable[[np.ndarray], T_Array] = lambda a: a, diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index d3c3266e831..bf382919442 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -12,6 +12,7 @@ dimension_names, dimension_sizes, np_arrays, + subsequences_of, valid_dtypes, variables, ) @@ -148,6 +149,88 @@ def test_convert(self, data): npt.assert_equal(var.data, np.asarray([2, 4, 6])) +# All from the unfinished PR https://github.com/HypothesisWorks/hypothesis/pull/1533 +class TestSubsequencesOfStrategy: + @pytest.mark.xfail( + reason="Can't work out how to import assert_no_examples from hypothesis.tests.common.debug" + ) + def test_subsequence_of_empty(self): + sub_seq_strat = 
st.lists(st.none(), max_size=0) + assert_no_examples(sub_seq_strat) + + @given(st.data(), st.lists(st.integers())) + def test_subsequence_sizing(self, data, seq): + sub_seq_strat = subsequences_of(seq) + sub_seq = data.draw(sub_seq_strat) + + assert isinstance(sub_seq, list) + assert len(sub_seq) <= len(seq) + + @given(st.data(), st.lists(st.integers())) + def test_subsequence_only_original_elements(self, data, seq): + sub_seq_strat = subsequences_of(seq) + sub_seq = data.draw(sub_seq_strat) + + assert isinstance(sub_seq, list) + assert len(sub_seq) <= len(seq) + + @given(st.data(), st.lists(st.integers())) + def test_subsequence_elements_not_over_drawn(self, data, seq): + sub_seq_strat = subsequences_of(seq) + sub_seq = data.draw(sub_seq_strat) + + assert not (set(sub_seq) - set(seq)) + + @given(st.data(), st.lists(st.integers())) + def test_subsequence_original_elements_not_over_produced(self, data, seq): + sub_seq_strat = subsequences_of(seq) + sub_seq = data.draw(sub_seq_strat) + + # Per unique item, check that they don't occur in the subsequence + # more times than they appear in the source. 
+ for item in set(sub_seq): + assert sub_seq.count(item) <= seq.count(item) + + @given(st.data(), st.lists(st.integers())) + def test_subsequence_max_size_constraint(self, data, seq): + max_size_strat = st.integers(min_value=0, max_value=len(seq)) + max_size = data.draw(max_size_strat) + + sub_seq_strat = subsequences_of(seq, max_size=max_size) + sub_seq = data.draw(sub_seq_strat) + + assert len(sub_seq) <= max_size + + @given(st.data(), st.lists(st.integers())) + def test_subsequence_min_size_constraint(self, data, seq): + min_size_strat = st.integers(min_value=0, max_value=len(seq)) + min_size = data.draw(min_size_strat) + + sub_seq_strat = subsequences_of(seq, min_size=min_size) + sub_seq = data.draw(sub_seq_strat) + + assert len(sub_seq) >= min_size + + @given(st.data(), st.lists(st.integers())) + def test_subsequence_min_max_size_constraint(self, data, seq): + min_size_strat = st.integers(min_value=0, max_value=len(seq)) + min_size = data.draw(min_size_strat) + + max_size_strat = st.integers(min_value=min_size, max_value=len(seq)) + max_size = data.draw(max_size_strat) + + sub_seq_strat = subsequences_of(seq, min_size=min_size, max_size=max_size) + sub_seq = data.draw(sub_seq_strat) + + assert min_size <= len(sub_seq) <= max_size + + # this is a new test, important for keeping dimension names in order + @given(st.data(), st.lists(st.integers())) + def test_ordering_preserved(self, data, seq): + subsequence_of_dims = data.draw(subsequences_of(seq)) + assert sorted(subsequence_of_dims) == subsequence_of_dims + + @pytest.mark.xfail class TestDataArraysStrategy: @given(dataarrays()) From e21555a05094790dbf44926f65eb7c6462c3f955 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Mon, 15 Aug 2022 13:41:05 -0400 Subject: [PATCH 23/95] coordinate_variables generates non-dimensional coords --- xarray/testing/strategies.py | 91 ++++++++++++++++++++++++++------- xarray/tests/test_strategies.py | 30 +++++++++-- 2 files changed, 99 insertions(+), 22 deletions(-) diff 
--git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index fcebd06f75b..489434eb99b 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -87,7 +87,7 @@ def np_arrays( return draw(npst.arrays(dtype=dtype, shape=shape, elements=elements(dtype))) -names = st.text(alphabet=string.ascii_lowercase, min_size=1) +names = st.text(alphabet=string.ascii_lowercase, min_size=1, max_size=3) names.__doc__ = """Generates arbitrary string names for dimensions / variables.""" @@ -117,8 +117,8 @@ def dimension_names( def dimension_sizes( min_ndims: int = 0, max_ndims: int = 3, - min_length=1, - max_length=None, + min_length: int = 1, + max_length: int = None, ) -> st.SearchStrategy[Mapping[str, int]]: """ Generates an arbitrary mapping from dimension names to lengths. @@ -130,13 +130,13 @@ def dimension_sizes( Default is 1. max_ndims: int, optional Maximum number of dimensions in generated list. - Default is 3 + Default is 3. min_length: int, optional Minimum size of a dimension. Default is 1. max_length: int, optional Minimum size of a dimension. - Default is `min_size + 5` + Default is `min_length` + 5. 
""" if max_length is None: @@ -243,12 +243,12 @@ def variables( El = TypeVar("El") -# All from the unfinished PR https://github.com/HypothesisWorks/hypothesis/pull/1533 +# Mostly from the unfinished PR https://github.com/HypothesisWorks/hypothesis/pull/1533 # TODO Should move this function upstream by opening new PR @st.composite def subsequences_of( draw: st.DrawFn, - elements: Sequence[El], + elements: Union[Sequence[El], Mapping[str, El]], min_size: int = 0, max_size: int = None, ) -> st.SearchStrategy[Sequence[El]]: @@ -278,24 +278,71 @@ def element_mask() -> List[bool]: assert len(elements) == len(choices) return draw(st.permutations(choices)) - element_includes = zip(elements, element_mask()) - return sorted(element for element, include in element_includes if include) + if isinstance(elements, dict): + element_includes = zip(elements.keys(), elements.values(), element_mask()) + return {k: v for k, v, include in element_includes if include} + else: + element_includes = zip(elements, element_mask()) + return sorted(element for element, include in element_includes if include) @st.composite def _alignable_variables( draw: st.DrawFn, - dim_sizes: st.SearchStrategy[Mapping[str, int]], + dim_sizes: Mapping[str, int], ) -> st.SearchStrategy[List[xr.Variable]]: - dims = draw(subsequences_of(dim_sizes)) - return st.lists(variables(dims=dims)) + """Generates lists of variables with compatible (i.e. 
alignable) dimensions and sizes.""" + alignable_dim_sizes = subsequences_of(dim_sizes) + # TODO don't hard code max number of variables + return draw(st.lists(variables(dims=alignable_dim_sizes), max_size=3)) +@st.composite def coordinate_variables( - dims: st.SearchStrategy[List[str]], -) -> st.SearchStrategy[List[xr.Variable]]: - # TODO specifically generate dimension coordinates - return _alignable_variables(dims) + draw: st.DrawFn, + dim_sizes: Mapping[str, int], +) -> st.SearchStrategy[Mapping[str, xr.Variable]]: + """ + Generates dicts of alignable Variable objects for use as coordinates. + + Differs from data_variables strategy in that it deliberately creates dimension coordinates + (i.e. 1D variables with the same name as a dimension) as well as non-dimension coordinates. + + Parameters + ---------- + dim_sizes + """ + dim_names = list(dim_sizes.keys()) + + all_coords = {} + + # Possibly generate 1D "dimension coordinates" - explicit possibility not to include amy helps with shrinking + if st.booleans(): + # TODO specifically generate dimension coordinates + # TODO first generate subset of dimension names + # TODO then generate 1D variables for each name + ... + + # Possibly generate ND "non-dimension coordinates" - explicit possibility not to include any helps with shrinking + if st.booleans(): + non_dim_coord_vars = draw(_alignable_variables(dim_sizes=dim_sizes)) + + # can't have same name as a dimension + valid_non_dim_coord_names = names.filter(lambda n: n not in dim_names) + # TODO do I actually need to draw from st.lists for this? 
+ non_dim_coord_names = draw( + st.lists( + valid_non_dim_coord_names, + min_size=len(non_dim_coord_vars), + max_size=len(non_dim_coord_vars), + unique=True, + ) + ) + + non_dim_coords = {n: c for n, c in zip(non_dim_coord_names, non_dim_coord_vars)} + all_coords.update(non_dim_coords) + + return all_coords @st.composite @@ -324,14 +371,16 @@ def dataarrays( raise NotImplementedError() else: data = draw(np_arrays()) - dims = draw(dimension_names(min_ndims=data.ndim, max_ndims=data.ndim)) - coords = draw(coordinate_variables(dims=dims)) + dim_names = draw(dimension_names(min_ndims=data.ndim, max_ndims=data.ndim)) + dim_sizes = {n: l for n, l in zip(dim_names, data.shape)} + print(dim_sizes) + coords = draw(coordinate_variables(dim_sizes=dim_sizes)) return xr.DataArray( data=convert(data), coords=coords, name=name, - dims=dims, + dims=dim_names, attrs=attrs, ) @@ -339,6 +388,10 @@ def dataarrays( def data_variables( dims: st.SearchStrategy[List[str]], ) -> st.SearchStrategy[List[xr.Variable]]: + """ + Generates dicts of alignable Variable objects for use as Dataset data variables. + """ + # TODO these shouldn't have the same name as any dimensions or any coordinates... 
return _alignable_variables(dims) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index bf382919442..57452d345d0 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -5,10 +5,13 @@ import pytest from hypothesis import given -from xarray import DataArray, Dataset +from xarray import DataArray, Dataset, merge from xarray.core.variable import Variable from xarray.testing.strategies import ( + coordinate_variables, + data_variables, dataarrays, + datasets, dimension_names, dimension_sizes, np_arrays, @@ -231,13 +234,34 @@ def test_ordering_preserved(self, data, seq): assert sorted(subsequence_of_dims) == subsequence_of_dims -@pytest.mark.xfail +class TestCoordinateVariablesStrategy: + @given(coordinate_variables(dim_sizes={"x": 2, "y": 3})) + def test_alignable(self, coord_vars): + + # TODO there must be a better way of checking align-ability than this + for v in coord_vars.values(): + if "x" in v.dims: + assert v.sizes["x"] == 2 + if "y" in v.dims: + assert v.sizes["y"] == 3 + if not set(v.dims).issubset({"x", "y"}): + assert False, v + + def test_generates_1d_dim_coords(self): + # TODO having a hypothesis.find(strat, predicate) would be very useful here + # see https://github.com/HypothesisWorks/hypothesis/issues/3436#issuecomment-1212369645 + ... + + def test_generates_non_dim_coords(self, coord_vars): + ... + + +# @pytest.mark.xfail class TestDataArraysStrategy: @given(dataarrays()) def test_given_nothing(self, da): print(da) assert isinstance(da, DataArray) - assert False @pytest.mark.xfail From 1688779c373ae83759df96c750bda55ef77b259d Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Mon, 15 Aug 2022 14:11:26 -0400 Subject: [PATCH 24/95] dataarrays strategy given nothing working! 
--- xarray/testing/strategies.py | 17 +++++++++++------ xarray/tests/test_strategies.py | 14 ++++++++++---- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 489434eb99b..acc7c058def 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -310,7 +310,8 @@ def coordinate_variables( Parameters ---------- - dim_sizes + dim_sizes: Mapping of str to int + Sizes of dimensions to use for coordinates. """ dim_names = list(dim_sizes.keys()) @@ -318,10 +319,15 @@ def coordinate_variables( # Possibly generate 1D "dimension coordinates" - explicit possibility not to include amy helps with shrinking if st.booleans(): - # TODO specifically generate dimension coordinates - # TODO first generate subset of dimension names - # TODO then generate 1D variables for each name - ... + # first generate subset of dimension names - these set which dimension coords will be included + dim_coord_names_and_lengths = draw(subsequences_of(dim_sizes)) + + # then generate 1D variables for each name + dim_coords = { + n: draw(variables(dims=st.just({n: l}))) + for n, l in dim_coord_names_and_lengths.items() + } + all_coords.update(dim_coords) # Possibly generate ND "non-dimension coordinates" - explicit possibility not to include any helps with shrinking if st.booleans(): @@ -373,7 +379,6 @@ def dataarrays( data = draw(np_arrays()) dim_names = draw(dimension_names(min_ndims=data.ndim, max_ndims=data.ndim)) dim_sizes = {n: l for n, l in zip(dim_names, data.shape)} - print(dim_sizes) coords = draw(coordinate_variables(dim_sizes=dim_sizes)) return xr.DataArray( diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 57452d345d0..62bdf0841ab 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -247,20 +247,26 @@ def test_alignable(self, coord_vars): if not set(v.dims).issubset({"x", "y"}): assert False, v + @given(st.data()) + def 
test_valid_set_of_coords(self, data): + coord_vars = data.draw(coordinate_variables(dim_sizes={"x": 2, "y": 3})) + + arr = data.draw(np_arrays(shape=(2, 3))) + da = DataArray(data=arr, coords=coord_vars, dims=["x", "y"]) + assert isinstance(da, DataArray) + def test_generates_1d_dim_coords(self): - # TODO having a hypothesis.find(strat, predicate) would be very useful here + # TODO having a `hypothesis.find(strat, predicate)` function would be very useful here # see https://github.com/HypothesisWorks/hypothesis/issues/3436#issuecomment-1212369645 ... - def test_generates_non_dim_coords(self, coord_vars): + def test_generates_non_dim_coords(self): ... -# @pytest.mark.xfail class TestDataArraysStrategy: @given(dataarrays()) def test_given_nothing(self, da): - print(da) assert isinstance(da, DataArray) From 0a29d320285d50881d18a357e9f7190f9b8c1a78 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Mon, 15 Aug 2022 15:04:18 -0400 Subject: [PATCH 25/95] improved docstrings --- doc/user-guide/testing.rst | 14 ++++++---- xarray/testing/strategies.py | 52 +++++++++++++++++++++++++++++++----- 2 files changed, 55 insertions(+), 11 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 85eda5efd01..75eaa94b857 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -50,13 +50,16 @@ These strategies are accessible in the :py:module::`xarray.testing.strategies` m .. currentmodule:: xarray .. 
autosummary:: - :toctree: generated/ testing.strategies.valid_dtypes testing.strategies.np_arrays + testing.strategies.names testing.strategies.dimension_names + testing.strategies.dimension_sizes testing.strategies.variables + testing.strategies.coordinate_variables testing.strategies.dataarrays + testing.strategies.data_variables testing.strategies.datasets testing.strategies.chunks testing.strategies.chunksizes @@ -67,14 +70,15 @@ Generating Examples To see an example of what each of these strategies might produce, you can call one followed by the `.example()` method, which is a general hypothesis method valid for all strategies +(TODO we should specify a seed to hypothesis so that the docs generate the same examples on every build) + .. ipython:: python import xarray.testing.strategies as xrst - # TODO change this to dataarray once written - xrst.variables().example() - xrst.variables().example() - xrst.variables().example() + xrst.dataarrays().example() + xrst.dataarrays().example() + xrst.dataarrays().example() You can see that calling `.example()` multiple times will generate different examples, giving you an idea of the wide range of data that the xarray strategies can generate. diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index acc7c058def..a6beb238e37 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -15,7 +15,6 @@ import hypothesis.extra.numpy as npst import hypothesis.strategies as st import numpy as np -import pandas as pd from hypothesis import assume from hypothesis.internal.validation import check_valid_sizes @@ -169,7 +168,7 @@ def variables( Generates arbitrary xarray.Variable objects. Follows the signature of the xarray.Variable constructor, but you can also pass alternative strategies to generate - either numpy-like array data or dimension names. Passing both at once is forbidden. + either numpy-like array data or dimension names. 
Passing nothing will generate a completely arbitrary Variable (backed by a numpy array). @@ -177,11 +176,17 @@ def variables( ---------- data: Strategy generating array-likes, optional Default is to generate numpy data of arbitrary shape, values and dtype. - dims: Strategy which generates sequence of strings, optional + dims: Strategy for generating the dimensions, optional + Can either be a strategy for generating a list of string dimension names, + or a strategy for generating a mapping of string dimension names to integer lengths along each dimension. + If provided in the former form the lengths of the returned Variable will either be determined from the + data argument if given or arbitrarily generated if not. Default is to generate arbitrary dimension names for each axis in data. attrs: Strategy which generates dicts, optional convert: Callable Function which accepts one numpy array and returns one numpy-like array of the same shape. + Applied to the data after it is drawn from the `data` strategy provided. + Useful for converting numpy arrays to other types of arrays, e.g. sparse arrays. Default is a no-op. """ @@ -355,9 +360,7 @@ def coordinate_variables( def dataarrays( draw: st.DrawFn, data: st.SearchStrategy[T_Array] = None, - coords: Union[ - Sequence[Union[xr.DataArray, pd.Index]], Mapping[str, xr.Variable] - ] = None, + coords: Mapping[str, xr.Variable] = None, dims: Union[ st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] ] = None, @@ -365,17 +368,54 @@ def dataarrays( attrs: st.SearchStrategy[Mapping] = None, convert: Callable[[np.ndarray], T_Array] = lambda a: a, ) -> st.SearchStrategy[xr.DataArray]: + """ + Generates arbitrary xarray.DataArray objects. + + Follows the basic signature of the xarray.DataArray constructor, but you can also pass alternative strategies to + generate either numpy-like array data, dimensions, or coordinates. + + Passing nothing will generate a completely arbitrary DataArray (backed by a numpy array). 
+ + Parameters + ---------- + data: Strategy generating array-likes, optional + Default is to generate numpy data of arbitrary shape, values and dtype. + coords: Strategy generating mappings from coordinate names to xr.Variables objects, optional + Default is to generate an arbitrary combination of both dimension and non-dimension coordinates, + with sizes matching data and/or dims, but arbitrary names, dtypes, and values. + dims: Strategy for generating the dimensions, optional + Can either be a strategy for generating a list of string dimension names, + or a strategy for generating a mapping of string dimension names to integer lengths along each dimension. + If provided in the former form the lengths of the returned Variable will either be determined from the + data argument if given or arbitrarily generated if not. + Default is to generate arbitrary dimension names for each axis in data. + name: Strategy for generating a string name, optional + Default is to use the `names` strategy, or to create an unnamed DataArray. + attrs: Strategy which generates dicts, optional + convert: Callable + Function which accepts one numpy array and returns one numpy-like array of the same shape. + Applied to the data after it is drawn from the `data` strategy provided. + Useful for converting numpy arrays to other types of arrays, e.g. sparse arrays. + Default is a no-op. 
+ """ if name is None: name = draw(st.none() | names) + if coords is not None: + raise NotImplementedError() + if data is not None and dims is None: raise NotImplementedError() + elif data is None and dims is not None: raise NotImplementedError() + elif data is not None and dims is None: raise NotImplementedError() + else: + # nothing provided, so generate everything consistently by drawing dims to match data, and coords to match both data = draw(np_arrays()) dim_names = draw(dimension_names(min_ndims=data.ndim, max_ndims=data.ndim)) dim_sizes = {n: l for n, l in zip(dim_names, data.shape)} From 3259849835ebd09d8b289e4333e8289c9e3c0a66 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Mon, 15 Aug 2022 16:05:25 -0400 Subject: [PATCH 26/95] datasets strategy works (given nothing) --- xarray/testing/strategies.py | 140 ++++++++++++++++++-------------- xarray/tests/test_strategies.py | 8 +- 2 files changed, 87 insertions(+), 61 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index a6beb238e37..d2b14dd7a58 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -19,9 +19,7 @@ from hypothesis.internal.validation import check_valid_sizes import xarray as xr -from xarray.core.utils import is_dict_like -from . import utils __all__ = [ "valid_dtypes", @@ -350,7 +348,7 @@ def coordinate_variables( ) ) - non_dim_coords = {n: c for n, c in zip(non_dim_coord_names, non_dim_coord_vars)} + non_dim_coords = {n: v for n, v in zip(non_dim_coord_names, non_dim_coord_vars)} all_coords.update(non_dim_coords) return all_coords @@ -379,7 +377,7 @@ def dataarrays( Parameters ---------- data: Strategy generating array-likes, optional - Default is to generate numpy data of arbitrary shape, values and dtype. + Default is to generate numpy data of arbitrary shape, values and dtypes. 
coords: Strategy generating mappings from coordinate names to xr.Variables objects, optional Default is to generate an arbitrary combination of both dimension and non-dimension coordinates, with sizes matching data and/or dims, but arbitrary names, dtypes, and values. @@ -388,13 +386,13 @@ def dataarrays( or a strategy for generating a mapping of string dimension names to integer lengths along each dimension. If provided in the former form the lengths of the returned Variable will either be determined from the data argument if given or arbitrarily generated if not. - Default is to generate arbitrary dimension names for each axis in data. + Default is to generate arbitrary dimension sizes, or arbitrary dimension names for each axis in data. name: Strategy for generating a string name, optional Default is to use the `names` strategy, or to create an unnamed DataArray. attrs: Strategy which generates dicts, optional convert: Callable Function which accepts one numpy array and returns one numpy-like array of the same shape. - Applied to the data after it is drawn from the `data` strategy provided. + Applied to the data after it is drawn from the `data` strategy. Useful for converting numpy arrays to other types of arrays, e.g. sparse arrays. Default is a no-op. """ @@ -430,77 +428,99 @@ def dataarrays( ) +@st.composite def data_variables( - dims: st.SearchStrategy[List[str]], -) -> st.SearchStrategy[List[xr.Variable]]: + draw: st.DrawFn, + dim_sizes: Mapping[str, int], + allowed_names: st.SearchStrategy[str] = None, +) -> st.SearchStrategy[Mapping[str, xr.Variable]]: """ Generates dicts of alignable Variable objects for use as Dataset data variables. + + Parameters + ---------- + dim_sizes: Mapping of str to int + Sizes of dimensions to use for variables. + allowed_names: Strategy generating strings + Allowed names for data variables. Needed to avoid conflict with names of coordinate variables & dimensions. 
""" # TODO these shouldn't have the same name as any dimensions or any coordinates... - return _alignable_variables(dims) - - -@st.composite -def datasets( - draw: st.DrawFn, - create_data: Callable, - *, - min_dims=1, - max_dims=3, - min_size=1, - max_size=3, - min_vars=1, - max_vars=3, -) -> st.SearchStrategy[xr.Dataset]: - - dtypes = st.just(draw(valid_dtypes)) - names = st.text(min_size=1) - sizes = dimension_sizes( - min_size=min_size, max_size=max_size, min_dims=min_dims, max_dims=max_dims - ) + vars = draw(_alignable_variables(dim_sizes=dim_sizes)) + dim_names = list(dim_sizes.keys()) - data_vars = sizes.flatmap( - lambda s: st.dictionaries( - keys=names.filter(lambda n: n not in dict(s)), - values=variables(create_data, sizes=s, dtypes=dtypes), - min_size=min_vars, - max_size=max_vars, + # can't have same name as a dimension + # TODO this is also used in coordinate_variables so refactor it out into separate function + valid_var_names = allowed_names.filter(lambda n: n not in dim_names) + # TODO do I actually need to draw from st.lists for this? 
+ var_names = draw( + st.lists( + valid_var_names, + min_size=len(vars), + max_size=len(vars), + unique=True, ) ) - return xr.Dataset(data_vars=draw(data_vars)) + data_vars = {n: v for n, v in zip(var_names, vars)} + return data_vars -def valid_axis(ndim) -> st.SearchStrategy[Union[None, int]]: - if ndim == 0: - return st.none() | st.just(0) - return st.none() | st.integers(-ndim, ndim - 1) - - -def valid_axes(ndim) -> st.SearchStrategy[Union[None, int, Tuple[int, ...]]]: - return valid_axis(ndim) | npst.valid_tuple_axes(ndim, min_size=1) +@st.composite +def datasets( + draw: st.DrawFn, + data_vars: st.SearchStrategy[Mapping[str, xr.Variable]] = None, + coords: Mapping[str, xr.Variable] = None, + dims: Union[ + st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] + ] = None, + attrs: st.SearchStrategy[Mapping] = None, + convert: Callable[[np.ndarray], T_Array] = lambda a: a, +) -> st.SearchStrategy[xr.Dataset]: + """ + Generates arbitrary xarray.Dataset objects. + Follows the basic signature of the xarray.Dataset constructor, but you can also pass alternative strategies to + generate either numpy-like array data variables, dimensions, or coordinates. -def valid_dim(dims) -> st.SearchStrategy[str]: - if not isinstance(dims, list): - dims = [dims] + Passing nothing will generate a completely arbitrary Dataset (backed by numpy arrays). - ndim = len(dims) - axis = valid_axis(ndim) - return axis.map(lambda axes: utils.valid_dims_from_axes(dims, axes)) + Parameters + ---------- + data_vars: Strategy generating mappings from variable names to xr.Variable objects, optional + Default is to generate an arbitrary combination of compatible variables with sizes matching dims, + but arbitrary names, dtypes, and values. 
+ coords: Strategy generating mappings from coordinate names to xr.Variable objects, optional + Default is to generate an arbitrary combination of both dimension and non-dimension coordinates, + with sizes matching data_vars and/or dims, but arbitrary names, dtypes, and values. + dims: Strategy for generating the dimensions, optional + Can either be a strategy for generating a list of string dimension names, + or a strategy for generating a mapping of string dimension names to integer lengths along each dimension. + If provided in the former form the lengths of the returned Variable will either be determined from the + data argument if given or arbitrarily generated if not. + Default is to generate arbitrary dimension sizes. + attrs: Strategy which generates dicts, optional + convert: Callable + Function which accepts one numpy array and returns one numpy-like array of the same shape. + Applied to the data variables after they are drawn from the `data_vars` strategy. + Useful for converting numpy arrays to other types of arrays, e.g. sparse arrays. + Default is a no-op. 
+ """ + if any(arg is not None for arg in [data_vars, coords, dims, attrs]): + raise NotImplementedError() + else: + # nothing provided, so generate everything consistently by drawing dims to match data, and coords to match both + dim_sizes = draw(dimension_sizes()) + coords = draw(coordinate_variables(dim_sizes=dim_sizes)) + coord_names = list(coords.keys()) + data_var_names = names.filter(lambda n: n not in coord_names) + data_vars = draw( + data_variables(dim_sizes=dim_sizes, allowed_names=data_var_names) + ) -def valid_dims(dims) -> st.SearchStrategy[xr.DataArray]: - if is_dict_like(dims): - dims = list(dims.keys()) - elif isinstance(dims, tuple): - dims = list(dims) - elif not isinstance(dims, list): - dims = [dims] + # TODO convert data_vars - ndim = len(dims) - axes = valid_axes(ndim) - return axes.map(lambda axes: utils.valid_dims_from_axes(dims, axes)) + return xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs) @st.composite diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 62bdf0841ab..ade995be2c0 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -5,7 +5,7 @@ import pytest from hypothesis import given -from xarray import DataArray, Dataset, merge +from xarray import DataArray, Dataset from xarray.core.variable import Variable from xarray.testing.strategies import ( coordinate_variables, @@ -270,6 +270,12 @@ def test_given_nothing(self, da): assert isinstance(da, DataArray) +class TestDatasetsStrategy: + @given(datasets()) + def test_given_nothing(self, ds): + assert isinstance(ds, Dataset) + + @pytest.mark.xfail @given(st.data()) def test_chained_chunking_example(data): From d76e5b6b6efc908a1eea0eb55e999133fcc0642a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 15 Aug 2022 20:08:37 +0000 Subject: [PATCH 27/95] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see 
https://pre-commit.ci --- xarray/testing/strategies.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index d2b14dd7a58..a327db8b365 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -20,7 +20,6 @@ import xarray as xr - __all__ = [ "valid_dtypes", "np_arrays", From c25940c1820c4bedb0c832d0fa3b2c94b5dd6cff Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Mon, 15 Aug 2022 20:46:34 -0400 Subject: [PATCH 28/95] pass dims or data to dataarrays() strategy --- doc/user-guide/testing.rst | 52 +++++++++++++++++++++++---------- xarray/testing/strategies.py | 43 +++++++++++++++++++++++---- xarray/tests/test_strategies.py | 30 +++++++++++++++++++ 3 files changed, 105 insertions(+), 20 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 75eaa94b857..b56bb457f99 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -12,13 +12,6 @@ Testing your code np.random.seed(123456) -.. _asserts: - -Asserts -------- - -TODO - .. _hypothesis: Hypothesis testing @@ -67,8 +60,8 @@ These strategies are accessible in the :py:module::`xarray.testing.strategies` m Generating Examples ~~~~~~~~~~~~~~~~~~~ -To see an example of what each of these strategies might produce, you can call one followed by the `.example()` method, -which is a general hypothesis method valid for all strategies +To see an example of what each of these strategies might produce, you can call one followed by the ``.example()`` method, +which is a general hypothesis method valid for all strategies. 
(TODO we should specify a seed to hypothesis so that the docs generate the same examples on every build) @@ -80,14 +73,22 @@ which is a general hypothesis method valid for all strategies xrst.dataarrays().example() xrst.dataarrays().example() -You can see that calling `.example()` multiple times will generate different examples, giving you an idea of the wide +You can see that calling ``.example()`` multiple times will generate different examples, giving you an idea of the wide range of data that the xarray strategies can generate. -# TODO simple test example +In your tests however you should not use ``.example()`` - instead you should parameterize your tests with the +``hypothesis.given`` decorator: + +# TODO finishsimple test example .. ipython:: python - import hypothesis.strategies as st + from hypothesis import given + + @given(xrst.dataarrays()) + def test_something(da): + ... + Chaining Strategies ~~~~~~~~~~~~~~~~~~~ @@ -96,8 +97,12 @@ Xarray's strategies can accept other strategies as arguments, allowing you to cu examples. .. ipython:: python + :okexcept: - xrst.variables(data=xrst.np_arrays(shape=(3, 4))) + # generate a DataArray with shape (3, 4), but all other details still arbitrary + xrst.dataarrays( + data=xrst.np_arrays(shape=(3, 4), dtype=np.dtype("int32")) + ).example() This also works with strategies defined in other packages, for example the ``chunks`` strategy defined in ``dask.array.strategies``. @@ -107,10 +112,11 @@ Fixing Arguments ~~~~~~~~~~~~~~~~ If you want to fix one aspect of the data structure, whilst allowing variation in the generated examples -over all other aspects, then use ``st.just()``. +over all other aspects, then use ``hypothesis.strategies.just()``. .. ipython:: python - :okexcept: + + import hypothesis.strategies as st # Generates only dataarrays with dimensions ["x", "y"] xrst.dataarrays(dims=st.just(["x", "y"])).example() @@ -118,6 +124,22 @@ over all other aspects, then use ``st.just()``. 
(This is technically another example of chaining strategies - ``hypothesis.strategies.just`` is simply a special strategy that just contains a single example.) +To fix the length of dimensions you can instead pass `dims` as a mapping of dimension names to lengths +(i.e. following xarray object's ``.sizes()`` property), e.g. + +.. ipython:: python + + # Generates only dataarrays with dimensions ["x", "y"], of lengths 2 & 3 respectively + xrst.dataarrays(dims=st.just({"x": 2, "y": 3})).example() + +You can also use this to specify that you want examples which are missing some part of the data structure, for instance + +.. ipython:: python + :okexcept: + + # Generates only dataarrays with no coordinates + xrst.dataarrays(coords=st.just({})).example() + Duck-type Conversion ~~~~~~~~~~~~~~~~~~~~ diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index d2b14dd7a58..cf7c08f7d0d 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -20,7 +20,6 @@ import xarray as xr - __all__ = [ "valid_dtypes", "np_arrays", @@ -286,6 +285,7 @@ def element_mask() -> List[bool]: return {k: v for k, v, include in element_includes if include} else: element_includes = zip(elements, element_mask()) + # TODO this sorted call doesn't actually guarantee elements are sorted in same order they were supplied in return sorted(element for element, include in element_includes if include) @@ -404,13 +404,44 @@ def dataarrays( raise NotImplementedError() if data is not None and dims is None: - raise NotImplementedError() + # no dims -> generate dims to match data + data = draw(data) + dim_names = draw(dimension_names(min_ndims=data.ndim, max_ndims=data.ndim)) + dim_sizes = {n: l for n, l in zip(dim_names, data.shape)} + coords = draw(coordinate_variables(dim_sizes=dim_sizes)) elif data is None and dims is not None: - raise NotImplementedError() + # no data -> generate data to match dims + dims = draw(dims) + if isinstance(dims, List): + dim_names = dims + 
valid_shapes = npst.array_shapes(min_dims=len(dims), max_dims=len(dims)) + data = draw(np_arrays(shape=draw(valid_shapes))) + dim_sizes = {n: l for n, l in zip(dims, data.shape)} + coords = draw(coordinate_variables(dim_sizes=dim_sizes)) - elif data is not None and dims is None: - raise NotImplementedError() + else: + # should be a mapping of form {dim_names: lengths} + dim_names, shape = list(dims.keys()), tuple(dims.values()) + data = draw(np_arrays(shape=shape)) + coords = draw(coordinate_variables(dim_sizes=dims)) + + elif data is not None and dims is not None: + # both data and dims provided -> check drawn examples are compatible + dims = draw(dims) + if isinstance(dims, List): + dim_names = dims + data = draw(data) + assume(data.ndim == len(dims)) + dim_sizes = {n: l for n, l in zip(dims, data.shape)} + else: + # should be a mapping of form {dim_names: lengths} + data = draw(data) + dim_sizes = dims + dim_names, shape = list(dims.keys()), tuple(dims.values()) + assume(data.shape == shape) + + coords = draw(coordinate_variables(dim_sizes=dim_sizes)) else: # nothing provided, so generate everything consistently by drawing dims to match data, and coords to match both @@ -511,9 +542,11 @@ def datasets( else: # nothing provided, so generate everything consistently by drawing dims to match data, and coords to match both dim_sizes = draw(dimension_sizes()) + # TODO allow for no coordinate variables coords = draw(coordinate_variables(dim_sizes=dim_sizes)) coord_names = list(coords.keys()) data_var_names = names.filter(lambda n: n not in coord_names) + # TODO allow for no data variables data_vars = draw( data_variables(dim_sizes=dim_sizes, allowed_names=data_var_names) ) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index ade995be2c0..8d9b3e085de 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -269,6 +269,36 @@ class TestDataArraysStrategy: def test_given_nothing(self, da): assert isinstance(da, 
DataArray) + @given(st.data()) + def test_given_dims(self, data): + da = data.draw(dataarrays(dims=st.just(["x", "y"]))) + assert da.dims == ("x", "y") + + da = data.draw(dataarrays(dims=st.just({"x": 2, "y": 3}))) + assert da.sizes == {"x": 2, "y": 3} + + @given(st.data()) + def test_given_data(self, data): + shape = (2, 3) + arrs = np_arrays(shape=shape) + da = data.draw(dataarrays(data=arrs)) + + assert da.shape == shape + + @given(st.data()) + def test_given_data_and_dims(self, data): + arrs = np_arrays(shape=(2, 3)) + dims = dimension_names(min_ndims=2) + da = data.draw(dataarrays(data=arrs, dims=dims)) + + assert da.shape == (2, 3) + + arrs = np_arrays(shape=(3, 4)) + dims = st.just({"x": 3, "y": 4}) + da = data.draw(dataarrays(data=arrs, dims=dims)) + + assert da.sizes == {"x": 3, "y": 4} + class TestDatasetsStrategy: @given(datasets()) From cd7b0654b7f9f8fbbc12dfb49e4b71218e247542 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 16 Aug 2022 09:58:22 -0400 Subject: [PATCH 29/95] importorskip hypothesis in tests --- xarray/testing/strategies.py | 20 ++++++++++++++++++++ xarray/tests/test_strategies.py | 8 ++++++-- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index cf7c08f7d0d..ebd9532de40 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -46,6 +46,8 @@ def elements(dtype) -> st.SearchStrategy[Any]: """ Generates scalar elements to go in a numpy-like array. + + Requires the hypothesis package to be installed. """ max_value = 100 min_value = 0 if dtype.kind == "u" else -max_value @@ -64,6 +66,8 @@ def np_arrays( """ Generates arbitrary numpy arrays with xarray-compatible dtypes. + Requires the hypothesis package to be installed. + Parameters ---------- shape @@ -94,6 +98,8 @@ def dimension_names( """ Generates an arbitrary list of valid dimension names. + Requires the hypothesis package to be installed. 
+ Parameters ---------- min_ndims @@ -119,6 +125,8 @@ def dimension_sizes( """ Generates an arbitrary mapping from dimension names to lengths. + Requires the hypothesis package to be installed. + Parameters ---------- min_ndims: int, optional @@ -169,6 +177,8 @@ def variables( Passing nothing will generate a completely arbitrary Variable (backed by a numpy array). + Requires the hypothesis package to be installed. + Parameters ---------- data: Strategy generating array-likes, optional @@ -259,6 +269,8 @@ def subsequences_of( Order is guaranteed to be preserved in the result. + Requires the hypothesis package to be installed. + Parameters ---------- elements: Elements from which to construct the subsequence @@ -311,6 +323,8 @@ def coordinate_variables( Differs from data_variables strategy in that it deliberately creates dimension coordinates (i.e. 1D variables with the same name as a dimension) as well as non-dimension coordinates. + Requires the hypothesis package to be installed. + Parameters ---------- dim_sizes: Mapping of str to int @@ -374,6 +388,8 @@ def dataarrays( Passing nothing will generate a completely arbitrary DataArray (backed by a numpy array). + Requires the hypothesis package to be installed. + Parameters ---------- data: Strategy generating array-likes, optional @@ -468,6 +484,8 @@ def data_variables( """ Generates dicts of alignable Variable objects for use as Dataset data variables. + Requires the hypothesis package to be installed. + Parameters ---------- dim_sizes: Mapping of str to int @@ -515,6 +533,8 @@ def datasets( Passing nothing will generate a completely arbitrary Dataset (backed by numpy arrays). + Requires the hypothesis package to be installed. 
+ Parameters ---------- data_vars: Strategy generating mappings from variable names to xr.Variable objects, optional diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 8d9b3e085de..b85342b0902 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -1,8 +1,12 @@ -import hypothesis.extra.numpy as npst -import hypothesis.strategies as st import numpy as np import numpy.testing as npt import pytest + +pytest.importorskip("hypothesis") +# isort: split + +import hypothesis.extra.numpy as npst +import hypothesis.strategies as st from hypothesis import given from xarray import DataArray, Dataset From 8e548b13518b1db31e48740ae088f952e4912a1c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 16 Aug 2022 14:00:20 +0000 Subject: [PATCH 30/95] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- doc/user-guide/testing.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index b56bb457f99..40eb5f99f05 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -85,6 +85,7 @@ In your tests however you should not use ``.example()`` - instead you should par from hypothesis import given + @given(xrst.dataarrays()) def test_something(da): ... 
From d1487d481da0292c122ee51a7383f0fe3a4079e7 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 16 Aug 2022 10:49:57 -0400 Subject: [PATCH 31/95] added warning about inefficient example generation --- doc/user-guide/testing.rst | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index b56bb457f99..da0d5ac26de 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -85,6 +85,8 @@ In your tests however you should not use ``.example()`` - instead you should par from hypothesis import given +.. ipython:: python + @given(xrst.dataarrays()) def test_something(da): ... @@ -107,6 +109,35 @@ examples. This also works with strategies defined in other packages, for example the ``chunks`` strategy defined in ``dask.array.strategies``. +.. warning:: + Passing multiple different strategies to the same constructor can lead to poor example generation performance. + + This is because in order to construct a valid xarray object to return, our strategies must check that the + variables / dimensions / coordinates are mutually compatible. We do this using ``hypothesis.assume``, which throws + away any generated examples not meeting the required condition. + + Therefore if you pass multiple custom strategies to a strategy constructor which are not compatible in enough cases, + most of the examples they generate will be mutually incompatible. This will likely lead to poor example generation + performance, manifesting as a ``hypothesis.errors.FailedHealthCheck`` being raised. For example: + + .. code-block:: + + @given(st.data()) + def test_something_else_inefficiently(data): + arrs = xrst.np_arrays() # generates arrays of any shape + dims = xrst.dimension_names() # generates lists of any number of dimensions + + # Drawing examples from this strategy is likely to have poor performance + var = data.draw(xrst.variables(data=arrs, dims=dims)) + + assert ... 
+ + Here we have passed custom strategies which won't often be compatible: only rarely will the array's ``ndims`` + correspond to the number of dimensions drawn. + + To avoid this problem either allow xarray's strategies to automatically generate compatible data for you, or be more + selective about cases when passing multiple custom strategies to the same constructor. + Fixing Arguments ~~~~~~~~~~~~~~~~ @@ -125,7 +156,7 @@ over all other aspects, then use ``hypothesis.strategies.just()``. strategy that just contains a single example.) To fix the length of dimensions you can instead pass `dims` as a mapping of dimension names to lengths -(i.e. following xarray object's ``.sizes()`` property), e.g. +(i.e. following xarray objects' ``.sizes()`` property), e.g. .. ipython:: python From 8bac610e25fa39d80065599533467b6079a68b0f Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 17 Aug 2022 15:41:25 -0400 Subject: [PATCH 32/95] remove TODO about deterministic examples in docs --- doc/user-guide/testing.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index da0d5ac26de..a26a342f4fc 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -63,8 +63,6 @@ Generating Examples To see an example of what each of these strategies might produce, you can call one followed by the ``.example()`` method, which is a general hypothesis method valid for all strategies. -(TODO we should specify a seed to hypothesis so that the docs generate the same examples on every build) - .. 
ipython:: python import xarray.testing.strategies as xrst From cf3beb58d33ed369ab587323354bd14373d18c03 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 17 Aug 2022 15:44:50 -0400 Subject: [PATCH 33/95] un-restrict names strategy --- xarray/testing/strategies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index ebd9532de40..a8c8ce32253 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -87,7 +87,7 @@ def np_arrays( return draw(npst.arrays(dtype=dtype, shape=shape, elements=elements(dtype))) -names = st.text(alphabet=string.ascii_lowercase, min_size=1, max_size=3) +names = st.text(st.characters(), min_size=1) names.__doc__ = """Generates arbitrary string names for dimensions / variables.""" From d991357860c1395c8aefb7d73c6508107c1a29b1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 17 Aug 2022 19:46:58 +0000 Subject: [PATCH 34/95] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/testing/strategies.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index a8c8ce32253..a663203a713 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,4 +1,3 @@ -import string from typing import ( Any, Callable, From a6405cff9e9137f72dfbd506de46b33c7ef66a25 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 17 Aug 2022 15:53:38 -0400 Subject: [PATCH 35/95] removed convert kwarg --- doc/user-guide/testing.rst | 6 +++--- xarray/testing/strategies.py | 38 +++------------------------------ xarray/tests/test_strategies.py | 7 ------ 3 files changed, 6 insertions(+), 45 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index a26a342f4fc..018c17c5aad 100644 --- a/doc/user-guide/testing.rst +++ 
b/doc/user-guide/testing.rst @@ -170,7 +170,7 @@ You can also use this to specify that you want examples which are missing some p xrst.dataarrays(coords=st.just({})).example() -Duck-type Conversion -~~~~~~~~~~~~~~~~~~~~ +Creating Duck-type Arrays +~~~~~~~~~~~~~~~~~~~~~~~~~ -# TODO converting to duckarrays +# TODO creating duckarrays by passing custom strategies to data arg diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index a8c8ce32253..dae42c38d77 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,16 +1,4 @@ -import string -from typing import ( - Any, - Callable, - List, - Mapping, - Optional, - Sequence, - Set, - Tuple, - TypeVar, - Union, -) +from typing import Any, List, Mapping, Optional, Sequence, Set, Tuple, TypeVar, Union import hypothesis.extra.numpy as npst import hypothesis.strategies as st @@ -167,7 +155,6 @@ def variables( st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] ] = None, attrs: st.SearchStrategy[Mapping] = None, - convert: Callable[[np.ndarray], T_Array] = lambda a: a, ) -> st.SearchStrategy[xr.Variable]: """ Generates arbitrary xarray.Variable objects. @@ -190,11 +177,6 @@ def variables( data argument if given or arbitrarily generated if not. Default is to generate arbitrary dimension names for each axis in data. attrs: Strategy which generates dicts, optional - convert: Callable - Function which accepts one numpy array and returns one numpy-like array of the same shape. - Applied to the data after it is drawn from the `data` strategy provided. - Useful for converting numpy arrays to other types of arrays, e.g. sparse arrays. - Default is a no-op. """ if any( @@ -249,7 +231,7 @@ def variables( # TODO autogenerate some attributes ... 
- return xr.Variable(dims=dims, data=convert(data), attrs=attrs) + return xr.Variable(dims=dims, data=data, attrs=attrs) El = TypeVar("El") @@ -378,7 +360,6 @@ def dataarrays( ] = None, name: st.SearchStrategy[Union[str, None]] = None, attrs: st.SearchStrategy[Mapping] = None, - convert: Callable[[np.ndarray], T_Array] = lambda a: a, ) -> st.SearchStrategy[xr.DataArray]: """ Generates arbitrary xarray.DataArray objects. @@ -406,11 +387,6 @@ def dataarrays( name: Strategy for generating a string name, optional Default is to use the `names` strategy, or to create an unnamed DataArray. attrs: Strategy which generates dicts, optional - convert: Callable - Function which accepts one numpy array and returns one numpy-like array of the same shape. - Applied to the data after it is drawn from the `data` strategy. - Useful for converting numpy arrays to other types of arrays, e.g. sparse arrays. - Default is a no-op. """ if name is None: @@ -467,7 +443,7 @@ def dataarrays( coords = draw(coordinate_variables(dim_sizes=dim_sizes)) return xr.DataArray( - data=convert(data), + data=data, coords=coords, name=name, dims=dim_names, @@ -523,7 +499,6 @@ def datasets( st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] ] = None, attrs: st.SearchStrategy[Mapping] = None, - convert: Callable[[np.ndarray], T_Array] = lambda a: a, ) -> st.SearchStrategy[xr.Dataset]: """ Generates arbitrary xarray.Dataset objects. @@ -550,11 +525,6 @@ def datasets( data argument if given or arbitrarily generated if not. Default is to generate arbitrary dimension sizes. attrs: Strategy which generates dicts, optional - convert: Callable - Function which accepts one numpy array and returns one numpy-like array of the same shape. - Applied to the data variables after they are drawn from the `data_vars` strategy. - Useful for converting numpy arrays to other types of arrays, e.g. sparse arrays. - Default is a no-op. 
""" if any(arg is not None for arg in [data_vars, coords, dims, attrs]): @@ -571,8 +541,6 @@ def datasets( data_variables(dim_sizes=dim_sizes, allowed_names=data_var_names) ) - # TODO convert data_vars - return xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index b85342b0902..25034280814 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -148,13 +148,6 @@ def test_given_fixed_sizes_and_arbitrary_data(self, data): assert var.shape == (2, 3) - @given(st.data()) - def test_convert(self, data): - arr = st.just(np.asarray([1, 2, 3])) - var = data.draw(variables(data=arr, convert=lambda x: x * 2)) - - npt.assert_equal(var.data, np.asarray([2, 4, 6])) - # All from the unfinished PR https://github.com/HypothesisWorks/hypothesis/pull/1533 class TestSubsequencesOfStrategy: From 3609a3404fb78cf5f59ef7e348055d351234339e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 17 Aug 2022 19:55:50 +0000 Subject: [PATCH 36/95] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/testing/strategies.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 1b57b09a579..dae42c38d77 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,14 +1,4 @@ -from typing import ( - Any, - List, - Mapping, - Optional, - Sequence, - Set, - Tuple, - TypeVar, - Union, -) +from typing import Any, List, Mapping, Optional, Sequence, Set, Tuple, TypeVar, Union import hypothesis.extra.numpy as npst import hypothesis.strategies as st From 63ad529d4b02e3aeebed16f48c3791b6149700e0 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 17 Aug 2022 16:47:04 -0400 Subject: [PATCH 37/95] avoid using subsequences_of --- xarray/testing/strategies.py 
| 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 1b57b09a579..37ecf5e00d1 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,14 +1,4 @@ -from typing import ( - Any, - List, - Mapping, - Optional, - Sequence, - Set, - Tuple, - TypeVar, - Union, -) +from typing import Any, List, Mapping, Optional, Sequence, Set, Tuple, TypeVar, Union import hypothesis.extra.numpy as npst import hypothesis.strategies as st @@ -299,9 +289,17 @@ def _alignable_variables( dim_sizes: Mapping[str, int], ) -> st.SearchStrategy[List[xr.Variable]]: """Generates lists of variables with compatible (i.e. alignable) dimensions and sizes.""" - alignable_dim_sizes = subsequences_of(dim_sizes) + + # TODO refactor this out into separate function + if dim_sizes: + dims = list(dim_sizes.keys()) + subset_dims = draw(st.lists(st.sampled_from(dims), unique=True)) + alignable_dim_sizes = {d: dim_sizes[d] for d in subset_dims} + else: + alignable_dim_sizes = {} + # TODO don't hard code max number of variables - return draw(st.lists(variables(dims=alignable_dim_sizes), max_size=3)) + return draw(st.lists(variables(dims=st.just(alignable_dim_sizes)), max_size=3)) @st.composite @@ -327,9 +325,10 @@ def coordinate_variables( all_coords = {} # Possibly generate 1D "dimension coordinates" - explicit possibility not to include amy helps with shrinking - if st.booleans(): + if dim_names and st.booleans(): # first generate subset of dimension names - these set which dimension coords will be included - dim_coord_names_and_lengths = draw(subsequences_of(dim_sizes)) + subset_dims = draw(st.lists(st.sampled_from(dim_names), unique=True)) + dim_coord_names_and_lengths = {d: dim_sizes[d] for d in subset_dims} # then generate 1D variables for each name dim_coords = { @@ -344,7 +343,6 @@ def coordinate_variables( # can't have same name as a dimension valid_non_dim_coord_names = 
names.filter(lambda n: n not in dim_names) - # TODO do I actually need to draw from st.lists for this? non_dim_coord_names = draw( st.lists( valid_non_dim_coord_names, From 4ffbcbd5e5726060bb02d4fee9ed504b6e93b19e Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 17 Aug 2022 16:59:11 -0400 Subject: [PATCH 38/95] refactored into separate function for unique subset of dims --- xarray/testing/strategies.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 37ecf5e00d1..71edbc1fc75 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,4 +1,15 @@ -from typing import Any, List, Mapping, Optional, Sequence, Set, Tuple, TypeVar, Union +from typing import ( + Any, + Hashable, + List, + Mapping, + Optional, + Sequence, + Set, + Tuple, + TypeVar, + Union, +) import hypothesis.extra.numpy as npst import hypothesis.strategies as st @@ -283,6 +294,14 @@ def element_mask() -> List[bool]: return sorted(element for element, include in element_includes if include) +@st.composite +def _unique_subset_of( + draw: st.DrawFn, d: Mapping[Hashable, Any] +) -> st.SearchStrategy[Mapping[Hashable, Any]]: + subset_keys = draw(st.lists(st.sampled_from(list(d.keys())), unique=True)) + return {k: d[k] for k in subset_keys} + + @st.composite def _alignable_variables( draw: st.DrawFn, @@ -290,13 +309,7 @@ def _alignable_variables( ) -> st.SearchStrategy[List[xr.Variable]]: """Generates lists of variables with compatible (i.e. 
alignable) dimensions and sizes.""" - # TODO refactor this out into separate function - if dim_sizes: - dims = list(dim_sizes.keys()) - subset_dims = draw(st.lists(st.sampled_from(dims), unique=True)) - alignable_dim_sizes = {d: dim_sizes[d] for d in subset_dims} - else: - alignable_dim_sizes = {} + alignable_dim_sizes = draw(_unique_subset_of(dim_sizes)) if dim_sizes else {} # TODO don't hard code max number of variables return draw(st.lists(variables(dims=st.just(alignable_dim_sizes)), max_size=3)) @@ -327,8 +340,7 @@ def coordinate_variables( # Possibly generate 1D "dimension coordinates" - explicit possibility not to include amy helps with shrinking if dim_names and st.booleans(): # first generate subset of dimension names - these set which dimension coords will be included - subset_dims = draw(st.lists(st.sampled_from(dim_names), unique=True)) - dim_coord_names_and_lengths = {d: dim_sizes[d] for d in subset_dims} + dim_coord_names_and_lengths = draw(_unique_subset_of(dim_sizes)) # then generate 1D variables for each name dim_coords = { From 469482df933beefa322b8b2ab98ad452311a29f5 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 17 Aug 2022 17:01:40 -0400 Subject: [PATCH 39/95] removed subsequences_of --- xarray/testing/strategies.py | 62 +----------------------- xarray/tests/test_strategies.py | 83 --------------------------------- 2 files changed, 1 insertion(+), 144 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 71edbc1fc75..f6a9a7ea575 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,15 +1,4 @@ -from typing import ( - Any, - Hashable, - List, - Mapping, - Optional, - Sequence, - Set, - Tuple, - TypeVar, - Union, -) +from typing import Any, Hashable, List, Mapping, Optional, Set, Tuple, Union import hypothesis.extra.numpy as npst import hypothesis.strategies as st @@ -245,55 +234,6 @@ def variables( return xr.Variable(dims=dims, data=data, attrs=attrs) -El = 
TypeVar("El") - - -# Mostly from the unfinished PR https://github.com/HypothesisWorks/hypothesis/pull/1533 -# TODO Should move this function upstream by opening new PR -@st.composite -def subsequences_of( - draw: st.DrawFn, - elements: Union[Sequence[El], Mapping[str, El]], - min_size: int = 0, - max_size: int = None, -) -> st.SearchStrategy[Sequence[El]]: - """ - Returns a strategy which generates sub-sequences of the input sequence. - - Order is guaranteed to be preserved in the result. - - Requires the hypothesis package to be installed. - - Parameters - ---------- - elements: Elements from which to construct the subsequence - min_size: int - Minimum size of the returned subsequences. - Default is 0. - max_size: int, optional - Maximum size of the returned subsequences. - Default is the full size of the input sequence. - """ - if max_size is None: - max_size = len(elements) - check_valid_sizes(min_size, max_size) - - def element_mask() -> List[bool]: - num_include = draw(st.integers(min_size, max_size)) - num_exclude = len(elements) - num_include - choices = [True] * num_include + [False] * num_exclude - assert len(elements) == len(choices) - return draw(st.permutations(choices)) - - if isinstance(elements, dict): - element_includes = zip(elements.keys(), elements.values(), element_mask()) - return {k: v for k, v, include in element_includes if include} - else: - element_includes = zip(elements, element_mask()) - # TODO this sorted call doesn't actually guarantee elements are sorted in same order they were supplied in - return sorted(element for element, include in element_includes if include) - - @st.composite def _unique_subset_of( draw: st.DrawFn, d: Mapping[Hashable, Any] diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 25034280814..dd6975dc1b9 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -19,7 +19,6 @@ dimension_names, dimension_sizes, np_arrays, - subsequences_of, valid_dtypes, 
variables, ) @@ -149,88 +148,6 @@ def test_given_fixed_sizes_and_arbitrary_data(self, data): assert var.shape == (2, 3) -# All from the unfinished PR https://github.com/HypothesisWorks/hypothesis/pull/1533 -class TestSubsequencesOfStrategy: - @pytest.mark.xfail( - reason="Can't work out how to import assert_no_examples from hypothesis.tests.common.debug" - ) - def test_subsequence_of_empty(self): - sub_seq_strat = st.lists(st.none(), max_size=0) - assert_no_examples(sub_seq_strat) - - @given(st.data(), st.lists(st.integers())) - def test_subsequence_sizing(self, data, seq): - sub_seq_strat = subsequences_of(seq) - sub_seq = data.draw(sub_seq_strat) - - assert isinstance(sub_seq, list) - assert len(sub_seq) <= len(seq) - - @given(st.data(), st.lists(st.integers())) - def test_subsequence_only_original_elements(self, data, seq): - sub_seq_strat = subsequences_of(seq) - sub_seq = data.draw(sub_seq_strat) - - assert isinstance(sub_seq, list) - assert len(sub_seq) <= len(seq) - - @given(st.data(), st.lists(st.integers())) - def test_subsequence_elements_not_over_drawn(self, data, seq): - sub_seq_strat = subsequences_of(seq) - sub_seq = data.draw(sub_seq_strat) - - assert not (set(sub_seq) - set(seq)) - - @given(st.data(), st.lists(st.integers())) - def test_subsequence_original_elements_not_over_produced(self, data, seq): - sub_seq_strat = subsequences_of(seq) - sub_seq = data.draw(sub_seq_strat) - - # Per unique item, check that they don't occur in the subsequence - # more times that they appear in the source. 
- for item in set(sub_seq): - assert sub_seq.count(item) <= seq.count(item) - - @given(st.data(), st.lists(st.integers())) - def test_subsequence_max_size_constraint(self, data, seq): - max_size_strat = st.integers(min_value=0, max_value=len(seq)) - max_size = data.draw(max_size_strat) - - sub_seq_strat = subsequences_of(seq, max_size=max_size) - sub_seq = data.draw(sub_seq_strat) - - assert len(sub_seq) <= max_size - - @given(st.data(), st.lists(st.integers())) - def test_subsequence_min_size_constraint(self, data, seq): - min_size_strat = st.integers(min_value=0, max_value=len(seq)) - min_size = data.draw(min_size_strat) - - sub_seq_strat = subsequences_of(seq, min_size=min_size) - sub_seq = data.draw(sub_seq_strat) - - assert len(sub_seq) >= min_size - - @given(st.data(), st.lists(st.integers())) - def test_subsequence_min_max_size_constraint(self, data, seq): - min_size_strat = st.integers(min_value=0, max_value=len(seq)) - min_size = data.draw(min_size_strat) - - max_size_strat = st.integers(min_value=min_size, max_value=len(seq)) - max_size = data.draw(max_size_strat) - - sub_seq_strat = subsequences_of(seq, min_size=min_size, max_size=max_size) - sub_seq = data.draw(sub_seq_strat) - - assert min_size <= len(sub_seq) <= max_size - - # this is a new test, important for keeping dimension names in order - @given(st.data(), st.lists(st.integers())) - def test_ordering_preserved(self, data, seq): - subsequence_of_dims = data.draw(subsequences_of(seq)) - assert sorted(subsequence_of_dims) == subsequence_of_dims - - class TestCoordinateVariablesStrategy: @given(coordinate_variables(dim_sizes={"x": 2, "y": 3})) def test_alignable(self, coord_vars): From ced1a9f1172c565c7ec14998cdcd1ddb1b13a6bb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 17 Aug 2022 21:04:09 +0000 Subject: [PATCH 40/95] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- 
xarray/testing/strategies.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 6eb477c73e9..ef532f37b11 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,4 +1,3 @@ - from typing import Any, Hashable, List, Mapping, Optional, Set, Tuple, Union import hypothesis.extra.numpy as npst From a3c9ad07559963044c3c489d124298d8a52ebf78 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 17 Aug 2022 17:21:31 -0400 Subject: [PATCH 41/95] fix draw(st.booleans()) --- xarray/testing/strategies.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 6eb477c73e9..01fbae00ac9 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,4 +1,3 @@ - from typing import Any, Hashable, List, Mapping, Optional, Set, Tuple, Union import hypothesis.extra.numpy as npst @@ -278,7 +277,7 @@ def coordinate_variables( all_coords = {} # Possibly generate 1D "dimension coordinates" - explicit possibility not to include amy helps with shrinking - if dim_names and st.booleans(): + if dim_names and draw(st.booleans()): # first generate subset of dimension names - these set which dimension coords will be included dim_coord_names_and_lengths = draw(_unique_subset_of(dim_sizes)) @@ -290,7 +289,7 @@ def coordinate_variables( all_coords.update(dim_coords) # Possibly generate ND "non-dimension coordinates" - explicit possibility not to include any helps with shrinking - if st.booleans(): + if draw(st.booleans()): non_dim_coord_vars = draw(_alignable_variables(dim_sizes=dim_sizes)) # can't have same name as a dimension @@ -492,14 +491,25 @@ def datasets( else: # nothing provided, so generate everything consistently by drawing dims to match data, and coords to match both dim_sizes = draw(dimension_sizes()) - # TODO allow for no coordinate variables - coords = 
draw(coordinate_variables(dim_sizes=dim_sizes)) + + # Allow for no coordinate variables - helps with shrinking + if draw(st.booleans()): + coords = draw(coordinate_variables(dim_sizes=dim_sizes)) + else: + coords = {} + coord_names = list(coords.keys()) - data_var_names = names.filter(lambda n: n not in coord_names) - # TODO allow for no data variables - data_vars = draw( - data_variables(dim_sizes=dim_sizes, allowed_names=data_var_names) - ) + allowed_data_var_names = names.filter(lambda n: n not in coord_names) + + # Allow for no data variables - helps with shrinking + if draw(st.booleans()): + draw( + data_variables( + dim_sizes=dim_sizes, allowed_names=allowed_data_var_names + ) + ) + else: + data_vars = {} return xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs) From 404111d0fdf5f9854514f61c9aeeaf7dcc6a3b6f Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 23 Aug 2022 13:46:47 -0400 Subject: [PATCH 42/95] remove all references to chunking until chunks strategy merged upstream in dask --- doc/api.rst | 2 - doc/user-guide/testing.rst | 6 +- xarray/testing/strategies.py | 187 +------------------------------- xarray/tests/test_strategies.py | 15 --- 4 files changed, 3 insertions(+), 207 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 581ab74a6f9..189c5285b8f 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1081,8 +1081,6 @@ Hypothesis Testing Strategies testing.strategies.dataarrays testing.strategies.data_variables testing.strategies.datasets - testing.strategies.chunks - testing.strategies.chunksizes Exceptions ========== diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 018c17c5aad..8b2e2b9fea7 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -54,8 +54,6 @@ These strategies are accessible in the :py:module::`xarray.testing.strategies` m testing.strategies.dataarrays testing.strategies.data_variables testing.strategies.datasets - testing.strategies.chunks - 
testing.strategies.chunksizes Generating Examples ~~~~~~~~~~~~~~~~~~~ @@ -104,8 +102,8 @@ examples. data=xrst.np_arrays(shape=(3, 4), dtype=np.dtype("int32")) ).example() -This also works with strategies defined in other packages, for example the ``chunks`` strategy defined in -``dask.array.strategies``. +This also works with customs strategies, or strategies defined in other packages. +For example you could create a ``chunks`` strategy to specify particular chunking patterns for a dask-backed array. .. warning:: Passing multiple different strategies to the same constructor can lead to poor example generation performance. diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 01fbae00ac9..1411ebf2862 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,4 +1,4 @@ -from typing import Any, Hashable, List, Mapping, Optional, Set, Tuple, Union +from typing import Any, Hashable, List, Mapping, Tuple, Union import hypothesis.extra.numpy as npst import hypothesis.strategies as st @@ -512,188 +512,3 @@ def datasets( data_vars = {} return xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs) - - -@st.composite -def block_lengths( - draw: st.DrawFn, - ax_length: int, - min_chunk_length: int = 1, - max_chunk_length: Optional[int] = None, -) -> st.SearchStrategy[Tuple[int, ...]]: - """Generate different chunking patterns along one dimension of an array.""" - - chunks = [] - remaining_length = ax_length - while remaining_length > 0: - _max_chunk_length = ( - min(remaining_length, max_chunk_length) - if max_chunk_length - else remaining_length - ) - - if min_chunk_length > _max_chunk_length: - # if we are at the end of the array we have no choice but to use a smaller chunk - chunk = remaining_length - else: - chunk = draw( - st.integers(min_value=min_chunk_length, max_value=_max_chunk_length) - ) - - chunks.append(chunk) - remaining_length = remaining_length - chunk - - return tuple(chunks) - - -# TODO we could remove 
this once dask/9374 is merged upstream -@st.composite -def chunks( - draw: st.DrawFn, - shape: Tuple[int, ...], - axes: Optional[Union[int, Tuple[int, ...]]] = None, - min_chunk_length: int = 1, - max_chunk_length: Optional[int] = None, -) -> st.SearchStrategy[Tuple[Tuple[int, ...], ...]]: - """ - Generates different chunking patterns for an N-D array with a given shape. - - Returns chunking structure as a tuple of tuples of ints, with each inner tuple containing - the block lengths along one dimension of the array. - - You can limit chunking to specific axes using the `axes` kwarg, and specify minimum and - maximum block lengths. - - Requires the hypothesis package to be installed. - - Parameters - ---------- - shape : tuple of ints - Shape of the array for which you want to generate a chunking pattern. - axes : None or int or tuple of ints, optional - ... - min_chunk_length : int, default is 1 - Minimum chunk length to use along all axes. - max_chunk_length: int, optional - Maximum chunk length to use along all axes. - Default is that the chunk can be as long as the length of the array along that axis. 
- - Examples - -------- - Chunking along all axes by default - - >>> chunks(shape=(2, 3)).example() - ((1, 1), (1, 2)) - - Chunking only along the second axis - - >>> chunks(shape=(2, 3), axis=1).example() - ((2,), (1, 1, 1)) - - Minimum size chunks of length 2 along all axes - - >>> chunks(shape=(2, 3), min_chunk_length=2).example() - ((2,), (2, 1)) - - Smallest possible chunks along all axes - - >>> chunks(shape=(2, 3), max_chunk_length=1).example() - ((1, 1), (1, 1, 1)) - - Maximum size chunks along all axes - - >>> chunks(shape=(2, 3), axes=()).example() - ((2,), (3,)) - - See Also - -------- - testing.strategies.chunks - DataArray.chunk - DataArray.chunks - """ - - if min_chunk_length < 1 or not isinstance(min_chunk_length, int): - raise ValueError("min_chunk_length must be an integer >= 1") - - if max_chunk_length: - if max_chunk_length < 1 or not isinstance(min_chunk_length, int): - raise ValueError("max_chunk_length must be an integer >= 1") - - if axes is None: - axes = tuple(range(len(shape))) - elif isinstance(axes, int): - axes = (axes,) - - chunks = [] - for axis, ax_length in enumerate(shape): - - _max_chunk_length = ( - min(max_chunk_length, ax_length) if max_chunk_length else ax_length - ) - - if axes is not None and axis in axes: - block_lengths_along_ax = draw( - block_lengths( - ax_length, - min_chunk_length=min_chunk_length, - max_chunk_length=_max_chunk_length, - ) - ) - else: - # don't chunk along this dimension - block_lengths_along_ax = (ax_length,) - - chunks.append(block_lengths_along_ax) - - return tuple(chunks) - - -@st.composite -def chunksizes( - draw: st.DrawFn, - sizes: Mapping[str, int], - dims: Set[str] = None, - min_chunk_length: int = 1, - max_chunk_length: int = None, -) -> st.SearchStrategy[Mapping[str, Tuple[int, ...]]]: - """ - Generate different chunking patterns for an xarray object with given sizes. 
- - Returns chunking structure as a mapping of dimension names to tuples of ints, - with each tuple containing the block lengths along one dimension of the object. - - You can limit chunking to specific dimensions given by the `dim` kwarg. - - Requires the hypothesis package to be installed. - - Parameters - ---------- - sizes : mapping of dimension names to ints - Size of the object for which you want to generate a chunking pattern. - dims : set of str, optional - Dimensions to chunk along. Default is to chunk along all dimensions. - min_chunk_length : int, default is 1 - Minimum chunk length to use along all dimensions. - max_chunk_length: int, optional - Maximum chunk length to use along all dimensions. - Default is that the chunk can be as long as the length of the array along that dimension. - - See Also - -------- - testing.strategies.chunks - DataArray.chunk - DataArray.chunksizes - DataArray.sizes - """ - shape = tuple(sizes.values()) - axes = tuple(list(sizes.keys()).index(d) for d in dims) if dims else None - _chunks = draw( - chunks( - shape=shape, - axes=axes, - min_chunk_length=min_chunk_length, - max_chunk_length=max_chunk_length, - ) - ) - - return {d: c for d, c in zip(list(sizes.keys()), _chunks)} diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index dd6975dc1b9..7ac43253ef6 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -218,18 +218,3 @@ class TestDatasetsStrategy: @given(datasets()) def test_given_nothing(self, ds): assert isinstance(ds, Dataset) - - -@pytest.mark.xfail -@given(st.data()) -def test_chained_chunking_example(data): - import dask.array.strategies as dast - - def chunk(da): - return da.chunk(dast.chunks(da.shape)) - - chunked_dataarrays = xrst.dataarrays().flatmap(chunk) - - chunked_da = data.draw(chunked_dataarrays()) - - assert ... 
From 3764a7b11c1a8a529ed6b9fbcba238caa6d579f9 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 23 Aug 2022 18:17:56 -0400 Subject: [PATCH 43/95] added example of complicated strategy for dims dict --- doc/user-guide/testing.rst | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 8b2e2b9fea7..00107eeab74 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -102,7 +102,7 @@ examples. data=xrst.np_arrays(shape=(3, 4), dtype=np.dtype("int32")) ).example() -This also works with customs strategies, or strategies defined in other packages. +This also works with custom strategies, or strategies defined in other packages. For example you could create a ``chunks`` strategy to specify particular chunking patterns for a dask-backed array. .. warning:: @@ -167,6 +167,29 @@ You can also use this to specify that you want examples which are missing some p # Generates only dataarrays with no coordinates xrst.dataarrays(coords=st.just({})).example() +Through a combination of chaining strategies and fixing arguments, you can specify quite complicated requirements on the +objects your chained strategy will generate. + +.. ipython:: python + + fixed_x_variable_y_maybe_z = st.fixed_dictionaries( + {"x": st.just(2), "y": st.integers(3, 4)}, optional={"z": st.just(2)} + ) + + fixed_x_variable_y_maybe_z.example() + + special_dataarrays = xrst.dataarrays(dims=fixed_x_variable_y_maybe_z) + + special_dataarrays.example() + special_dataarrays.example() + +Here we have used one of hypothesis' built-in strategies ``fixed_dictionaries`` to create a strategy which generates +mappings of dimension names to lengths (i.e. the ``size`` of the xarray object we want). +This particular strategy will always generate an ``x`` dimension of length 2, and a ``y`` dimension of +length either 3 or 4, and will sometimes also generate a ``z`` dimension of length 2. 
+By feeding this strategy for dictionaries into the `dims` argument of xarray's `dataarrays` strategy, we can generate +arbitrary ``DataArray`` objects whose dimensions will always match these specifications. + Creating Duck-type Arrays ~~~~~~~~~~~~~~~~~~~~~~~~~ From 9723e454e50aafa0c1e0e174ed3b7b9e9a4bf376 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 30 Aug 2022 15:51:05 -0400 Subject: [PATCH 44/95] remove superfluous utils file --- xarray/testing/utils.py | 36 ------------------------------------ 1 file changed, 36 deletions(-) delete mode 100644 xarray/testing/utils.py diff --git a/xarray/testing/utils.py b/xarray/testing/utils.py deleted file mode 100644 index 2bd353e2116..00000000000 --- a/xarray/testing/utils.py +++ /dev/null @@ -1,36 +0,0 @@ -import warnings -from contextlib import contextmanager - - -@contextmanager -def suppress_warning(category, message=""): - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=category, message=message) - - yield - - -def create_dimension_names(ndim): - return [f"dim_{n}" for n in range(ndim)] - - -def valid_dims_from_axes(dims, axes): - if axes is None: - return None - - if axes == 0 and len(dims) == 0: - return None - - if isinstance(axes, int): - return dims[axes] - - return [dims[axis] for axis in axes] - - -def valid_axes_from_dims(all_dims, dims): - if dims is None: - return None - elif isinstance(dims, list): - return [all_dims.index(dim) for dim in dims] - else: - return all_dims.index(dims) From 2e44860aeed8d25510b547dd1974b7f071e7eb17 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 30 Aug 2022 16:58:46 -0400 Subject: [PATCH 45/95] removed elements strategy --- xarray/testing/strategies.py | 34 +++++----------------------------- 1 file changed, 5 insertions(+), 29 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 1411ebf2862..2cee03ccf65 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -9,7 
+9,6 @@ __all__ = [ "valid_dtypes", - "np_arrays", "names", "dimension_names", "dimension_sizes", @@ -30,25 +29,11 @@ valid_dtypes.__doc__ = """Generates only those numpy dtypes which xarray can handle.""" -def elements(dtype) -> st.SearchStrategy[Any]: - """ - Generates scalar elements to go in a numpy-like array. - - Requires the hypothesis package to be installed. - """ - max_value = 100 - min_value = 0 if dtype.kind == "u" else -max_value - - return npst.from_dtype( - dtype, allow_infinity=False, min_value=min_value, max_value=max_value - ) - - -@st.composite def np_arrays( - draw: st.DrawFn, - shape: Union[Tuple[int], st.SearchStrategy[Tuple[int]]] = None, - dtype: Union[np.dtype, st.SearchStrategy[np.dtype]] = None, + shape: Union[Tuple[int], st.SearchStrategy[Tuple[int]]] = npst.array_shapes( + max_side=4 + ), + dtype: Union[np.dtype, st.SearchStrategy[np.dtype]] = valid_dtypes, ) -> st.SearchStrategy[np.ndarray]: """ Generates arbitrary numpy arrays with xarray-compatible dtypes. @@ -61,17 +46,8 @@ def np_arrays( dtype Default is to use any of the valid_dtypes defined for xarray. 
""" - if shape is None: - shape = draw(npst.array_shapes()) - elif isinstance(shape, st.SearchStrategy): - shape = draw(shape) - - if dtype is None: - dtype = draw(valid_dtypes) - elif isinstance(dtype, st.SearchStrategy): - dtype = draw(dtype) - return draw(npst.arrays(dtype=dtype, shape=shape, elements=elements(dtype))) + return npst.arrays(dtype=dtype, shape=shape) names = st.text(st.characters(), min_size=1) From 1cc073b0e3f1888f80db7e7ab6bcabd6012a5b16 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 30 Aug 2022 16:59:38 -0400 Subject: [PATCH 46/95] removed np_arrays strategy from public API --- doc/api.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/api.rst b/doc/api.rst index 189c5285b8f..aba048453ef 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1072,7 +1072,6 @@ Hypothesis Testing Strategies :toctree: generated/ testing.strategies.valid_dtypes - testing.strategies.np_arrays testing.strategies.names testing.strategies.dimension_names testing.strategies.dimension_sizes From 603e6bbf99f585b3fd10de71f3f18f513e6ca24d Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 30 Aug 2022 17:04:32 -0400 Subject: [PATCH 47/95] min_ndims -> min_dims --- doc/user-guide/testing.rst | 2 +- xarray/testing/strategies.py | 32 ++++++++++++++++---------------- xarray/tests/test_strategies.py | 10 +++++----- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 00107eeab74..3a56e6b229b 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -120,7 +120,7 @@ For example you could create a ``chunks`` strategy to specify particular chunkin @given(st.data()) def test_something_else_inefficiently(data): - arrs = xrst.np_arrays() # generates arrays of any shape + arrs = npst.arrays(dtype=) # generates arrays of any shape dims = xrst.dimension_names() # generates lists of any number of dimensions # Drawing examples from this strategy is likely to have poor performance diff 
--git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 2cee03ccf65..63c176daa2e 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -55,8 +55,8 @@ def np_arrays( def dimension_names( - min_ndims: int = 0, - max_ndims: int = 3, + min_dims: int = 0, + max_dims: int = 3, ) -> st.SearchStrategy[List[str]]: """ Generates an arbitrary list of valid dimension names. @@ -65,23 +65,23 @@ def dimension_names( Parameters ---------- - min_ndims + min_dims Minimum number of dimensions in generated list. - max_ndims + max_dims Maximum number of dimensions in generated list. """ return st.lists( elements=names, - min_size=min_ndims, - max_size=max_ndims, + min_size=min_dims, + max_size=max_dims, unique=True, ) def dimension_sizes( - min_ndims: int = 0, - max_ndims: int = 3, + min_dims: int = 0, + max_dims: int = 3, min_length: int = 1, max_length: int = None, ) -> st.SearchStrategy[Mapping[str, int]]: @@ -92,10 +92,10 @@ def dimension_sizes( Parameters ---------- - min_ndims: int, optional + min_dims: int, optional Minimum number of dimensions in generated list. Default is 1. - max_ndims: int, optional + max_dims: int, optional Maximum number of dimensions in generated list. Default is 3. 
min_length: int, optional @@ -112,8 +112,8 @@ def dimension_sizes( return st.dictionaries( keys=names, values=st.integers(min_value=min_length, max_value=max_length), - min_size=min_ndims, - max_size=max_ndims, + min_size=min_dims, + max_size=max_dims, ) @@ -166,7 +166,7 @@ def variables( if data is not None and dims is None: # no dims -> generate dims to match data data = draw(data) - dims = draw(dimension_names(min_ndims=data.ndim, max_ndims=data.ndim)) + dims = draw(dimension_names(min_dims=data.ndim, max_dims=data.ndim)) elif dims is not None and data is None: # no data -> generate data to match dims @@ -198,7 +198,7 @@ def variables( else: # nothing provided, so generate everything consistently by drawing dims to match data data = draw(np_arrays()) - dims = draw(dimension_names(min_ndims=data.ndim, max_ndims=data.ndim)) + dims = draw(dimension_names(min_dims=data.ndim, max_dims=data.ndim)) if isinstance(attrs, st.SearchStrategy): attrs = draw(attrs) @@ -333,7 +333,7 @@ def dataarrays( if data is not None and dims is None: # no dims -> generate dims to match data data = draw(data) - dim_names = draw(dimension_names(min_ndims=data.ndim, max_ndims=data.ndim)) + dim_names = draw(dimension_names(min_dims=data.ndim, max_dims=data.ndim)) dim_sizes = {n: l for n, l in zip(dim_names, data.shape)} coords = draw(coordinate_variables(dim_sizes=dim_sizes)) @@ -373,7 +373,7 @@ def dataarrays( else: # nothing provided, so generate everything consistently by drawing dims to match data, and coords to match both data = draw(np_arrays()) - dim_names = draw(dimension_names(min_ndims=data.ndim, max_ndims=data.ndim)) + dim_names = draw(dimension_names(min_dims=data.ndim, max_dims=data.ndim)) dim_sizes = {n: l for n, l in zip(dim_names, data.shape)} coords = draw(coordinate_variables(dim_sizes=dim_sizes)) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 7ac43253ef6..4ffece16d4c 100644 --- a/xarray/tests/test_strategies.py +++ 
b/xarray/tests/test_strategies.py @@ -61,7 +61,7 @@ def test_types(self, dims): def test_unique(self, dims): assert len(set(dims)) == len(dims) - @given(dimension_names(min_ndims=3, max_ndims=3)) + @given(dimension_names(min_dims=3, max_dims=3)) def test_fixed_number_of_dims(self, dims): assert isinstance(dims, list) assert len(dims) == 3 @@ -75,7 +75,7 @@ def test_types(self, dims): assert isinstance(d, str) assert isinstance(n, int) - @given(dimension_sizes(min_ndims=3, max_ndims=3)) + @given(dimension_sizes(min_dims=3, max_dims=3)) def test_fixed_number_of_dims(self, dims): assert isinstance(dims, dict) assert len(dims) == 3 @@ -98,7 +98,7 @@ def test_given_fixed_dims_list_and_fixed_data(self, data): @given(st.data()) def test_given_arbitrary_dims_list_and_arbitrary_data(self, data): arrs = np_arrays(shape=(2, 3)) - dims = dimension_names(min_ndims=2) + dims = dimension_names(min_dims=2) var = data.draw(variables(data=arrs, dims=dims)) assert var.shape == (2, 3) @@ -127,7 +127,7 @@ def test_given_fixed_dims_list(self, data): @given(st.data()) def test_given_arbitrary_dims_list(self, data): - dims = dimension_names(min_ndims=1, max_ndims=1) + dims = dimension_names(min_dims=1, max_dims=1) var = data.draw(variables(dims=dims)) assert len(list(var.dims)) == 1 @@ -202,7 +202,7 @@ def test_given_data(self, data): @given(st.data()) def test_given_data_and_dims(self, data): arrs = np_arrays(shape=(2, 3)) - dims = dimension_names(min_ndims=2) + dims = dimension_names(min_dims=2) da = data.draw(dataarrays(data=arrs, dims=dims)) assert da.shape == (2, 3) From 63bb36254377d40bfeaf835f6179f0a06dc0e70e Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 31 Aug 2022 15:22:17 -0400 Subject: [PATCH 48/95] forbid non-matching dims and data completely --- doc/user-guide/testing.rst | 24 +++++++++----------- xarray/testing/strategies.py | 40 ++++++++++++++++++++++++++++----- xarray/tests/test_strategies.py | 16 ++++++++----- 3 files changed, 56 insertions(+), 24 deletions(-) 
diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 3a56e6b229b..f638628d113 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -106,33 +106,29 @@ This also works with custom strategies, or strategies defined in other packages. For example you could create a ``chunks`` strategy to specify particular chunking patterns for a dask-backed array. .. warning:: - Passing multiple different strategies to the same constructor can lead to poor example generation performance. + When passing multiple different strategies to the same constructor the drawn examples must be mutually compatible. - This is because in order to construct a valid xarray object to return, our strategies must check that the - variables / dimensions / coordinates are mutually compatible. We do this using ``hypothesis.assume``, which throws - away any generated examples not meeting the required condition. - - Therefore if you pass multiple custom strategies to a strategy constructor which are not compatible in enough cases, - most of the examples they generate will be mutually incompatible. This will likely lead to poor example generation - performance, manifesting as a ``hypothesis.errors.FailedHealthCheck`` being raised. For example: + In order to construct a valid xarray object to return, our strategies must check that the + variables / dimensions / coordinates are mutually compatible. If you pass multiple custom strategies to a strategy + constructor which are not compatible in all cases, an error will be raised, *even if they are still compatible in + other cases*. For example .. 
code-block:: @given(st.data()) def test_something_else_inefficiently(data): - arrs = npst.arrays(dtype=) # generates arrays of any shape + arrs = npst.arrays(dtype=valid_dtypes) # generates arrays of any shape dims = xrst.dimension_names() # generates lists of any number of dimensions - # Drawing examples from this strategy is likely to have poor performance + # Drawing examples from this strategy will raise a hypothesis.errors.Unsatisfiable error. var = data.draw(xrst.variables(data=arrs, dims=dims)) assert ... Here we have passed custom strategies which won't often be compatible: only rarely will the array's ``ndims`` - correspond to the number of dimensions drawn. - - To avoid this problem either allow xarray's strategies to automatically generate compatible data for you, or be more - selective about cases when passing multiple custom strategies to the same constructor. + correspond to the number of dimensions drawn. We forbid arguments that are only *sometimes* compatible in order to + avoid extremely poor example generation performance (as generating invalid examples and rejecting them is + potentially unboundedly inefficient). Fixing Arguments diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 63c176daa2e..56b45f1354d 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -3,7 +3,7 @@ import hypothesis.extra.numpy as npst import hypothesis.strategies as st import numpy as np -from hypothesis import assume +from hypothesis.errors import Unsatisfiable import xarray as xr @@ -152,6 +152,11 @@ def variables( data argument if given or arbitrarily generated if not. Default is to generate arbitrary dimension names for each axis in data. attrs: Strategy which generates dicts, optional + + Raises + ------ + hypothesis.errors.Unsatisfiable + If custom strategies passed try to draw examples which together cannot create a valid Variable. 
""" if any( @@ -188,12 +193,22 @@ def variables( # TODO or we could just raise in this case? if isinstance(dims, List): data = draw(data) - assume(data.ndim == len(dims)) + if data.ndim != len(dims): + raise Unsatisfiable( + f"Strategy attempting to generate data with {data.ndim} dims but {len(dims)} " + "unique dimension names. Please only pass strategies which are guaranteed to " + "draw compatible examples for data and dims." + ) else: # should be a mapping of form {dim_names: lengths} data = draw(data) shape = tuple(dims.values()) - assume(data.shape == shape) + if data.shape != shape: + raise Unsatisfiable( + f"Strategy attempting to generate data with shape {data.shape} dims but dimension " + f"sizes implying shape {shape}. Please only pass strategies which are guaranteed to " + "draw compatible examples for data and dims." + ) else: # nothing provided, so generate everything consistently by drawing dims to match data @@ -322,6 +337,11 @@ def dataarrays( name: Strategy for generating a string name, optional Default is to use the `names` strategy, or to create an unnamed DataArray. attrs: Strategy which generates dicts, optional + + Raises + ------ + hypothesis.errors.Unsatisfiable + If custom strategies passed try to draw examples which together cannot create a valid DataArray. """ if name is None: @@ -359,14 +379,24 @@ def dataarrays( if isinstance(dims, List): dim_names = dims data = draw(data) - assume(data.ndim == len(dims)) + if data.ndim != len(dims): + raise Unsatisfiable( + f"Strategy attempting to generate data with {data.ndim} dims but {len(dims)} " + "unique dimension names. Please only pass strategies which are guaranteed to " + "draw compatible examples for data and dims." 
+ ) dim_sizes = {n: l for n, l in zip(dims, data.shape)} else: # should be a mapping of form {dim_names: lengths} data = draw(data) dim_sizes = dims dim_names, shape = list(dims.keys()), tuple(dims.values()) - assume(data.shape == shape) + if data.shape != shape: + raise Unsatisfiable( + f"Strategy attempting to generate data with shape {data.shape} dims but dimension " + f"sizes implying shape {shape}. Please only pass strategies which are guaranteed to " + "draw compatible examples for data and dims." + ) coords = draw(coordinate_variables(dim_sizes=dim_sizes)) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 4ffece16d4c..25566bf95fb 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -8,6 +8,7 @@ import hypothesis.extra.numpy as npst import hypothesis.strategies as st from hypothesis import given +from hypothesis.errors import Unsatisfiable from xarray import DataArray, Dataset from xarray.core.variable import Variable @@ -98,11 +99,14 @@ def test_given_fixed_dims_list_and_fixed_data(self, data): @given(st.data()) def test_given_arbitrary_dims_list_and_arbitrary_data(self, data): arrs = np_arrays(shape=(2, 3)) - dims = dimension_names(min_dims=2) + dims = dimension_names(min_dims=2, max_dims=2) var = data.draw(variables(data=arrs, dims=dims)) - assert var.shape == (2, 3) + dims = dimension_names(min_dims=3) + with pytest.raises(Unsatisfiable): + data.draw(variables(data=arrs, dims=dims)) + @given(st.data()) def test_given_fixed_data(self, data): arr = np.asarray([[1, 2], [3, 4]]) @@ -202,15 +206,17 @@ def test_given_data(self, data): @given(st.data()) def test_given_data_and_dims(self, data): arrs = np_arrays(shape=(2, 3)) - dims = dimension_names(min_dims=2) + dims = dimension_names(min_dims=2, max_dims=2) da = data.draw(dataarrays(data=arrs, dims=dims)) - assert da.shape == (2, 3) + dims = dimension_names(min_dims=3, max_dims=3) + with pytest.raises(Unsatisfiable): + 
data.draw(dataarrays(data=arrs, dims=dims)) + arrs = np_arrays(shape=(3, 4)) dims = st.just({"x": 3, "y": 4}) da = data.draw(dataarrays(data=arrs, dims=dims)) - assert da.sizes == {"x": 3, "y": 4} From 69ec230e3d34f24c9ddcc646dc5ca6d6f948d569 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 31 Aug 2022 17:54:00 -0400 Subject: [PATCH 49/95] simple test for data_variables strategy --- xarray/testing/strategies.py | 10 ++++++++-- xarray/tests/test_strategies.py | 14 ++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 56b45f1354d..4a99e37a842 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -420,7 +420,7 @@ def dataarrays( def data_variables( draw: st.DrawFn, dim_sizes: Mapping[str, int], - allowed_names: st.SearchStrategy[str] = None, + allowed_names: st.SearchStrategy[str] = names, ) -> st.SearchStrategy[Mapping[str, xr.Variable]]: """ Generates dicts of alignable Variable objects for use as Dataset data variables. 
@@ -492,7 +492,10 @@ def datasets( attrs: Strategy which generates dicts, optional """ - if any(arg is not None for arg in [data_vars, coords, dims, attrs]): + if coords is not None: + raise NotImplementedError() + + if any(arg is not None for arg in [data_vars, dims]): raise NotImplementedError() else: # nothing provided, so generate everything consistently by drawing dims to match data, and coords to match both @@ -517,4 +520,7 @@ def datasets( else: data_vars = {} + if attrs is not None: + raise NotImplementedError() + return xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 25566bf95fb..9c6acfe089a 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -220,6 +220,20 @@ def test_given_data_and_dims(self, data): assert da.sizes == {"x": 3, "y": 4} +class TestDataVariablesStrategy: + @given(st.data()) + def test_given_only_sizes(self, data): + dim_sizes = {"x": 2, "y": 3} + data_vars = data.draw(data_variables(dim_sizes=dim_sizes)) + for k, v in data_vars.items(): + assert isinstance(v, Variable) + assert set(v.sizes.items()).issubset(set(dim_sizes.items())) + + @given(st.data()) + def test_given_restricted_names(self, data): + ... 
+ + class TestDatasetsStrategy: @given(datasets()) def test_given_nothing(self, ds): From e5c7e230f7499483401aa91a7488340d7c55e099 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 2 Sep 2022 14:14:06 -0400 Subject: [PATCH 50/95] passing arguments to datasets strategy --- xarray/testing/strategies.py | 71 +++++++++++++++++++++++++++++++-- xarray/tests/test_strategies.py | 29 ++++++++++++++ 2 files changed, 97 insertions(+), 3 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 4a99e37a842..efc01d3644e 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -490,15 +490,57 @@ def datasets( data argument if given or arbitrarily generated if not. Default is to generate arbitrary dimension sizes. attrs: Strategy which generates dicts, optional + + Raises + ------ + hypothesis.errors.Unsatisfiable + If custom strategies passed try to draw examples which together cannot create a valid DataArray. """ if coords is not None: raise NotImplementedError() - if any(arg is not None for arg in [data_vars, dims]): - raise NotImplementedError() + if data_vars is not None and dims is None: + # no dims -> generate dims to match data + data_vars = draw(data_vars) + dim_sizes = _find_overall_sizes(data_vars) + # TODO only draw coordinate variables whose names don't conflict with data variables + coords = draw(coordinate_variables(dim_sizes=dim_sizes)) + + elif data_vars is None and dims is not None: + # no data -> generate data to match dims + if isinstance(dims, List): + # TODO support dims as list too? 
+ raise NotImplementedError() + else: + # should be a mapping of form {dim_names: lengths} + dim_sizes = draw(dims) + coords = draw(coordinate_variables(dim_sizes=dim_sizes)) + coord_names = list(coords.keys()) + allowed_data_var_names = names.filter(lambda n: n not in coord_names) + data_vars = draw( + data_variables( + dim_sizes=dim_sizes, allowed_names=allowed_data_var_names + ) + ) + + elif data_vars is not None and dims is not None: + # both data and dims provided -> check drawn examples are compatible + dims = draw(dims) + if isinstance(dims, List): + # TODO support dims as list too? + raise NotImplementedError() + else: + # should be a mapping of form {dim_names: lengths} + dim_sizes = dims + data_vars = draw(data_vars) + _check_compatible_sizes(data_vars, dim_sizes) + + # TODO only draw coordinate variables whose names don't conflict with data variables + coords = draw(coordinate_variables(dim_sizes=dim_sizes)) + else: - # nothing provided, so generate everything consistently by drawing dims to match data, and coords to match both + # nothing provided, so generate everything consistently by drawing data to match dims, and coords to match both dim_sizes = draw(dimension_sizes()) # Allow for no coordinate variables - helps with shrinking @@ -524,3 +566,26 @@ def datasets( raise NotImplementedError() return xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs) + + +def _find_overall_sizes(vars: Mapping[str, xr.Variable]) -> Mapping[str, int]: + """Given a set of variables, find their common sizes.""" + # TODO raise an error if inconsistent (i.e. if different values appear under same key) + sizes_dicts = [v.sizes for v in vars.values()] + dim_sizes = {d: s for dim_sizes in sizes_dicts for d, s in dim_sizes.items()} + return dim_sizes + + +def _check_compatible_sizes( + vars: Mapping[str, xr.Variable], dim_sizes: Mapping[str, int] +): + """Check set of variables have sizes compatible with given dim_sizes. 
If not raise Unsatisfiable error.""" + + for name, v in vars.items(): + if not set(v.sizes.items()).issubset(set(dim_sizes.items())): + raise Unsatisfiable( + f"Strategy attempting to generate object with dimension sizes {dim_sizes} but drawn " + f"variable {name} has sizes {v.sizes}, which is incompatible." + "Please only pass strategies which are guaranteed to draw compatible examples for data " + "and dims." + ) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 9c6acfe089a..068e54cb65b 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -238,3 +238,32 @@ class TestDatasetsStrategy: @given(datasets()) def test_given_nothing(self, ds): assert isinstance(ds, Dataset) + + @given(st.data()) + def test_given_data(self, data): + dim_sizes = {"x": 3, "y": 4} + data_vars = data.draw(data_variables(dim_sizes=dim_sizes)) + ds = data.draw(datasets(data_vars=st.just(data_vars))) + assert set(ds.sizes.items()).issubset(set(dim_sizes.items())) + + @given(st.data()) + def test_given_dims(self, data): + dim_sizes = {"x": 3, "y": 4} + ds = data.draw(datasets(dims=st.just(dim_sizes))) + assert set(ds.sizes.items()).issubset(set(dim_sizes.items())) + + @given(st.data()) + def test_given_data_and_dims(self, data): + dim_sizes = {"x": 3, "y": 4} + data_vars = data.draw(data_variables(dim_sizes=dim_sizes)) + ds = data.draw(datasets(data_vars=st.just(data_vars), dims=st.just(dim_sizes))) + assert set(ds.sizes.items()).issubset(set(dim_sizes.items())) + + incompatible_dim_sizes = {"x": 1, "y": 4} + data_vars = {"foo": Variable(data=[0, 1, 2], dims="x")} + with pytest.raises(Unsatisfiable, match="drawn variable"): + data.draw( + datasets( + data_vars=st.just(data_vars), dims=st.just(incompatible_dim_sizes) + ) + ) From 52f2490a18082347c5bc4f09317eee4f0c0c8cf4 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 2 Sep 2022 14:25:23 -0400 Subject: [PATCH 51/95] whatsnew --- doc/whats-new.rst | 4 ++++ 1 file changed, 
4 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9ce51e48983..40403519301 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,6 +22,10 @@ v2022.07.0 (unreleased) New Features ~~~~~~~~~~~~ +- Added a suite of hypothesis strategies for generating xarray objects containing arbitrary data, useful for testing. + Accessible under :py:func:`testing.strategies`, and documented in a new page on testing in the User Guide. + (:issue:`6911`, :pull:`6908`) + By `Tom Nicholas `_. Breaking changes ~~~~~~~~~~~~~~~~ From 9b964708c8b8895ebbbf9340d891cb9aa8c98fe7 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 2 Sep 2022 14:53:41 -0400 Subject: [PATCH 52/95] add attrs strategy --- doc/api.rst | 1 + doc/user-guide/testing.rst | 1 + xarray/testing/strategies.py | 17 ++++++++++++++++- xarray/tests/test_strategies.py | 8 ++++++++ 4 files changed, 26 insertions(+), 1 deletion(-) diff --git a/doc/api.rst b/doc/api.rst index aba048453ef..1be613a37a4 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1075,6 +1075,7 @@ Hypothesis Testing Strategies testing.strategies.names testing.strategies.dimension_names testing.strategies.dimension_sizes + testing.strategies.attrs testing.strategies.variables testing.strategies.coordinate_variables testing.strategies.dataarrays diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index f638628d113..f4d86620c45 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -49,6 +49,7 @@ These strategies are accessible in the :py:module::`xarray.testing.strategies` m testing.strategies.names testing.strategies.dimension_names testing.strategies.dimension_sizes + testing.strategies.attrs testing.strategies.variables testing.strategies.coordinate_variables testing.strategies.dataarrays diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index efc01d3644e..8351ca058db 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -12,6 +12,7 @@ "names", 
"dimension_names", "dimension_sizes", + "attrs", "variables", "coordinate_variables", "dataarrays", @@ -50,7 +51,7 @@ def np_arrays( return npst.arrays(dtype=dtype, shape=shape) -names = st.text(st.characters(), min_size=1) +names: st.SearchStrategy[str] = st.text(st.characters(), min_size=1) names.__doc__ = """Generates arbitrary string names for dimensions / variables.""" @@ -117,6 +118,20 @@ def dimension_sizes( ) +_attr_keys = st.text(st.characters()) +_attr_values = st.none() | st.booleans() | st.text(st.characters()) | np_arrays() + + +attrs: st.SearchStrategy[Mapping[str, Any]] = st.recursive( + st.dictionaries(_attr_keys, _attr_values), + lambda children: st.dictionaries(_attr_keys, children), + max_leaves=3, +) +attrs.__doc__ = ( + """Generates arbitrary valid attributes dictionaries for xarray objects.""" +) + + # Is there a way to do this in general? # Could make a Protocol... T_Array = Any diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 068e54cb65b..22d9d72491d 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -13,6 +13,7 @@ from xarray import DataArray, Dataset from xarray.core.variable import Variable from xarray.testing.strategies import ( + attrs, coordinate_variables, data_variables, dataarrays, @@ -82,6 +83,13 @@ def test_fixed_number_of_dims(self, dims): assert len(dims) == 3 +class TestAttrsStrategy: + @given(attrs) + def test_type(self, attrs): + assert isinstance(attrs, dict) + # TODO how to test the types of values in a recursive object? 
+ + class TestVariablesStrategy: @given(variables()) def test_given_nothing(self, var): From 41fe0b489d0e189187c57a140b85a3224fc3d181 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 2 Sep 2022 15:00:23 -0400 Subject: [PATCH 53/95] autogenerate attrs for all objects --- xarray/testing/strategies.py | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 8351ca058db..8b0b5f45848 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -144,7 +144,7 @@ def variables( dims: Union[ st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] ] = None, - attrs: st.SearchStrategy[Mapping] = None, + attrs: st.SearchStrategy[Mapping] = attrs, ) -> st.SearchStrategy[xr.Variable]: """ Generates arbitrary xarray.Variable objects. @@ -230,13 +230,7 @@ def variables( data = draw(np_arrays()) dims = draw(dimension_names(min_dims=data.ndim, max_dims=data.ndim)) - if isinstance(attrs, st.SearchStrategy): - attrs = draw(attrs) - elif attrs is None: - # TODO autogenerate some attributes - ... - - return xr.Variable(dims=dims, data=data, attrs=attrs) + return xr.Variable(dims=dims, data=data, attrs=draw(attrs)) @st.composite @@ -324,7 +318,7 @@ def dataarrays( st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] ] = None, name: st.SearchStrategy[Union[str, None]] = None, - attrs: st.SearchStrategy[Mapping] = None, + attrs: st.SearchStrategy[Mapping] = attrs, ) -> st.SearchStrategy[xr.DataArray]: """ Generates arbitrary xarray.DataArray objects. 
@@ -427,7 +421,7 @@ def dataarrays( coords=coords, name=name, dims=dim_names, - attrs=attrs, + attrs=draw(attrs), ) @@ -478,7 +472,7 @@ def datasets( dims: Union[ st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] ] = None, - attrs: st.SearchStrategy[Mapping] = None, + attrs: st.SearchStrategy[Mapping] = attrs, ) -> st.SearchStrategy[xr.Dataset]: """ Generates arbitrary xarray.Dataset objects. @@ -577,10 +571,7 @@ def datasets( else: data_vars = {} - if attrs is not None: - raise NotImplementedError() - - return xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs) + return xr.Dataset(data_vars=data_vars, coords=coords, attrs=draw(attrs)) def _find_overall_sizes(vars: Mapping[str, xr.Variable]) -> Mapping[str, int]: From 0e53aa10594c409ae35880df74ccb8996dd39f1d Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 2 Sep 2022 15:21:04 -0400 Subject: [PATCH 54/95] attempt to make attrs strategy quicker --- xarray/testing/strategies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 8b0b5f45848..f126889bb78 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -125,7 +125,7 @@ def dimension_sizes( attrs: st.SearchStrategy[Mapping[str, Any]] = st.recursive( st.dictionaries(_attr_keys, _attr_values), lambda children: st.dictionaries(_attr_keys, children), - max_leaves=3, + max_leaves=2, ) attrs.__doc__ = ( """Generates arbitrary valid attributes dictionaries for xarray objects.""" From f659b4bdd4074e0110bb4437951594618c42ae66 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 2 Sep 2022 15:21:14 -0400 Subject: [PATCH 55/95] extend deadline --- xarray/tests/test_strategies.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 22d9d72491d..9917e0b5a25 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -7,7 
+7,7 @@ import hypothesis.extra.numpy as npst import hypothesis.strategies as st -from hypothesis import given +from hypothesis import given, settings from hypothesis.errors import Unsatisfiable from xarray import DataArray, Dataset @@ -260,6 +260,7 @@ def test_given_dims(self, data): ds = data.draw(datasets(dims=st.just(dim_sizes))) assert set(ds.sizes.items()).issubset(set(dim_sizes.items())) + @settings(deadline=400) @given(st.data()) def test_given_data_and_dims(self, data): dim_sizes = {"x": 3, "y": 4} From d1be3ee672171d8c6f670bcea6d748cb1b427948 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 6 Sep 2022 17:31:04 -0400 Subject: [PATCH 56/95] attempt to speed up attrs strategy --- xarray/testing/strategies.py | 10 ++++++++-- xarray/tests/test_strategies.py | 1 - 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index f126889bb78..2f806f47ab1 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -119,13 +119,19 @@ def dimension_sizes( _attr_keys = st.text(st.characters()) -_attr_values = st.none() | st.booleans() | st.text(st.characters()) | np_arrays() +_small_arrays = np_arrays( + shape=npst.array_shapes( + max_side=2, + max_dims=3, + ) +) +_attr_values = st.none() | st.booleans() | st.text(st.characters()) | _small_arrays attrs: st.SearchStrategy[Mapping[str, Any]] = st.recursive( st.dictionaries(_attr_keys, _attr_values), lambda children: st.dictionaries(_attr_keys, children), - max_leaves=2, + max_leaves=5, ) attrs.__doc__ = ( """Generates arbitrary valid attributes dictionaries for xarray objects.""" diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 9917e0b5a25..17ffa7dc60e 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -260,7 +260,6 @@ def test_given_dims(self, data): ds = data.draw(datasets(dims=st.just(dim_sizes))) assert 
set(ds.sizes.items()).issubset(set(dim_sizes.items())) - @settings(deadline=400) @given(st.data()) def test_given_data_and_dims(self, data): dim_sizes = {"x": 3, "y": 4} From e88f5f0552e69e2b5d3dd17f054db518cdb9ce45 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 6 Sep 2022 22:37:37 -0400 Subject: [PATCH 57/95] promote all strategies to be functions --- xarray/testing/strategies.py | 60 +++++++++++++++++---------------- xarray/tests/test_strategies.py | 6 ++-- 2 files changed, 34 insertions(+), 32 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 2f806f47ab1..3224a7e6205 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -20,21 +20,24 @@ "datasets", ] + # required to exclude weirder dtypes e.g. unicode, byte_string, array, or nested dtypes. -valid_dtypes: st.SearchStrategy[np.dtype] = ( - npst.integer_dtypes() - | npst.unsigned_integer_dtypes() - | npst.floating_dtypes() - | npst.complex_number_dtypes() -) -valid_dtypes.__doc__ = """Generates only those numpy dtypes which xarray can handle.""" +def valid_dtypes() -> st.SearchStrategy[np.dtype]: + """Generates only those numpy dtypes which xarray can handle.""" + + return ( + npst.integer_dtypes() + | npst.unsigned_integer_dtypes() + | npst.floating_dtypes() + | npst.complex_number_dtypes() + ) def np_arrays( shape: Union[Tuple[int], st.SearchStrategy[Tuple[int]]] = npst.array_shapes( max_side=4 ), - dtype: Union[np.dtype, st.SearchStrategy[np.dtype]] = valid_dtypes, + dtype: Union[np.dtype, st.SearchStrategy[np.dtype]] = valid_dtypes(), ) -> st.SearchStrategy[np.ndarray]: """ Generates arbitrary numpy arrays with xarray-compatible dtypes. 
@@ -51,8 +54,9 @@ def np_arrays( return npst.arrays(dtype=dtype, shape=shape) -names: st.SearchStrategy[str] = st.text(st.characters(), min_size=1) -names.__doc__ = """Generates arbitrary string names for dimensions / variables.""" +def names() -> st.SearchStrategy[str]: + """Generates arbitrary string names for dimensions / variables.""" + return st.text(st.characters(), min_size=1) def dimension_names( @@ -73,7 +77,7 @@ def dimension_names( """ return st.lists( - elements=names, + elements=names(), min_size=min_dims, max_size=max_dims, unique=True, @@ -111,7 +115,7 @@ def dimension_sizes( max_length = min_length + 5 return st.dictionaries( - keys=names, + keys=names(), values=st.integers(min_value=min_length, max_value=max_length), min_size=min_dims, max_size=max_dims, @@ -128,14 +132,13 @@ def dimension_sizes( _attr_values = st.none() | st.booleans() | st.text(st.characters()) | _small_arrays -attrs: st.SearchStrategy[Mapping[str, Any]] = st.recursive( - st.dictionaries(_attr_keys, _attr_values), - lambda children: st.dictionaries(_attr_keys, children), - max_leaves=5, -) -attrs.__doc__ = ( +def attrs() -> st.SearchStrategy[Mapping[str, Any]]: """Generates arbitrary valid attributes dictionaries for xarray objects.""" -) + return st.recursive( + st.dictionaries(_attr_keys, _attr_values), + lambda children: st.dictionaries(_attr_keys, children), + max_leaves=3, + ) # Is there a way to do this in general? @@ -150,7 +153,7 @@ def variables( dims: Union[ st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] ] = None, - attrs: st.SearchStrategy[Mapping] = attrs, + attrs: st.SearchStrategy[Mapping] = attrs(), ) -> st.SearchStrategy[xr.Variable]: """ Generates arbitrary xarray.Variable objects. 
@@ -299,7 +302,7 @@ def coordinate_variables( non_dim_coord_vars = draw(_alignable_variables(dim_sizes=dim_sizes)) # can't have same name as a dimension - valid_non_dim_coord_names = names.filter(lambda n: n not in dim_names) + valid_non_dim_coord_names = names().filter(lambda n: n not in dim_names) non_dim_coord_names = draw( st.lists( valid_non_dim_coord_names, @@ -323,8 +326,8 @@ def dataarrays( dims: Union[ st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] ] = None, - name: st.SearchStrategy[Union[str, None]] = None, - attrs: st.SearchStrategy[Mapping] = attrs, + name: st.SearchStrategy[Union[str, None]] = names(), + attrs: st.SearchStrategy[Mapping] = attrs(), ) -> st.SearchStrategy[xr.DataArray]: """ Generates arbitrary xarray.DataArray objects. @@ -359,8 +362,7 @@ def dataarrays( If custom strategies passed try to draw examples which together cannot create a valid DataArray. """ - if name is None: - name = draw(st.none() | names) + name = draw(st.none() | name) if coords is not None: raise NotImplementedError() @@ -435,7 +437,7 @@ def dataarrays( def data_variables( draw: st.DrawFn, dim_sizes: Mapping[str, int], - allowed_names: st.SearchStrategy[str] = names, + allowed_names: st.SearchStrategy[str] = names(), ) -> st.SearchStrategy[Mapping[str, xr.Variable]]: """ Generates dicts of alignable Variable objects for use as Dataset data variables. @@ -478,7 +480,7 @@ def datasets( dims: Union[ st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] ] = None, - attrs: st.SearchStrategy[Mapping] = attrs, + attrs: st.SearchStrategy[Mapping] = attrs(), ) -> st.SearchStrategy[xr.Dataset]: """ Generates arbitrary xarray.Dataset objects. 
@@ -532,7 +534,7 @@ def datasets( dim_sizes = draw(dims) coords = draw(coordinate_variables(dim_sizes=dim_sizes)) coord_names = list(coords.keys()) - allowed_data_var_names = names.filter(lambda n: n not in coord_names) + allowed_data_var_names = names().filter(lambda n: n not in coord_names) data_vars = draw( data_variables( dim_sizes=dim_sizes, allowed_names=allowed_data_var_names @@ -565,7 +567,7 @@ def datasets( coords = {} coord_names = list(coords.keys()) - allowed_data_var_names = names.filter(lambda n: n not in coord_names) + allowed_data_var_names = names().filter(lambda n: n not in coord_names) # Allow for no data variables - helps with shrinking if draw(st.booleans()): diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 17ffa7dc60e..4b5b207babd 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -7,7 +7,7 @@ import hypothesis.extra.numpy as npst import hypothesis.strategies as st -from hypothesis import given, settings +from hypothesis import given from hypothesis.errors import Unsatisfiable from xarray import DataArray, Dataset @@ -37,7 +37,7 @@ def test_fixed_dtype(self, arr): @given(st.data()) def test_arbitrary_valid_dtype(self, data): - valid_dtype = data.draw(valid_dtypes) + valid_dtype = data.draw(valid_dtypes()) arr = data.draw(np_arrays(dtype=valid_dtype)) assert arr.dtype == valid_dtype @@ -84,7 +84,7 @@ def test_fixed_number_of_dims(self, dims): class TestAttrsStrategy: - @given(attrs) + @given(attrs()) def test_type(self, attrs): assert isinstance(attrs, dict) # TODO how to test the types of values in a recursive object? 
From 4b888875d84ff62875a3598466ef56be692932f9 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 6 Sep 2022 22:41:09 -0400 Subject: [PATCH 58/95] valid_dtypes -> numeric_dtypes --- doc/api.rst | 2 +- doc/user-guide/testing.rst | 4 ++-- xarray/testing/strategies.py | 8 ++++---- xarray/tests/test_strategies.py | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 1be613a37a4..1a7c97cff77 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1071,7 +1071,7 @@ Hypothesis Testing Strategies .. autosummary:: :toctree: generated/ - testing.strategies.valid_dtypes + testing.strategies.numeric_dtypes testing.strategies.names testing.strategies.dimension_names testing.strategies.dimension_sizes diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index f4d86620c45..5eeaf7d3eb8 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -44,7 +44,7 @@ These strategies are accessible in the :py:module::`xarray.testing.strategies` m .. autosummary:: - testing.strategies.valid_dtypes + testing.strategies.numeric_dtypes testing.strategies.np_arrays testing.strategies.names testing.strategies.dimension_names @@ -118,7 +118,7 @@ For example you could create a ``chunks`` strategy to specify particular chunkin @given(st.data()) def test_something_else_inefficiently(data): - arrs = npst.arrays(dtype=valid_dtypes) # generates arrays of any shape + arrs = npst.arrays(dtype=numeric_dtypes) # generates arrays of any shape dims = xrst.dimension_names() # generates lists of any number of dimensions # Drawing examples from this strategy will raise a hypothesis.errors.Unsatisfiable error. 
diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 3224a7e6205..b520f6d5c96 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -8,7 +8,7 @@ import xarray as xr __all__ = [ - "valid_dtypes", + "numeric_dtypes", "names", "dimension_names", "dimension_sizes", @@ -22,7 +22,7 @@ # required to exclude weirder dtypes e.g. unicode, byte_string, array, or nested dtypes. -def valid_dtypes() -> st.SearchStrategy[np.dtype]: +def numeric_dtypes() -> st.SearchStrategy[np.dtype]: """Generates only those numpy dtypes which xarray can handle.""" return ( @@ -37,7 +37,7 @@ def np_arrays( shape: Union[Tuple[int], st.SearchStrategy[Tuple[int]]] = npst.array_shapes( max_side=4 ), - dtype: Union[np.dtype, st.SearchStrategy[np.dtype]] = valid_dtypes(), + dtype: Union[np.dtype, st.SearchStrategy[np.dtype]] = numeric_dtypes(), ) -> st.SearchStrategy[np.ndarray]: """ Generates arbitrary numpy arrays with xarray-compatible dtypes. @@ -48,7 +48,7 @@ def np_arrays( ---------- shape dtype - Default is to use any of the valid_dtypes defined for xarray. + Default is to use any of the numeric_dtypes defined for xarray. 
""" return npst.arrays(dtype=dtype, shape=shape) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 4b5b207babd..1f339bbd19b 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -21,7 +21,7 @@ dimension_names, dimension_sizes, np_arrays, - valid_dtypes, + numeric_dtypes, variables, ) @@ -37,7 +37,7 @@ def test_fixed_dtype(self, arr): @given(st.data()) def test_arbitrary_valid_dtype(self, data): - valid_dtype = data.draw(valid_dtypes()) + valid_dtype = data.draw(numeric_dtypes()) arr = data.draw(np_arrays(dtype=valid_dtype)) assert arr.dtype == valid_dtype From 2a1dc662ff84bc784128fa5de37fa41919c8f880 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 6 Sep 2022 22:44:22 -0400 Subject: [PATCH 59/95] changed hypothesis error type --- xarray/testing/strategies.py | 22 +++++++++++----------- xarray/tests/test_strategies.py | 8 ++++---- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index b520f6d5c96..27347de90c2 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -3,7 +3,7 @@ import hypothesis.extra.numpy as npst import hypothesis.strategies as st import numpy as np -from hypothesis.errors import Unsatisfiable +from hypothesis.errors import InvalidArgument import xarray as xr @@ -179,7 +179,7 @@ def variables( Raises ------ - hypothesis.errors.Unsatisfiable + hypothesis.errors.InvalidArgument If custom strategies passed try to draw examples which together cannot create a valid Variable. """ @@ -213,12 +213,12 @@ def variables( dims = draw(dims) # TODO is there another way to enforce these assumptions? This is very like to fail hypothesis' health checks - # TODO how do I write a test that checks that the hypothesis Unsatisfiable error will be raised? + # TODO how do I write a test that checks that the hypothesis InvalidArgument error will be raised? # TODO or we could just raise in this case? 
if isinstance(dims, List): data = draw(data) if data.ndim != len(dims): - raise Unsatisfiable( + raise InvalidArgument( f"Strategy attempting to generate data with {data.ndim} dims but {len(dims)} " "unique dimension names. Please only pass strategies which are guaranteed to " "draw compatible examples for data and dims." @@ -228,7 +228,7 @@ def variables( data = draw(data) shape = tuple(dims.values()) if data.shape != shape: - raise Unsatisfiable( + raise InvalidArgument( f"Strategy attempting to generate data with shape {data.shape} dims but dimension " f"sizes implying shape {shape}. Please only pass strategies which are guaranteed to " "draw compatible examples for data and dims." @@ -358,7 +358,7 @@ def dataarrays( Raises ------ - hypothesis.errors.Unsatisfiable + hypothesis.errors.InvalidArgument If custom strategies passed try to draw examples which together cannot create a valid DataArray. """ @@ -397,7 +397,7 @@ def dataarrays( dim_names = dims data = draw(data) if data.ndim != len(dims): - raise Unsatisfiable( + raise InvalidArgument( f"Strategy attempting to generate data with {data.ndim} dims but {len(dims)} " "unique dimension names. Please only pass strategies which are guaranteed to " "draw compatible examples for data and dims." @@ -409,7 +409,7 @@ def dataarrays( dim_sizes = dims dim_names, shape = list(dims.keys()), tuple(dims.values()) if data.shape != shape: - raise Unsatisfiable( + raise InvalidArgument( f"Strategy attempting to generate data with shape {data.shape} dims but dimension " f"sizes implying shape {shape}. Please only pass strategies which are guaranteed to " "draw compatible examples for data and dims." @@ -510,7 +510,7 @@ def datasets( Raises ------ - hypothesis.errors.Unsatisfiable + hypothesis.errors.InvalidArgument If custom strategies passed try to draw examples which together cannot create a valid DataArray. 
""" @@ -593,11 +593,11 @@ def _find_overall_sizes(vars: Mapping[str, xr.Variable]) -> Mapping[str, int]: def _check_compatible_sizes( vars: Mapping[str, xr.Variable], dim_sizes: Mapping[str, int] ): - """Check set of variables have sizes compatible with given dim_sizes. If not raise Unsatisfiable error.""" + """Check set of variables have sizes compatible with given dim_sizes. If not raise InvalidArgument error.""" for name, v in vars.items(): if not set(v.sizes.items()).issubset(set(dim_sizes.items())): - raise Unsatisfiable( + raise InvalidArgument( f"Strategy attempting to generate object with dimension sizes {dim_sizes} but drawn " f"variable {name} has sizes {v.sizes}, which is incompatible." "Please only pass strategies which are guaranteed to draw compatible examples for data " diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 1f339bbd19b..8377a259db3 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -8,7 +8,7 @@ import hypothesis.extra.numpy as npst import hypothesis.strategies as st from hypothesis import given -from hypothesis.errors import Unsatisfiable +from hypothesis.errors import InvalidArgument from xarray import DataArray, Dataset from xarray.core.variable import Variable @@ -112,7 +112,7 @@ def test_given_arbitrary_dims_list_and_arbitrary_data(self, data): assert var.shape == (2, 3) dims = dimension_names(min_dims=3) - with pytest.raises(Unsatisfiable): + with pytest.raises(InvalidArgument): data.draw(variables(data=arrs, dims=dims)) @given(st.data()) @@ -219,7 +219,7 @@ def test_given_data_and_dims(self, data): assert da.shape == (2, 3) dims = dimension_names(min_dims=3, max_dims=3) - with pytest.raises(Unsatisfiable): + with pytest.raises(InvalidArgument): data.draw(dataarrays(data=arrs, dims=dims)) arrs = np_arrays(shape=(3, 4)) @@ -269,7 +269,7 @@ def test_given_data_and_dims(self, data): incompatible_dim_sizes = {"x": 1, "y": 4} data_vars = {"foo": Variable(data=[0, 1, 2], 
dims="x")} - with pytest.raises(Unsatisfiable, match="drawn variable"): + with pytest.raises(InvalidArgument, match="drawn variable"): data.draw( datasets( data_vars=st.just(data_vars), dims=st.just(incompatible_dim_sizes) From 9bddcec786cc87c73afc19df26505c80a2a45f35 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 6 Sep 2022 23:14:03 -0400 Subject: [PATCH 60/95] make all strategies keyword-arg only --- xarray/testing/strategies.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 27347de90c2..f503c4159d0 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -34,6 +34,7 @@ def numeric_dtypes() -> st.SearchStrategy[np.dtype]: def np_arrays( + *, shape: Union[Tuple[int], st.SearchStrategy[Tuple[int]]] = npst.array_shapes( max_side=4 ), @@ -60,6 +61,7 @@ def names() -> st.SearchStrategy[str]: def dimension_names( + *, min_dims: int = 0, max_dims: int = 3, ) -> st.SearchStrategy[List[str]]: @@ -85,6 +87,7 @@ def dimension_names( def dimension_sizes( + *, min_dims: int = 0, max_dims: int = 3, min_length: int = 1, @@ -149,6 +152,7 @@ def attrs() -> st.SearchStrategy[Mapping[str, Any]]: @st.composite def variables( draw: st.DrawFn, + *, data: st.SearchStrategy[T_Array] = None, dims: Union[ st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] @@ -253,6 +257,7 @@ def _unique_subset_of( @st.composite def _alignable_variables( draw: st.DrawFn, + *, dim_sizes: Mapping[str, int], ) -> st.SearchStrategy[List[xr.Variable]]: """Generates lists of variables with compatible (i.e. 
alignable) dimensions and sizes.""" @@ -266,6 +271,7 @@ def _alignable_variables( @st.composite def coordinate_variables( draw: st.DrawFn, + *, dim_sizes: Mapping[str, int], ) -> st.SearchStrategy[Mapping[str, xr.Variable]]: """ @@ -321,6 +327,7 @@ def coordinate_variables( @st.composite def dataarrays( draw: st.DrawFn, + *, data: st.SearchStrategy[T_Array] = None, coords: Mapping[str, xr.Variable] = None, dims: Union[ @@ -436,6 +443,7 @@ def dataarrays( @st.composite def data_variables( draw: st.DrawFn, + *, dim_sizes: Mapping[str, int], allowed_names: st.SearchStrategy[str] = names(), ) -> st.SearchStrategy[Mapping[str, xr.Variable]]: @@ -475,6 +483,7 @@ def data_variables( @st.composite def datasets( draw: st.DrawFn, + *, data_vars: st.SearchStrategy[Mapping[str, xr.Variable]] = None, coords: Mapping[str, xr.Variable] = None, dims: Union[ From b2887d43b92caf552bfe3e4cd590eef9df61fdcb Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 6 Sep 2022 23:21:46 -0400 Subject: [PATCH 61/95] min_length -> min_side --- xarray/testing/strategies.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index f503c4159d0..f29b4075bc4 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -90,8 +90,8 @@ def dimension_sizes( *, min_dims: int = 0, max_dims: int = 3, - min_length: int = 1, - max_length: int = None, + min_side: int = 1, + max_side: int = None, ) -> st.SearchStrategy[Mapping[str, int]]: """ Generates an arbitrary mapping from dimension names to lengths. @@ -106,20 +106,20 @@ def dimension_sizes( max_dims: int, optional Maximum number of dimensions in generated list. Default is 3. - min_length: int, optional + min_side: int, optional Minimum size of a dimension. Default is 1. - max_length: int, optional + max_side: int, optional Minimum size of a dimension. Default is `min_length` + 5. 
""" - if max_length is None: - max_length = min_length + 5 + if max_side is None: + max_side = min_side + 5 return st.dictionaries( keys=names(), - values=st.integers(min_value=min_length, max_value=max_length), + values=st.integers(min_value=min_side, max_value=max_side), min_size=min_dims, max_size=max_dims, ) From 3b8e8aec5e82b217c9cbdd045e571227a2e69abe Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 6 Sep 2022 23:21:58 -0400 Subject: [PATCH 62/95] correct error type --- doc/user-guide/testing.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 5eeaf7d3eb8..69fc578023a 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -121,7 +121,7 @@ For example you could create a ``chunks`` strategy to specify particular chunkin arrs = npst.arrays(dtype=numeric_dtypes) # generates arrays of any shape dims = xrst.dimension_names() # generates lists of any number of dimensions - # Drawing examples from this strategy will raise a hypothesis.errors.Unsatisfiable error. + # Drawing examples from this strategy will raise a hypothesis.errors.InvalidArgument error. var = data.draw(xrst.variables(data=arrs, dims=dims)) assert ... 
From 0980061a522a4bad10d1c8071cf6cfc73cd899f3 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 7 Sep 2022 01:02:27 -0400 Subject: [PATCH 63/95] remove coords kwarg --- xarray/testing/strategies.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index f29b4075bc4..e3b3c54f354 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -329,7 +329,6 @@ def dataarrays( draw: st.DrawFn, *, data: st.SearchStrategy[T_Array] = None, - coords: Mapping[str, xr.Variable] = None, dims: Union[ st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] ] = None, @@ -350,9 +349,6 @@ def dataarrays( ---------- data: Strategy generating array-likes, optional Default is to generate numpy data of arbitrary shape, values and dtypes. - coords: Strategy generating mappings from coordinate names to xr.Variables objects, optional - Default is to generate an arbitrary combination of both dimension and non-dimension coordinates, - with sizes matching data and/or dims, but arbitrary names, dtypes, and values. dims: Strategy for generating the dimensions, optional Can either be a strategy for generating a list of string dimension names, or a strategy for generating a mapping of string dimension names to integer lengths along each dimension. @@ -371,8 +367,7 @@ def dataarrays( name = draw(st.none() | name) - if coords is not None: - raise NotImplementedError() + # TODO add a coords argument? if data is not None and dims is None: # no dims -> generate dims to match data @@ -485,7 +480,6 @@ def datasets( draw: st.DrawFn, *, data_vars: st.SearchStrategy[Mapping[str, xr.Variable]] = None, - coords: Mapping[str, xr.Variable] = None, dims: Union[ st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] ] = None, @@ -495,7 +489,7 @@ def datasets( Generates arbitrary xarray.Dataset objects. 
Follows the basic signature of the xarray.Dataset constructor, but you can also pass alternative strategies to - generate either numpy-like array data variables, dimensions, or coordinates. + generate either numpy-like array data variables or dimensions. Passing nothing will generate a completely arbitrary Dataset (backed by numpy arrays). @@ -506,9 +500,6 @@ def datasets( data_vars: Strategy generating mappings from variable names to xr.Variable objects, optional Default is to generate an arbitrary combination of compatible variables with sizes matching dims, but arbitrary names, dtypes, and values. - coords: Strategy generating mappings from coordinate names to xr.Variable objects, optional - Default is to generate an arbitrary combination of both dimension and non-dimension coordinates, - with sizes matching data_vars and/or dims, but arbitrary names, dtypes, and values. dims: Strategy for generating the dimensions, optional Can either be a strategy for generating a list of string dimension names, or a strategy for generating a mapping of string dimension names to integer lengths along each dimension. @@ -523,8 +514,7 @@ def datasets( If custom strategies passed try to draw examples which together cannot create a valid DataArray. """ - if coords is not None: - raise NotImplementedError() + # TODO add a coords argument? 
if data_vars is not None and dims is None: # no dims -> generate dims to match data From 0313b3e47f99892dd671b84373a884bbed4be152 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 7 Sep 2022 10:29:25 -0400 Subject: [PATCH 64/95] test different types of coordinates are sometimes generated --- xarray/tests/test_strategies.py | 46 +++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 8377a259db3..bb68632937c 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -1,3 +1,5 @@ +import contextlib + import numpy as np import numpy.testing as npt import pytest @@ -7,7 +9,7 @@ import hypothesis.extra.numpy as npst import hypothesis.strategies as st -from hypothesis import given +from hypothesis import Phase, given, settings from hypothesis.errors import InvalidArgument from xarray import DataArray, Dataset @@ -181,13 +183,41 @@ def test_valid_set_of_coords(self, data): da = DataArray(data=arr, coords=coord_vars, dims=["x", "y"]) assert isinstance(da, DataArray) - def test_generates_1d_dim_coords(self): - # TODO having a `hypothesis.find(strat, predicate)` function would be very useful here - # see https://github.com/HypothesisWorks/hypothesis/issues/3436#issuecomment-1212369645 - ... - - def test_generates_non_dim_coords(self): - ... 
+ def test_sometimes_generates_1d_dim_coords(self): + found_one = False + + @given(st.data()) + @settings(phases=[Phase.generate]) + def inner(data): + coord_vars = data.draw(coordinate_variables(dim_sizes={"x": 2, "y": 3})) + for name, var in coord_vars.items(): + if var.ndim == 1 and name == var.dims[0]: + nonlocal found_one + found_one = True + raise AssertionError # early stopping - test is correct but slower without this + + with contextlib.suppress(AssertionError): + inner() + + assert found_one + + def test_sometimes_generates_non_dim_coords(self): + found_one = False + + @given(st.data()) + @settings(phases=[Phase.generate]) + def inner(data): + coord_vars = data.draw(coordinate_variables(dim_sizes={"x": 2, "y": 3})) + for name, var in coord_vars.items(): + if var.ndim != 1 or (var.ndim == 1 and name != var.dims[0]): + nonlocal found_one + found_one = True + raise AssertionError # early stopping - test is correct but slower without this + + with contextlib.suppress(AssertionError): + inner() + + assert found_one class TestDataArraysStrategy: From e6ebb1fbe6c7077c0fb9e6cf45b616132f40a88a Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Wed, 7 Sep 2022 10:33:01 -0400 Subject: [PATCH 65/95] zip dict Co-authored-by: Zac Hatfield-Dodds --- xarray/testing/strategies.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index e3b3c54f354..4bfe1c197c3 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -471,8 +471,7 @@ def data_variables( ) ) - data_vars = {n: v for n, v in zip(var_names, vars)} - return data_vars + return dict(zip(var_names, vars)) @st.composite From 4da8772f5386e2b5fa7ded2330a4590f277719ab Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 7 Sep 2022 11:29:44 -0400 Subject: [PATCH 66/95] add dim_names kwarg to dimension_sizes strategy --- xarray/testing/strategies.py | 26 ++++++++++++++++++++++---- xarray/tests/test_strategies.py | 7 
+++++++ 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index e3b3c54f354..8b596d78d71 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -23,7 +23,11 @@ # required to exclude weirder dtypes e.g. unicode, byte_string, array, or nested dtypes. def numeric_dtypes() -> st.SearchStrategy[np.dtype]: - """Generates only those numpy dtypes which xarray can handle.""" + """ + Generates only those numpy dtypes which xarray can handle. + + Requires the hypothesis package to be installed. + """ return ( npst.integer_dtypes() @@ -56,7 +60,11 @@ def np_arrays( def names() -> st.SearchStrategy[str]: - """Generates arbitrary string names for dimensions / variables.""" + """ + Generates arbitrary string names for dimensions / variables. + + Requires the hypothesis package to be installed. + """ return st.text(st.characters(), min_size=1) @@ -88,6 +96,7 @@ def dimension_names( def dimension_sizes( *, + dim_names: st.SearchStrategy[str] = names(), min_dims: int = 0, max_dims: int = 3, min_side: int = 1, @@ -100,6 +109,9 @@ def dimension_sizes( Parameters ---------- + dim_names: strategy generating strings, optional + Strategy for generating dimension names. + Defaults to the `names` strategy. min_dims: int, optional Minimum number of dimensions in generated list. Default is 1. @@ -118,7 +130,7 @@ def dimension_sizes( max_side = min_side + 5 return st.dictionaries( - keys=names(), + keys=dim_names, values=st.integers(min_value=min_side, max_value=max_side), min_size=min_dims, max_size=max_dims, @@ -136,7 +148,13 @@ def dimension_sizes( def attrs() -> st.SearchStrategy[Mapping[str, Any]]: - """Generates arbitrary valid attributes dictionaries for xarray objects.""" + """ + Generates arbitrary valid attributes dictionaries for xarray objects. + + The generated dictionaries can potentially be recursive. + + Requires the hypothesis package to be installed. 
+ """ return st.recursive( st.dictionaries(_attr_keys, _attr_values), lambda children: st.dictionaries(_attr_keys, children), diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index bb68632937c..20c08873723 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -84,6 +84,13 @@ def test_fixed_number_of_dims(self, dims): assert isinstance(dims, dict) assert len(dims) == 3 + @given(st.data()) + def test_restrict_names(self, data): + capitalized_names = st.text(st.characters(), min_size=1).map(str.upper) + dim_sizes = data.draw(dimension_sizes(dim_names=capitalized_names)) + for d in dim_sizes.keys(): + assert d.upper() == d + class TestAttrsStrategy: @given(attrs()) From e6d7a34cf73bca1b87220d8ceed7350fb688f637 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 7 Sep 2022 11:58:56 -0400 Subject: [PATCH 67/95] return a dict from _alignable_variables --- xarray/testing/strategies.py | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 8b596d78d71..3eb9d5643c8 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -276,14 +276,18 @@ def _unique_subset_of( def _alignable_variables( draw: st.DrawFn, *, + var_names: st.SearchStrategy[str], dim_sizes: Mapping[str, int], -) -> st.SearchStrategy[List[xr.Variable]]: - """Generates lists of variables with compatible (i.e. alignable) dimensions and sizes.""" +) -> st.SearchStrategy[Mapping[str, xr.Variable]]: + """ + Generates dicts of names mapping to variables with compatible (i.e. alignable) dimensions and sizes. 
+ """ alignable_dim_sizes = draw(_unique_subset_of(dim_sizes)) if dim_sizes else {} + vars = variables(dims=st.just(alignable_dim_sizes)) # TODO don't hard code max number of variables - return draw(st.lists(variables(dims=st.just(alignable_dim_sizes)), max_size=3)) + return draw(st.dictionaries(var_names, vars, max_size=3)) @st.composite @@ -323,20 +327,14 @@ def coordinate_variables( # Possibly generate ND "non-dimension coordinates" - explicit possibility not to include any helps with shrinking if draw(st.booleans()): - non_dim_coord_vars = draw(_alignable_variables(dim_sizes=dim_sizes)) # can't have same name as a dimension valid_non_dim_coord_names = names().filter(lambda n: n not in dim_names) - non_dim_coord_names = draw( - st.lists( - valid_non_dim_coord_names, - min_size=len(non_dim_coord_vars), - max_size=len(non_dim_coord_vars), - unique=True, + non_dim_coords = draw( + _alignable_variables( + var_names=valid_non_dim_coord_names, dim_sizes=dim_sizes ) ) - - non_dim_coords = {n: v for n, v in zip(non_dim_coord_names, non_dim_coord_vars)} all_coords.update(non_dim_coords) return all_coords @@ -472,24 +470,16 @@ def data_variables( allowed_names: Strategy generating strings Allowed names for data variables. Needed to avoid conflict with names of coordinate variables & dimensions. """ - # TODO these shouldn't have the same name as any dimensions or any coordinates... - vars = draw(_alignable_variables(dim_sizes=dim_sizes)) + # TODO these also shouldn't have the same name as any dimensions or any coordinates... dim_names = list(dim_sizes.keys()) # can't have same name as a dimension # TODO this is also used in coordinate_variables so refactor it out into separate function valid_var_names = allowed_names.filter(lambda n: n not in dim_names) - # TODO do I actually need to draw from st.lists for this? 
- var_names = draw( - st.lists( - valid_var_names, - min_size=len(vars), - max_size=len(vars), - unique=True, - ) + data_vars = draw( + _alignable_variables(var_names=valid_var_names, dim_sizes=dim_sizes) ) - data_vars = {n: v for n, v in zip(var_names, vars)} return data_vars From 15812fdc1337ed4f05f4769ebbe54a0cddd0eba3 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 7 Sep 2022 13:12:49 -0400 Subject: [PATCH 68/95] add coord_names arg to coordinate_variables strategy --- xarray/testing/strategies.py | 33 ++++++++++++++++++++------------- xarray/tests/test_strategies.py | 25 +++++++++++++++++++++---- 2 files changed, 41 insertions(+), 17 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 4fc787d3007..39bdb463bed 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -295,6 +295,7 @@ def coordinate_variables( draw: st.DrawFn, *, dim_sizes: Mapping[str, int], + coord_names: st.SearchStrategy[str] = names(), ) -> st.SearchStrategy[Mapping[str, xr.Variable]]: """ Generates dicts of alignable Variable objects for use as coordinates. @@ -308,6 +309,8 @@ def coordinate_variables( ---------- dim_sizes: Mapping of str to int Sizes of dimensions to use for coordinates. + coord_names: Strategy generating strings, optional + Allowed names for non-dimension coordinates. Defaults to `names` strategy. 
""" dim_names = list(dim_sizes.keys()) @@ -329,7 +332,7 @@ def coordinate_variables( if draw(st.booleans()): # can't have same name as a dimension - valid_non_dim_coord_names = names().filter(lambda n: n not in dim_names) + valid_non_dim_coord_names = coord_names.filter(lambda n: n not in dim_names) non_dim_coords = draw( _alignable_variables( var_names=valid_non_dim_coord_names, dim_sizes=dim_sizes @@ -456,7 +459,7 @@ def data_variables( draw: st.DrawFn, *, dim_sizes: Mapping[str, int], - allowed_names: st.SearchStrategy[str] = names(), + var_names: st.SearchStrategy[str] = names(), ) -> st.SearchStrategy[Mapping[str, xr.Variable]]: """ Generates dicts of alignable Variable objects for use as Dataset data variables. @@ -467,15 +470,14 @@ def data_variables( ---------- dim_sizes: Mapping of str to int Sizes of dimensions to use for variables. - allowed_names: Strategy generating strings + var_names: Strategy generating strings Allowed names for data variables. Needed to avoid conflict with names of coordinate variables & dimensions. """ - # TODO these also shouldn't have the same name as any dimensions or any coordinates... 
dim_names = list(dim_sizes.keys()) # can't have same name as a dimension # TODO this is also used in coordinate_variables so refactor it out into separate function - valid_var_names = allowed_names.filter(lambda n: n not in dim_names) + valid_var_names = var_names.filter(lambda n: n not in dim_names) data_vars = draw( _alignable_variables(var_names=valid_var_names, dim_sizes=dim_sizes) ) @@ -527,8 +529,11 @@ def datasets( # no dims -> generate dims to match data data_vars = draw(data_vars) dim_sizes = _find_overall_sizes(data_vars) - # TODO only draw coordinate variables whose names don't conflict with data variables - coords = draw(coordinate_variables(dim_sizes=dim_sizes)) + # only draw coordinate variables whose names don't conflict with data variables + allowed_coord_names = names().filter(lambda n: n not in list(data_vars.keys())) + coords = draw( + coordinate_variables(coord_names=allowed_coord_names, dim_sizes=dim_sizes) + ) elif data_vars is None and dims is not None: # no data -> generate data to match dims @@ -543,7 +548,7 @@ def datasets( allowed_data_var_names = names().filter(lambda n: n not in coord_names) data_vars = draw( data_variables( - dim_sizes=dim_sizes, allowed_names=allowed_data_var_names + dim_sizes=dim_sizes, var_names=allowed_data_var_names ) ) @@ -559,8 +564,11 @@ def datasets( data_vars = draw(data_vars) _check_compatible_sizes(data_vars, dim_sizes) - # TODO only draw coordinate variables whose names don't conflict with data variables - coords = draw(coordinate_variables(dim_sizes=dim_sizes)) + # only draw coordinate variables whose names don't conflict with data variables + allowed_coord_names = names().filter(lambda n: n not in list(data_vars.keys())) + coords = draw( + coordinate_variables(coord_names=allowed_coord_names, dim_sizes=dim_sizes) + ) else: # nothing provided, so generate everything consistently by drawing data to match dims, and coords to match both @@ -572,14 +580,13 @@ def datasets( else: coords = {} - coord_names = 
list(coords.keys()) - allowed_data_var_names = names().filter(lambda n: n not in coord_names) + allowed_data_var_names = names().filter(lambda n: n not in list(coords.keys())) # Allow for no data variables - helps with shrinking if draw(st.booleans()): draw( data_variables( - dim_sizes=dim_sizes, allowed_names=allowed_data_var_names + dim_sizes=dim_sizes, var_names=allowed_data_var_names ) ) else: diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 20c08873723..5ea220db719 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -88,8 +88,8 @@ def test_fixed_number_of_dims(self, dims): def test_restrict_names(self, data): capitalized_names = st.text(st.characters(), min_size=1).map(str.upper) dim_sizes = data.draw(dimension_sizes(dim_names=capitalized_names)) - for d in dim_sizes.keys(): - assert d.upper() == d + for dim in dim_sizes.keys(): + assert dim.upper() == dim class TestAttrsStrategy: @@ -226,6 +226,18 @@ def inner(data): assert found_one + @given(st.data()) + def test_restrict_names(self, data): + capitalized_names = st.text(st.characters(), min_size=1).map(str.upper) + coord_vars = data.draw( + coordinate_variables( + dim_sizes={"x": 2, "y": 3}, coord_names=capitalized_names + ) + ) + for name in coord_vars.keys(): + if name not in ['x', 'y']: + assert name.upper() == name + class TestDataArraysStrategy: @given(dataarrays()) @@ -275,8 +287,13 @@ def test_given_only_sizes(self, data): assert set(v.sizes.items()).issubset(set(dim_sizes.items())) @given(st.data()) - def test_given_restricted_names(self, data): - ... 
+ def test_restrict_names(self, data): + capitalized_names = st.text(st.characters(), min_size=1).map(str.upper) + data_vars = data.draw( + data_variables(dim_sizes={"x": 2, "y": 3}, var_names=capitalized_names) + ) + for name in data_vars.keys(): + assert name.upper() == name class TestDatasetsStrategy: From 4374681b1c967e59c4fa27540dcba6c7cad8d4af Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 7 Sep 2022 17:37:19 +0000 Subject: [PATCH 69/95] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/testing/strategies.py | 10 ++-------- xarray/tests/test_strategies.py | 2 +- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 39bdb463bed..7634c355bec 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -547,9 +547,7 @@ def datasets( coord_names = list(coords.keys()) allowed_data_var_names = names().filter(lambda n: n not in coord_names) data_vars = draw( - data_variables( - dim_sizes=dim_sizes, var_names=allowed_data_var_names - ) + data_variables(dim_sizes=dim_sizes, var_names=allowed_data_var_names) ) elif data_vars is not None and dims is not None: @@ -584,11 +582,7 @@ def datasets( # Allow for no data variables - helps with shrinking if draw(st.booleans()): - draw( - data_variables( - dim_sizes=dim_sizes, var_names=allowed_data_var_names - ) - ) + draw(data_variables(dim_sizes=dim_sizes, var_names=allowed_data_var_names)) else: data_vars = {} diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 5ea220db719..b00e5a2c871 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -235,7 +235,7 @@ def test_restrict_names(self, data): ) ) for name in coord_vars.keys(): - if name not in ['x', 'y']: + if name not in ["x", "y"]: assert name.upper() == name From 
0f0c4fbf0a7c969d171e41b091e8599a3bb2c445 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 7 Sep 2022 13:57:41 -0400 Subject: [PATCH 70/95] change typing of dims arg --- xarray/testing/strategies.py | 70 ++++++++++++++++----------------- xarray/tests/test_strategies.py | 2 +- 2 files changed, 35 insertions(+), 37 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 39bdb463bed..e7b4b9b176a 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,4 +1,4 @@ -from typing import Any, Hashable, List, Mapping, Tuple, Union +from typing import Any, Hashable, List, Mapping, Sequence, Tuple, Union import hypothesis.extra.numpy as npst import hypothesis.strategies as st @@ -172,9 +172,7 @@ def variables( draw: st.DrawFn, *, data: st.SearchStrategy[T_Array] = None, - dims: Union[ - st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] - ] = None, + dims: st.SearchStrategy[Union[Sequence[str], Mapping[str, int]]] = None, attrs: st.SearchStrategy[Mapping] = attrs(), ) -> st.SearchStrategy[xr.Variable]: """ @@ -192,7 +190,7 @@ def variables( data: Strategy generating array-likes, optional Default is to generate numpy data of arbitrary shape, values and dtype. dims: Strategy for generating the dimensions, optional - Can either be a strategy for generating a list of string dimension names, + Can either be a strategy for generating a sequence of string dimension names, or a strategy for generating a mapping of string dimension names to integer lengths along each dimension. If provided in the former form the lengths of the returned Variable will either be determined from the data argument if given or arbitrarily generated if not. 
@@ -222,21 +220,20 @@ def variables( elif dims is not None and data is None: # no data -> generate data to match dims dims = draw(dims) - if isinstance(dims, List): + if isinstance(dims, Sequence): valid_shapes = npst.array_shapes(min_dims=len(dims), max_dims=len(dims)) data = draw(np_arrays(shape=draw(valid_shapes))) - else: + elif isinstance(dims, Mapping): # should be a mapping of form {dim_names: lengths} shape = tuple(dims.values()) data = draw(np_arrays(shape=shape)) + else: + raise ValueError(f"Invalid type for dims argument - got type {type(dims)}") elif data is not None and dims is not None: # both data and dims provided -> check drawn examples are compatible dims = draw(dims) - # TODO is there another way to enforce these assumptions? This is very like to fail hypothesis' health checks - # TODO how do I write a test that checks that the hypothesis InvalidArgument error will be raised? - # TODO or we could just raise in this case? if isinstance(dims, List): data = draw(data) if data.ndim != len(dims): @@ -245,7 +242,7 @@ def variables( "unique dimension names. Please only pass strategies which are guaranteed to " "draw compatible examples for data and dims." ) - else: + elif isinstance(dims, Mapping): # should be a mapping of form {dim_names: lengths} data = draw(data) shape = tuple(dims.values()) @@ -255,6 +252,8 @@ def variables( f"sizes implying shape {shape}. Please only pass strategies which are guaranteed to " "draw compatible examples for data and dims." 
) + else: + raise ValueError(f"Invalid type for dims argument - got type {type(dims)}") else: # nothing provided, so generate everything consistently by drawing dims to match data @@ -348,9 +347,7 @@ def dataarrays( draw: st.DrawFn, *, data: st.SearchStrategy[T_Array] = None, - dims: Union[ - st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] - ] = None, + dims: st.SearchStrategy[Union[Sequence[str], Mapping[str, int]]] = None, name: st.SearchStrategy[Union[str, None]] = names(), attrs: st.SearchStrategy[Mapping] = attrs(), ) -> st.SearchStrategy[xr.DataArray]: @@ -369,7 +366,7 @@ def dataarrays( data: Strategy generating array-likes, optional Default is to generate numpy data of arbitrary shape, values and dtypes. dims: Strategy for generating the dimensions, optional - Can either be a strategy for generating a list of string dimension names, + Can either be a strategy for generating a sequence of string dimension names, or a strategy for generating a mapping of string dimension names to integer lengths along each dimension. If provided in the former form the lengths of the returned Variable will either be determined from the data argument if given or arbitrarily generated if not. 
@@ -398,23 +395,25 @@ def dataarrays( elif data is None and dims is not None: # no data -> generate data to match dims dims = draw(dims) - if isinstance(dims, List): + if isinstance(dims, Sequence): dim_names = dims valid_shapes = npst.array_shapes(min_dims=len(dims), max_dims=len(dims)) data = draw(np_arrays(shape=draw(valid_shapes))) dim_sizes = {n: l for n, l in zip(dims, data.shape)} coords = draw(coordinate_variables(dim_sizes=dim_sizes)) - else: + elif isinstance(dims, Mapping): # should be a mapping of form {dim_names: lengths} dim_names, shape = list(dims.keys()), tuple(dims.values()) data = draw(np_arrays(shape=shape)) coords = draw(coordinate_variables(dim_sizes=dims)) + else: + raise ValueError(f"Invalid type for dims argument - got type {type(dims)}") elif data is not None and dims is not None: # both data and dims provided -> check drawn examples are compatible dims = draw(dims) - if isinstance(dims, List): + if isinstance(dims, Sequence): dim_names = dims data = draw(data) if data.ndim != len(dims): @@ -424,7 +423,7 @@ def dataarrays( "draw compatible examples for data and dims." ) dim_sizes = {n: l for n, l in zip(dims, data.shape)} - else: + elif isinstance(dims, Mapping): # should be a mapping of form {dim_names: lengths} data = draw(data) dim_sizes = dims @@ -435,6 +434,8 @@ def dataarrays( f"sizes implying shape {shape}. Please only pass strategies which are guaranteed to " "draw compatible examples for data and dims." 
) + else: + raise ValueError(f"Invalid type for dims argument - got type {type(dims)}") coords = draw(coordinate_variables(dim_sizes=dim_sizes)) @@ -489,9 +490,7 @@ def datasets( draw: st.DrawFn, *, data_vars: st.SearchStrategy[Mapping[str, xr.Variable]] = None, - dims: Union[ - st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] - ] = None, + dims: st.SearchStrategy[Union[Sequence[str], Mapping[str, int]]] = None, attrs: st.SearchStrategy[Mapping] = attrs(), ) -> st.SearchStrategy[xr.Dataset]: """ @@ -510,7 +509,7 @@ def datasets( Default is to generate an arbitrary combination of compatible variables with sizes matching dims, but arbitrary names, dtypes, and values. dims: Strategy for generating the dimensions, optional - Can either be a strategy for generating a list of string dimension names, + Can either be a strategy for generating a sequence of string dimension names, or a strategy for generating a mapping of string dimension names to integer lengths along each dimension. If provided in the former form the lengths of the returned Variable will either be determined from the data argument if given or arbitrarily generated if not. @@ -537,32 +536,35 @@ def datasets( elif data_vars is None and dims is not None: # no data -> generate data to match dims - if isinstance(dims, List): + dims = draw(dims) + if isinstance(dims, Sequence): # TODO support dims as list too? 
raise NotImplementedError() - else: + elif isinstance(dims, Mapping): # should be a mapping of form {dim_names: lengths} - dim_sizes = draw(dims) + dim_sizes = dims coords = draw(coordinate_variables(dim_sizes=dim_sizes)) coord_names = list(coords.keys()) allowed_data_var_names = names().filter(lambda n: n not in coord_names) data_vars = draw( - data_variables( - dim_sizes=dim_sizes, var_names=allowed_data_var_names - ) + data_variables(dim_sizes=dim_sizes, var_names=allowed_data_var_names) ) + else: + raise ValueError(f"Invalid type for dims argument - got type {type(dims)}") elif data_vars is not None and dims is not None: # both data and dims provided -> check drawn examples are compatible dims = draw(dims) - if isinstance(dims, List): + if isinstance(dims, Sequence): # TODO support dims as list too? raise NotImplementedError() - else: + elif isinstance(dims, Mapping): # should be a mapping of form {dim_names: lengths} dim_sizes = dims data_vars = draw(data_vars) _check_compatible_sizes(data_vars, dim_sizes) + else: + raise ValueError(f"Invalid type for dims argument - got type {type(dims)}") # only draw coordinate variables whose names don't conflict with data variables allowed_coord_names = names().filter(lambda n: n not in list(data_vars.keys())) @@ -584,11 +586,7 @@ def datasets( # Allow for no data variables - helps with shrinking if draw(st.booleans()): - draw( - data_variables( - dim_sizes=dim_sizes, var_names=allowed_data_var_names - ) - ) + draw(data_variables(dim_sizes=dim_sizes, var_names=allowed_data_var_names)) else: data_vars = {} diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 5ea220db719..b00e5a2c871 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -235,7 +235,7 @@ def test_restrict_names(self, data): ) ) for name in coord_vars.keys(): - if name not in ['x', 'y']: + if name not in ["x", "y"]: assert name.upper() == name From 6a30af54c2427d2fb85b8124dbdbb080bcd54825 Mon Sep 
17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 7 Sep 2022 15:15:07 -0400 Subject: [PATCH 71/95] support dims as list to datasets strat when data not given --- xarray/testing/strategies.py | 36 +++++++++++++++++---------------- xarray/tests/test_strategies.py | 16 +++++++++++++++ 2 files changed, 35 insertions(+), 17 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index e7b4b9b176a..175436f78c7 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -342,6 +342,11 @@ def coordinate_variables( return all_coords +def _sizes_from_dim_names(dims: Sequence[str]) -> st.SearchStrategy[Mapping[str, int]]: + size_along_dim = st.integers(min_value=1, max_value=6) + return st.fixed_dictionaries({d: size_along_dim for d in dims}) + + @st.composite def dataarrays( draw: st.DrawFn, @@ -396,20 +401,17 @@ def dataarrays( # no data -> generate data to match dims dims = draw(dims) if isinstance(dims, Sequence): - dim_names = dims - valid_shapes = npst.array_shapes(min_dims=len(dims), max_dims=len(dims)) - data = draw(np_arrays(shape=draw(valid_shapes))) - dim_sizes = {n: l for n, l in zip(dims, data.shape)} - coords = draw(coordinate_variables(dim_sizes=dim_sizes)) - + dim_sizes = draw(_sizes_from_dim_names(dims)) elif isinstance(dims, Mapping): # should be a mapping of form {dim_names: lengths} - dim_names, shape = list(dims.keys()), tuple(dims.values()) - data = draw(np_arrays(shape=shape)) - coords = draw(coordinate_variables(dim_sizes=dims)) + dim_sizes = dims else: raise ValueError(f"Invalid type for dims argument - got type {type(dims)}") + dim_names, shape = list(dim_sizes.keys()), tuple(dim_sizes.values()) + data = draw(np_arrays(shape=shape)) + coords = draw(coordinate_variables(dim_sizes=dim_sizes)) + elif data is not None and dims is not None: # both data and dims provided -> check drawn examples are compatible dims = draw(dims) @@ -538,20 +540,20 @@ def datasets( # no data -> generate data to match dims dims 
= draw(dims) if isinstance(dims, Sequence): - # TODO support dims as list too? - raise NotImplementedError() + dim_sizes = draw(_sizes_from_dim_names(dims)) elif isinstance(dims, Mapping): # should be a mapping of form {dim_names: lengths} dim_sizes = dims - coords = draw(coordinate_variables(dim_sizes=dim_sizes)) - coord_names = list(coords.keys()) - allowed_data_var_names = names().filter(lambda n: n not in coord_names) - data_vars = draw( - data_variables(dim_sizes=dim_sizes, var_names=allowed_data_var_names) - ) else: raise ValueError(f"Invalid type for dims argument - got type {type(dims)}") + coords = draw(coordinate_variables(dim_sizes=dim_sizes)) + coord_names = list(coords.keys()) + allowed_data_var_names = names().filter(lambda n: n not in coord_names) + data_vars = draw( + data_variables(dim_sizes=dim_sizes, var_names=allowed_data_var_names) + ) + elif data_vars is not None and dims is not None: # both data and dims provided -> check drawn examples are compatible dims = draw(dims) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index b00e5a2c871..4c628be168d 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -310,12 +310,18 @@ def test_given_data(self, data): @given(st.data()) def test_given_dims(self, data): + dims = ["x", "y"] + ds = data.draw(datasets(dims=st.just(dims))) + assert set(ds.dims).issubset(set(dims)) + dim_sizes = {"x": 3, "y": 4} ds = data.draw(datasets(dims=st.just(dim_sizes))) assert set(ds.sizes.items()).issubset(set(dim_sizes.items())) @given(st.data()) def test_given_data_and_dims(self, data): + + # pass dims as mapping dim_sizes = {"x": 3, "y": 4} data_vars = data.draw(data_variables(dim_sizes=dim_sizes)) ds = data.draw(datasets(data_vars=st.just(data_vars), dims=st.just(dim_sizes))) @@ -329,3 +335,13 @@ def test_given_data_and_dims(self, data): data_vars=st.just(data_vars), dims=st.just(incompatible_dim_sizes) ) ) + + @pytest.mark.xfail(reason="not implemented") + 
@given(st.data()) + def test_given_data_and_dims_as_sequence(self, data): + # pass dims as sequence + dim_sizes = {"x": 3, "y": 4} + dims = list(dim_sizes.keys()) + data_vars = data.draw(data_variables(dim_sizes=dim_sizes)) + ds = data.draw(datasets(data_vars=st.just(data_vars), dims=st.just(dims))) + assert set(ds.sizes.items()).issubset(set(dim_sizes.items())) From 177d908e6a9d4bda262a7004600b5dff03ef46c3 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 7 Sep 2022 15:37:03 -0400 Subject: [PATCH 72/95] put coord and data var generation in optional branch to try to improve shrinking --- xarray/testing/strategies.py | 87 ++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 43 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 175436f78c7..d9bd32e7583 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -311,33 +311,38 @@ def coordinate_variables( coord_names: Strategy generating strings, optional Allowed names for non-dimension coordinates. Defaults to `names` strategy. 
""" - dim_names = list(dim_sizes.keys()) all_coords = {} - # Possibly generate 1D "dimension coordinates" - explicit possibility not to include amy helps with shrinking - if dim_names and draw(st.booleans()): - # first generate subset of dimension names - these set which dimension coords will be included - dim_coord_names_and_lengths = draw(_unique_subset_of(dim_sizes)) + if draw( + st.booleans() + ): # Allow for no coordinate variables - explicit possibility not to helps with shrinking - # then generate 1D variables for each name - dim_coords = { - n: draw(variables(dims=st.just({n: l}))) - for n, l in dim_coord_names_and_lengths.items() - } - all_coords.update(dim_coords) + dim_names = list(dim_sizes.keys()) - # Possibly generate ND "non-dimension coordinates" - explicit possibility not to include any helps with shrinking - if draw(st.booleans()): + # Possibly generate 1D "dimension coordinates" - explicit possibility not to helps with shrinking + if dim_names and draw(st.booleans()): + # first generate subset of dimension names - these set which dimension coords will be included + dim_coord_names_and_lengths = draw(_unique_subset_of(dim_sizes)) - # can't have same name as a dimension - valid_non_dim_coord_names = coord_names.filter(lambda n: n not in dim_names) - non_dim_coords = draw( - _alignable_variables( - var_names=valid_non_dim_coord_names, dim_sizes=dim_sizes + # then generate 1D variables for each name + dim_coords = { + n: draw(variables(dims=st.just({n: l}))) + for n, l in dim_coord_names_and_lengths.items() + } + all_coords.update(dim_coords) + + # Possibly generate ND "non-dimension coordinates" - explicit possibility not to helps with shrinking + if draw(st.booleans()): + + # can't have same name as a dimension + valid_non_dim_coord_names = coord_names.filter(lambda n: n not in dim_names) + non_dim_coords = draw( + _alignable_variables( + var_names=valid_non_dim_coord_names, dim_sizes=dim_sizes + ) ) - ) - all_coords.update(non_dim_coords) + 
all_coords.update(non_dim_coords) return all_coords @@ -415,9 +420,9 @@ def dataarrays( elif data is not None and dims is not None: # both data and dims provided -> check drawn examples are compatible dims = draw(dims) + data = draw(data) if isinstance(dims, Sequence): dim_names = dims - data = draw(data) if data.ndim != len(dims): raise InvalidArgument( f"Strategy attempting to generate data with {data.ndim} dims but {len(dims)} " @@ -427,9 +432,8 @@ def dataarrays( dim_sizes = {n: l for n, l in zip(dims, data.shape)} elif isinstance(dims, Mapping): # should be a mapping of form {dim_names: lengths} - data = draw(data) dim_sizes = dims - dim_names, shape = list(dims.keys()), tuple(dims.values()) + dim_names, shape = list(dim_sizes.keys()), tuple(dim_sizes.values()) if data.shape != shape: raise InvalidArgument( f"Strategy attempting to generate data with shape {data.shape} dims but dimension " @@ -476,14 +480,20 @@ def data_variables( var_names: Strategy generating strings Allowed names for data variables. Needed to avoid conflict with names of coordinate variables & dimensions. 
""" - dim_names = list(dim_sizes.keys()) + if draw( + st.booleans() + ): # Allow for no coordinate variables - explicit possibility not to helps with shrinking + dim_names = list(dim_sizes.keys()) + + # can't have same name as a dimension + # TODO this is also used in coordinate_variables so refactor it out into separate function + valid_var_names = var_names.filter(lambda n: n not in dim_names) + data_vars = draw( + _alignable_variables(var_names=valid_var_names, dim_sizes=dim_sizes) + ) + else: + data_vars = {} - # can't have same name as a dimension - # TODO this is also used in coordinate_variables so refactor it out into separate function - valid_var_names = var_names.filter(lambda n: n not in dim_names) - data_vars = draw( - _alignable_variables(var_names=valid_var_names, dim_sizes=dim_sizes) - ) return data_vars @@ -577,20 +587,11 @@ def datasets( else: # nothing provided, so generate everything consistently by drawing data to match dims, and coords to match both dim_sizes = draw(dimension_sizes()) - - # Allow for no coordinate variables - helps with shrinking - if draw(st.booleans()): - coords = draw(coordinate_variables(dim_sizes=dim_sizes)) - else: - coords = {} - + coords = draw(coordinate_variables(dim_sizes=dim_sizes)) allowed_data_var_names = names().filter(lambda n: n not in list(coords.keys())) - - # Allow for no data variables - helps with shrinking - if draw(st.booleans()): - draw(data_variables(dim_sizes=dim_sizes, var_names=allowed_data_var_names)) - else: - data_vars = {} + data_vars = draw( + data_variables(dim_sizes=dim_sizes, var_names=allowed_data_var_names) + ) return xr.Dataset(data_vars=data_vars, coords=coords, attrs=draw(attrs)) From 5424e37914cf4953448ccac577766ffe00366fcb Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 7 Sep 2022 15:40:28 -0400 Subject: [PATCH 73/95] improve simple test example --- doc/user-guide/testing.rst | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/doc/user-guide/testing.rst 
b/doc/user-guide/testing.rst index 69fc578023a..01b6ac3420a 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -76,8 +76,6 @@ range of data that the xarray strategies can generate. In your tests however you should not use ``.example()`` - instead you should parameterize your tests with the ``hypothesis.given`` decorator: -# TODO finishsimple test example - .. ipython:: python from hypothesis import given @@ -85,8 +83,8 @@ In your tests however you should not use ``.example()`` - instead you should par .. ipython:: python @given(xrst.dataarrays()) - def test_something(da): - ... + def test_function_that_acts_on_dataarrays(da): + assert func(da) == ... Chaining Strategies From c8712736f45cd4ec883af50ad2b96c516c6b559b Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 7 Sep 2022 17:23:48 -0400 Subject: [PATCH 74/95] add documentation on creating duck arrays --- doc/user-guide/testing.rst | 51 +++++++++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 4 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 01b6ac3420a..a49027bd93e 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -94,7 +94,6 @@ Xarray's strategies can accept other strategies as arguments, allowing you to cu examples. .. ipython:: python - :okexcept: # generate a DataArray with shape (3, 4), but all other details still arbitrary xrst.dataarrays( @@ -157,10 +156,9 @@ To fix the length of dimensions you can instead pass `dims` as a mapping of dime You can also use this to specify that you want examples which are missing some part of the data structure, for instance .. 
ipython:: python - :okexcept: # Generates only dataarrays with no coordinates - xrst.dataarrays(coords=st.just({})).example() + xrst.datasets(data_vars=st.just({})).example() Through a combination of chaining strategies and fixing arguments, you can specify quite complicated requirements on the objects your chained strategy will generate. @@ -189,4 +187,49 @@ arbitrary ``DataArray`` objects whose dimensions will always match these specifi Creating Duck-type Arrays ~~~~~~~~~~~~~~~~~~~~~~~~~ -# TODO creating duckarrays by passing custom strategies to data arg +Xarray objects don't have to wrap numpy arrays, in fact they can wrap any array type which presents the same API as a +numpy array (so-called "duck array wrapping", see :ref:`_internals.duck_arrays`). + +Imagine we want to write a strategy which generates arbitrary `DataArray` objects, each of which wraps a +``sparse.COO`` array instead of a ``numpy.ndarray``. How could we do that? There are two ways: + +1. Create a xarray object with numpy data and use ``.map()`` to convert the underlying array to a +different type: + +.. ipython:: python + + import sparse + +.. ipython:: python + :okexcept: + + def convert_to_sparse(arr): + if arr.ndim == 0: + return arr + else: + return sparse.COO.from_numpy(arr) + + + sparse_dataarrays = xrst.dataarrays(attrs=st.just({})).map(convert_to_sparse) + + sparse_dataarrays.example() + sparse_dataarrays.example() + +2. Pass a strategy which generates the duck-typed arrays directly to the ``data`` argument of the xarray +strategies: + +.. 
ipython:: python + :okexcept: + + @st.composite + def sparse_arrays(draw) -> st.SearchStrategy[sparse._coo.core.COO]: + """Strategy which generates random sparse.COO arrays""" + shape = draw(npst.array_shapes()) + density = draw(st.integers(min_value=0, max_value=1)) + return sparse.random(shape, density=density) + + + sparse_dataarrays = xrst.dataarrays(data=sparse_arrays(), attrs=st.just({})) + + sparse_dataarrays.example() + sparse_dataarrays.example() From 7730a27756c0d499eea1bc6a9727bec98ff29203 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 7 Sep 2022 17:31:15 -0400 Subject: [PATCH 75/95] okexcept for sparse examples --- doc/user-guide/testing.rst | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index a49027bd93e..f8d87036638 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -197,6 +197,7 @@ Imagine we want to write a strategy which generates arbitrary `DataArray` object different type: .. ipython:: python + :okexcept: import sparse @@ -209,8 +210,10 @@ different type: else: return sparse.COO.from_numpy(arr) +.. ipython:: python + :okexcept: - sparse_dataarrays = xrst.dataarrays(attrs=st.just({})).map(convert_to_sparse) + sparse_dataarrays = xrst.dataarrays().map(convert_to_sparse) sparse_dataarrays.example() sparse_dataarrays.example() @@ -228,8 +231,10 @@ strategies: density = draw(st.integers(min_value=0, max_value=1)) return sparse.random(shape, density=density) +.. 
ipython:: python + :okexcept: - sparse_dataarrays = xrst.dataarrays(data=sparse_arrays(), attrs=st.just({})) + sparse_dataarrays = xrst.dataarrays(data=sparse_arrays()) sparse_dataarrays.example() sparse_dataarrays.example() From 24549bcb80c40571a3b699b9519f53dded11e8e7 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 7 Sep 2022 17:59:29 -0400 Subject: [PATCH 76/95] fix sparse dataarrays example --- doc/user-guide/testing.rst | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index f8d87036638..88d8f413eb4 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -204,11 +204,12 @@ different type: .. ipython:: python :okexcept: - def convert_to_sparse(arr): - if arr.ndim == 0: - return arr + def convert_to_sparse(da): + if da.ndim == 0: + return da else: - return sparse.COO.from_numpy(arr) + da.data = sparse.COO.from_numpy(da.values) + return da .. ipython:: python :okexcept: @@ -238,3 +239,6 @@ strategies: sparse_dataarrays.example() sparse_dataarrays.example() + +Either approach is fine, but one may be more convenient than the other depending on the type of the duck array which you +want to wrap. From 3082a0978ec16183b80865b4f78085ce91d9b54b Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 7 Sep 2022 18:00:39 -0400 Subject: [PATCH 77/95] todo about building a duck array dataset --- doc/user-guide/testing.rst | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 88d8f413eb4..e18eb54aa88 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -242,3 +242,30 @@ strategies: Either approach is fine, but one may be more convenient than the other depending on the type of the duck array which you want to wrap. + +Creating datasets can be a little more involved. Using method (1) is simple: + +.. 
ipython:: python + :okexcept: + + def convert_ds_to_sparse(ds): + return ds.map(convert_to_sparse) + +.. ipython:: python + :okexcept: + + sparse_datasets = xrst.datasets().map(convert_ds_to_sparse) + + sparse_datasets.example() + +but building a dataset from scratch (i.e. method (2)) requires building the dataset object in such as way that all of +the data variables have compatible dimensions. You can build up a dictionary of the form ``{var_name: data_variable}`` +yourself, or you can use the ``data_vars`` argument to the ``data_variables`` strategy (TODO): + +.. ipython:: python + :okexcept: + + sparse_data_vars = xrst.data_variables(data=sparse_arrays()) + sparse_datasets = xrst.datasets(data_vars=sparse_data_vars) + + sparse_datasets.example() \ No newline at end of file From 5df60dc9ae548f157c6b2a8d60c4380c4e9fe541 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 7 Sep 2022 18:19:08 -0400 Subject: [PATCH 78/95] fix imports and cross-links --- doc/user-guide/testing.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index e18eb54aa88..0caae3f6415 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -20,8 +20,8 @@ Hypothesis testing .. note:: Testing with hypothesis is a fairly advanced topic. Before reading this section it is recommended that you take a look - at our guide to xarray's data structures, are familiar with conventional unit testing in pytest, and have seen the - hypothesis library documentation. + at our guide to xarray's :ref:`data structures`, are familiar with conventional unit testing in pytest, and have seen + the hypothesis library documentation. ``Hypothesis`` is a powerful library for property-based testing. 
Instead of writing tests for one example at a time, it allows you to write tests parameterized by a source of many @@ -188,7 +188,7 @@ Creating Duck-type Arrays ~~~~~~~~~~~~~~~~~~~~~~~~~ Xarray objects don't have to wrap numpy arrays, in fact they can wrap any array type which presents the same API as a -numpy array (so-called "duck array wrapping", see :ref:`_internals.duck_arrays`). +numpy array (so-called "duck array wrapping", see :ref:`internals.duck_arrays`). Imagine we want to write a strategy which generates arbitrary `DataArray` objects, each of which wraps a ``sparse.COO`` array instead of a ``numpy.ndarray``. How could we do that? There are two ways: @@ -200,6 +200,7 @@ different type: :okexcept: import sparse + import hypothesis.extra.numpy as npst .. ipython:: python :okexcept: From 01078def7aed2418a73b9e94a90ed75d9979e852 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 7 Sep 2022 22:20:49 +0000 Subject: [PATCH 79/95] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- doc/user-guide/testing.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 0caae3f6415..77a9ebafc22 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -269,4 +269,4 @@ yourself, or you can use the ``data_vars`` argument to the ``data_variables`` st sparse_data_vars = xrst.data_variables(data=sparse_arrays()) sparse_datasets = xrst.datasets(data_vars=sparse_data_vars) - sparse_datasets.example() \ No newline at end of file + sparse_datasets.example() From 53290e216d5816ba5b8212f8186f69b93b36ce52 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 8 Sep 2022 13:29:28 -0400 Subject: [PATCH 80/95] add hypothesis library to intersphinx mapping --- doc/conf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/conf.py b/doc/conf.py index 
f0050aeb24a..de79dd69d6c 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -334,6 +334,7 @@ "cftime": ("https://unidata.github.io/cftime", None), "rasterio": ("https://rasterio.readthedocs.io/en/latest", None), "sparse": ("https://sparse.pydata.org/en/latest/", None), + "hypothesis": ("https://hypothesis.readthedocs.io/en/latest/", None), } From bd2cb6e573b0136193749a6c3b4913e8632992d5 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 8 Sep 2022 13:29:38 -0400 Subject: [PATCH 81/95] fix many links --- doc/user-guide/testing.rst | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 77a9ebafc22..6802be434c9 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -20,13 +20,14 @@ Hypothesis testing .. note:: Testing with hypothesis is a fairly advanced topic. Before reading this section it is recommended that you take a look - at our guide to xarray's :ref:`data structures`, are familiar with conventional unit testing in pytest, and have seen - the hypothesis library documentation. + at our guide to xarray's :ref:`data structures`, are familiar with conventional unit testing in + `pytest `_, and have seen the + `hypothesis library documentation `_. -``Hypothesis`` is a powerful library for property-based testing. +`The hypothesis library `_ is a powerful tool for property-based testing. Instead of writing tests for one example at a time, it allows you to write tests parameterized by a source of many dynamically generated examples. For example you might have written a test which you wish to be parameterized by the set -of all possible ``integers()``. +of all possible integers via :py:func:`hypothesis.strategies.integers()`. Property-based testing is extremely powerful, because (unlike more conventional example-based testing) it can find bugs that you did not even think to look for! 
@@ -38,7 +39,7 @@ Each source of examples is called a "strategy", and xarray provides a range of c data structures containing arbitrary data. You can use these to efficiently test downstream code, quickly ensuring that your code can handle xarray objects of all possible structures and contents. -These strategies are accessible in the :py:module::`xarray.testing.strategies` module, which provides +These strategies are accessible in the :py:mod:`xarray.testing.strategies` module, which provides .. currentmodule:: xarray @@ -74,7 +75,7 @@ You can see that calling ``.example()`` multiple times will generate different e range of data that the xarray strategies can generate. In your tests however you should not use ``.example()`` - instead you should parameterize your tests with the -``hypothesis.given`` decorator: +:py:func:`hypothesis.given` decorator: .. ipython:: python @@ -113,7 +114,9 @@ For example you could create a ``chunks`` strategy to specify particular chunkin .. code-block:: - @given(st.data()) + import hypothesis.extra.numpy as npst + + @st.given(st.data()) def test_something_else_inefficiently(data): arrs = npst.arrays(dtype=numeric_dtypes) # generates arrays of any shape dims = xrst.dimension_names() # generates lists of any number of dimensions @@ -133,7 +136,7 @@ Fixing Arguments ~~~~~~~~~~~~~~~~ If you want to fix one aspect of the data structure, whilst allowing variation in the generated examples -over all other aspects, then use ``hypothesis.strategies.just()``. +over all other aspects, then use :py:func:`hypothesis.strategies.just()`. .. ipython:: python @@ -142,8 +145,8 @@ over all other aspects, then use ``hypothesis.strategies.just()``. # Generates only dataarrays with dimensions ["x", "y"] xrst.dataarrays(dims=st.just(["x", "y"])).example() -(This is technically another example of chaining strategies - ``hypothesis.strategies.just`` is simply a special -strategy that just contains a single example.) 
+(This is technically another example of chaining strategies - :py:func:`hypothesis.strategies.just()` is simply a +special strategy that just contains a single example.) To fix the length of dimensions you can instead pass `dims` as a mapping of dimension names to lengths (i.e. following xarray objects' ``.sizes()`` property), e.g. @@ -176,8 +179,8 @@ objects your chained strategy will generate. special_dataarrays.example() special_dataarrays.example() -Here we have used one of hypothesis' built-in strategies ``fixed_dictionaries`` to create a strategy which generates -mappings of dimension names to lengths (i.e. the ``size`` of the xarray object we want). +Here we have used one of hypothesis' built-in strategies :py:func:`hypothesis.strategies.fixed_dictionaries` to create a +strategy which generates mappings of dimension names to lengths (i.e. the ``size`` of the xarray object we want). This particular strategy will always generate an ``x`` dimension of length 2, and a ``y`` dimension of length either 3 or 4, and will sometimes also generate a ``z`` dimension of length 2. By feeding this strategy for dictionaries into the `dims` argument of xarray's `dataarrays` strategy, we can generate @@ -191,7 +194,7 @@ Xarray objects don't have to wrap numpy arrays, in fact they can wrap any array numpy array (so-called "duck array wrapping", see :ref:`internals.duck_arrays`). Imagine we want to write a strategy which generates arbitrary `DataArray` objects, each of which wraps a -``sparse.COO`` array instead of a ``numpy.ndarray``. How could we do that? There are two ways: +:py:class:`sparse.COO` array instead of a ``numpy.ndarray``. How could we do that? There are two ways: 1. 
Create a xarray object with numpy data and use ``.map()`` to convert the underlying array to a different type: From c5e83c268ff0fc14b8d90084baf3514d6d4818c6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 8 Sep 2022 17:31:40 +0000 Subject: [PATCH 82/95] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- doc/user-guide/testing.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 6802be434c9..99fec410f28 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -116,6 +116,7 @@ For example you could create a ``chunks`` strategy to specify particular chunkin import hypothesis.extra.numpy as npst + @st.given(st.data()) def test_something_else_inefficiently(data): arrs = npst.arrays(dtype=numeric_dtypes) # generates arrays of any shape From de26b2fc5f9f551ae1fe793e8d183393f944ce72 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 9 Sep 2022 10:34:07 -0400 Subject: [PATCH 83/95] fixed all local mypy errors --- xarray/testing/strategies.py | 204 ++++++++++++++++++----------------- 1 file changed, 105 insertions(+), 99 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index d9bd32e7583..189002ac035 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,4 +1,4 @@ -from typing import Any, Hashable, List, Mapping, Sequence, Tuple, Union +from typing import Any, Dict, Hashable, List, Mapping, Sequence, Tuple, Union import hypothesis.extra.numpy as npst import hypothesis.strategies as st @@ -39,9 +39,9 @@ def numeric_dtypes() -> st.SearchStrategy[np.dtype]: def np_arrays( *, - shape: Union[Tuple[int], st.SearchStrategy[Tuple[int]]] = npst.array_shapes( - max_side=4 - ), + shape: Union[ + Tuple[int, ...], st.SearchStrategy[Tuple[int, ...]] + ] = npst.array_shapes(max_side=4), dtype: Union[np.dtype, 
st.SearchStrategy[np.dtype]] = numeric_dtypes(), ) -> st.SearchStrategy[np.ndarray]: """ @@ -72,7 +72,7 @@ def dimension_names( *, min_dims: int = 0, max_dims: int = 3, -) -> st.SearchStrategy[List[str]]: +) -> st.SearchStrategy[List[Hashable]]: """ Generates an arbitrary list of valid dimension names. @@ -96,12 +96,12 @@ def dimension_names( def dimension_sizes( *, - dim_names: st.SearchStrategy[str] = names(), + dim_names: st.SearchStrategy[Hashable] = names(), min_dims: int = 0, max_dims: int = 3, min_side: int = 1, max_side: int = None, -) -> st.SearchStrategy[Mapping[str, int]]: +) -> st.SearchStrategy[Mapping[Hashable, int]]: """ Generates an arbitrary mapping from dimension names to lengths. @@ -147,7 +147,7 @@ def dimension_sizes( _attr_values = st.none() | st.booleans() | st.text(st.characters()) | _small_arrays -def attrs() -> st.SearchStrategy[Mapping[str, Any]]: +def attrs() -> st.SearchStrategy[Mapping[Hashable, Any]]: """ Generates arbitrary valid attributes dictionaries for xarray objects. @@ -172,9 +172,9 @@ def variables( draw: st.DrawFn, *, data: st.SearchStrategy[T_Array] = None, - dims: st.SearchStrategy[Union[Sequence[str], Mapping[str, int]]] = None, + dims: st.SearchStrategy[Union[Sequence[Hashable], Mapping[Hashable, int]]] = None, attrs: st.SearchStrategy[Mapping] = attrs(), -) -> st.SearchStrategy[xr.Variable]: +) -> xr.Variable: """ Generates arbitrary xarray.Variable objects. 
@@ -214,41 +214,42 @@ def variables( if data is not None and dims is None: # no dims -> generate dims to match data - data = draw(data) - dims = draw(dimension_names(min_dims=data.ndim, max_dims=data.ndim)) + _data = draw(data) + dim_names = draw(dimension_names(min_dims=_data.ndim, max_dims=_data.ndim)) elif dims is not None and data is None: # no data -> generate data to match dims - dims = draw(dims) - if isinstance(dims, Sequence): - valid_shapes = npst.array_shapes(min_dims=len(dims), max_dims=len(dims)) - data = draw(np_arrays(shape=draw(valid_shapes))) - elif isinstance(dims, Mapping): + _dims = draw(dims) + if isinstance(_dims, Sequence): + dim_names = list(_dims) + valid_shapes = npst.array_shapes(min_dims=len(_dims), max_dims=len(_dims)) + _data = draw(np_arrays(shape=draw(valid_shapes))) + elif isinstance(_dims, Mapping): # should be a mapping of form {dim_names: lengths} - shape = tuple(dims.values()) - data = draw(np_arrays(shape=shape)) + dim_names, shape = list(_dims.keys()), tuple(_dims.values()) + _data = draw(np_arrays(shape=shape)) else: raise ValueError(f"Invalid type for dims argument - got type {type(dims)}") elif data is not None and dims is not None: # both data and dims provided -> check drawn examples are compatible - dims = draw(dims) + _dims = draw(dims) + _data = draw(data) - if isinstance(dims, List): - data = draw(data) - if data.ndim != len(dims): + if isinstance(_dims, Sequence): + dim_names = list(_dims) + if _data.ndim != len(_dims): raise InvalidArgument( - f"Strategy attempting to generate data with {data.ndim} dims but {len(dims)} " + f"Strategy attempting to generate data with {_data.ndim} dims but {len(_dims)} " "unique dimension names. Please only pass strategies which are guaranteed to " "draw compatible examples for data and dims." 
) - elif isinstance(dims, Mapping): + elif isinstance(_dims, Mapping): # should be a mapping of form {dim_names: lengths} - data = draw(data) - shape = tuple(dims.values()) - if data.shape != shape: + dim_names, shape = list(_dims.keys()), tuple(_dims.values()) + if _data.shape != shape: raise InvalidArgument( - f"Strategy attempting to generate data with shape {data.shape} dims but dimension " + f"Strategy attempting to generate data with shape {_data.shape} dims but dimension " f"sizes implying shape {shape}. Please only pass strategies which are guaranteed to " "draw compatible examples for data and dims." ) @@ -257,16 +258,16 @@ def variables( else: # nothing provided, so generate everything consistently by drawing dims to match data - data = draw(np_arrays()) - dims = draw(dimension_names(min_dims=data.ndim, max_dims=data.ndim)) + _data = draw(np_arrays()) + dim_names = draw(dimension_names(min_dims=_data.ndim, max_dims=_data.ndim)) - return xr.Variable(dims=dims, data=data, attrs=draw(attrs)) + return xr.Variable(dims=dim_names, data=_data, attrs=draw(attrs)) @st.composite def _unique_subset_of( draw: st.DrawFn, d: Mapping[Hashable, Any] -) -> st.SearchStrategy[Mapping[Hashable, Any]]: +) -> Mapping[Hashable, Any]: subset_keys = draw(st.lists(st.sampled_from(list(d.keys())), unique=True)) return {k: d[k] for k in subset_keys} @@ -276,8 +277,8 @@ def _alignable_variables( draw: st.DrawFn, *, var_names: st.SearchStrategy[str], - dim_sizes: Mapping[str, int], -) -> st.SearchStrategy[Mapping[str, xr.Variable]]: + dim_sizes: Mapping[Hashable, int], +) -> Mapping[Hashable, xr.Variable]: """ Generates dicts of names mapping to variables with compatible (i.e. alignable) dimensions and sizes. 
""" @@ -293,9 +294,9 @@ def _alignable_variables( def coordinate_variables( draw: st.DrawFn, *, - dim_sizes: Mapping[str, int], - coord_names: st.SearchStrategy[str] = names(), -) -> st.SearchStrategy[Mapping[str, xr.Variable]]: + dim_sizes: Mapping[Hashable, int], + coord_names: st.SearchStrategy[Hashable] = names(), +) -> Mapping[Hashable, xr.Variable]: """ Generates dicts of alignable Variable objects for use as coordinates. @@ -321,7 +322,7 @@ def coordinate_variables( dim_names = list(dim_sizes.keys()) # Possibly generate 1D "dimension coordinates" - explicit possibility not to helps with shrinking - if dim_names and draw(st.booleans()): + if len(dim_names) > 0 and draw(st.booleans()): # first generate subset of dimension names - these set which dimension coords will be included dim_coord_names_and_lengths = draw(_unique_subset_of(dim_sizes)) @@ -347,7 +348,9 @@ def coordinate_variables( return all_coords -def _sizes_from_dim_names(dims: Sequence[str]) -> st.SearchStrategy[Mapping[str, int]]: +def _sizes_from_dim_names( + dims: Sequence[Hashable], +) -> st.SearchStrategy[Dict[Hashable, int]]: size_along_dim = st.integers(min_value=1, max_value=6) return st.fixed_dictionaries({d: size_along_dim for d in dims}) @@ -357,10 +360,10 @@ def dataarrays( draw: st.DrawFn, *, data: st.SearchStrategy[T_Array] = None, - dims: st.SearchStrategy[Union[Sequence[str], Mapping[str, int]]] = None, - name: st.SearchStrategy[Union[str, None]] = names(), + dims: st.SearchStrategy[Union[Sequence[Hashable], Mapping[Hashable, int]]] = None, + name: st.SearchStrategy[Union[Hashable, None]] = names(), attrs: st.SearchStrategy[Mapping] = attrs(), -) -> st.SearchStrategy[xr.DataArray]: +) -> xr.DataArray: """ Generates arbitrary xarray.DataArray objects. @@ -391,71 +394,73 @@ def dataarrays( If custom strategies passed try to draw examples which together cannot create a valid DataArray. """ - name = draw(st.none() | name) + _name = draw(st.none() | name) # TODO add a coords argument? 
if data is not None and dims is None: # no dims -> generate dims to match data - data = draw(data) - dim_names = draw(dimension_names(min_dims=data.ndim, max_dims=data.ndim)) - dim_sizes = {n: l for n, l in zip(dim_names, data.shape)} + _data = draw(data) + dim_names = draw(dimension_names(min_dims=_data.ndim, max_dims=_data.ndim)) + dim_sizes: Mapping[Hashable, int] = { + n: l for n, l in zip(dim_names, _data.shape) + } coords = draw(coordinate_variables(dim_sizes=dim_sizes)) elif data is None and dims is not None: # no data -> generate data to match dims - dims = draw(dims) - if isinstance(dims, Sequence): - dim_sizes = draw(_sizes_from_dim_names(dims)) - elif isinstance(dims, Mapping): + _dims = draw(dims) + if isinstance(_dims, Sequence): + dim_sizes = draw(_sizes_from_dim_names(_dims)) + elif isinstance(_dims, Mapping): # should be a mapping of form {dim_names: lengths} - dim_sizes = dims + dim_sizes = _dims else: - raise ValueError(f"Invalid type for dims argument - got type {type(dims)}") + raise ValueError(f"Invalid type for dims argument - got type {type(_dims)}") dim_names, shape = list(dim_sizes.keys()), tuple(dim_sizes.values()) - data = draw(np_arrays(shape=shape)) + _data = draw(np_arrays(shape=shape)) coords = draw(coordinate_variables(dim_sizes=dim_sizes)) elif data is not None and dims is not None: # both data and dims provided -> check drawn examples are compatible - dims = draw(dims) - data = draw(data) - if isinstance(dims, Sequence): - dim_names = dims - if data.ndim != len(dims): + _dims = draw(dims) + _data = draw(data) + if isinstance(_dims, Sequence): + dim_names = list(_dims) + if _data.ndim != len(_dims): raise InvalidArgument( - f"Strategy attempting to generate data with {data.ndim} dims but {len(dims)} " + f"Strategy attempting to generate data with {_data.ndim} dims but {len(_dims)} " "unique dimension names. Please only pass strategies which are guaranteed to " "draw compatible examples for data and dims." 
) - dim_sizes = {n: l for n, l in zip(dims, data.shape)} - elif isinstance(dims, Mapping): + dim_sizes = {n: l for n, l in zip(_dims, _data.shape)} + elif isinstance(_dims, Mapping): # should be a mapping of form {dim_names: lengths} - dim_sizes = dims + dim_sizes = _dims dim_names, shape = list(dim_sizes.keys()), tuple(dim_sizes.values()) - if data.shape != shape: + if _data.shape != shape: raise InvalidArgument( - f"Strategy attempting to generate data with shape {data.shape} dims but dimension " + f"Strategy attempting to generate data with shape {_data.shape} dims but dimension " f"sizes implying shape {shape}. Please only pass strategies which are guaranteed to " "draw compatible examples for data and dims." ) else: - raise ValueError(f"Invalid type for dims argument - got type {type(dims)}") + raise ValueError(f"Invalid type for dims argument - got type {type(_dims)}") coords = draw(coordinate_variables(dim_sizes=dim_sizes)) else: # nothing provided, so generate everything consistently by drawing dims to match data, and coords to match both - data = draw(np_arrays()) - dim_names = draw(dimension_names(min_dims=data.ndim, max_dims=data.ndim)) - dim_sizes = {n: l for n, l in zip(dim_names, data.shape)} + _data = draw(np_arrays()) + dim_names = draw(dimension_names(min_dims=_data.ndim, max_dims=_data.ndim)) + dim_sizes = {n: l for n, l in zip(dim_names, _data.shape)} coords = draw(coordinate_variables(dim_sizes=dim_sizes)) return xr.DataArray( - data=data, + data=_data, coords=coords, - name=name, + name=_name, dims=dim_names, attrs=draw(attrs), ) @@ -465,9 +470,9 @@ def dataarrays( def data_variables( draw: st.DrawFn, *, - dim_sizes: Mapping[str, int], - var_names: st.SearchStrategy[str] = names(), -) -> st.SearchStrategy[Mapping[str, xr.Variable]]: + dim_sizes: Mapping[Hashable, int], + var_names: st.SearchStrategy[Hashable] = names(), +) -> Mapping[Hashable, xr.Variable]: """ Generates dicts of alignable Variable objects for use as Dataset data variables. 
@@ -501,10 +506,10 @@ def data_variables( def datasets( draw: st.DrawFn, *, - data_vars: st.SearchStrategy[Mapping[str, xr.Variable]] = None, - dims: st.SearchStrategy[Union[Sequence[str], Mapping[str, int]]] = None, + data_vars: st.SearchStrategy[Mapping[Hashable, xr.Variable]] = None, + dims: st.SearchStrategy[Union[Sequence[Hashable], Mapping[Hashable, int]]] = None, attrs: st.SearchStrategy[Mapping] = attrs(), -) -> st.SearchStrategy[xr.Dataset]: +) -> xr.Dataset: """ Generates arbitrary xarray.Dataset objects. @@ -538,48 +543,48 @@ def datasets( if data_vars is not None and dims is None: # no dims -> generate dims to match data - data_vars = draw(data_vars) - dim_sizes = _find_overall_sizes(data_vars) + _data_vars = draw(data_vars) + dim_sizes = _find_overall_sizes(_data_vars) # only draw coordinate variables whose names don't conflict with data variables - allowed_coord_names = names().filter(lambda n: n not in list(data_vars.keys())) + allowed_coord_names = names().filter(lambda n: n not in list(_data_vars.keys())) coords = draw( coordinate_variables(coord_names=allowed_coord_names, dim_sizes=dim_sizes) ) elif data_vars is None and dims is not None: # no data -> generate data to match dims - dims = draw(dims) - if isinstance(dims, Sequence): - dim_sizes = draw(_sizes_from_dim_names(dims)) - elif isinstance(dims, Mapping): + _dims = draw(dims) + if isinstance(_dims, Sequence): + dim_sizes = draw(_sizes_from_dim_names(_dims)) + elif isinstance(_dims, Mapping): # should be a mapping of form {dim_names: lengths} - dim_sizes = dims + dim_sizes = _dims else: - raise ValueError(f"Invalid type for dims argument - got type {type(dims)}") + raise ValueError(f"Invalid type for dims argument - got type {type(_dims)}") coords = draw(coordinate_variables(dim_sizes=dim_sizes)) coord_names = list(coords.keys()) allowed_data_var_names = names().filter(lambda n: n not in coord_names) - data_vars = draw( + _data_vars = draw( data_variables(dim_sizes=dim_sizes, 
var_names=allowed_data_var_names) ) elif data_vars is not None and dims is not None: # both data and dims provided -> check drawn examples are compatible - dims = draw(dims) - if isinstance(dims, Sequence): + _dims = draw(dims) + if isinstance(_dims, Sequence): # TODO support dims as list too? raise NotImplementedError() - elif isinstance(dims, Mapping): + elif isinstance(_dims, Mapping): # should be a mapping of form {dim_names: lengths} - dim_sizes = dims - data_vars = draw(data_vars) - _check_compatible_sizes(data_vars, dim_sizes) + dim_sizes = _dims + _data_vars = draw(data_vars) + _check_compatible_sizes(_data_vars, dim_sizes) else: - raise ValueError(f"Invalid type for dims argument - got type {type(dims)}") + raise ValueError(f"Invalid type for dims argument - got type {type(_dims)}") # only draw coordinate variables whose names don't conflict with data variables - allowed_coord_names = names().filter(lambda n: n not in list(data_vars.keys())) + allowed_coord_names = names().filter(lambda n: n not in list(_data_vars.keys())) coords = draw( coordinate_variables(coord_names=allowed_coord_names, dim_sizes=dim_sizes) ) @@ -589,23 +594,24 @@ def datasets( dim_sizes = draw(dimension_sizes()) coords = draw(coordinate_variables(dim_sizes=dim_sizes)) allowed_data_var_names = names().filter(lambda n: n not in list(coords.keys())) - data_vars = draw( + _data_vars = draw( data_variables(dim_sizes=dim_sizes, var_names=allowed_data_var_names) ) - return xr.Dataset(data_vars=data_vars, coords=coords, attrs=draw(attrs)) + return xr.Dataset(data_vars=_data_vars, coords=coords, attrs=draw(attrs)) -def _find_overall_sizes(vars: Mapping[str, xr.Variable]) -> Mapping[str, int]: +def _find_overall_sizes(vars: Mapping[Hashable, xr.Variable]) -> Mapping[Hashable, int]: """Given a set of variables, find their common sizes.""" # TODO raise an error if inconsistent (i.e. 
if different values appear under same key) + # TODO narrow type by checking if values are not ints sizes_dicts = [v.sizes for v in vars.values()] dim_sizes = {d: s for dim_sizes in sizes_dicts for d, s in dim_sizes.items()} return dim_sizes def _check_compatible_sizes( - vars: Mapping[str, xr.Variable], dim_sizes: Mapping[str, int] + vars: Mapping[Hashable, xr.Variable], dim_sizes: Mapping[Hashable, int] ): """Check set of variables have sizes compatible with given dim_sizes. If not raise InvalidArgument error.""" From f81e14f0ec673e2c23f77b9d0a9150fecd404828 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 9 Sep 2022 16:19:29 -0400 Subject: [PATCH 84/95] move numpy strategies import --- doc/user-guide/testing.rst | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 6802be434c9..95fc2fceb99 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -57,6 +57,12 @@ These strategies are accessible in the :py:mod:`xarray.testing.strategies` modul testing.strategies.data_variables testing.strategies.datasets +These build upon the numpy strategies offered in :py:mod:`hypothesis.extra.numpy`: + +.. ipython:: python + + import hypothesis.extra.numpy as npst + Generating Examples ~~~~~~~~~~~~~~~~~~~ @@ -114,8 +120,6 @@ For example you could create a ``chunks`` strategy to specify particular chunkin .. code-block:: - import hypothesis.extra.numpy as npst - @st.given(st.data()) def test_something_else_inefficiently(data): arrs = npst.arrays(dtype=numeric_dtypes) # generates arrays of any shape @@ -203,7 +207,6 @@ different type: :okexcept: import sparse - import hypothesis.extra.numpy as npst .. 
ipython:: python :okexcept: From af24af558a9460a3aac70d8f8f2a3cd02d5bd5cc Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 9 Sep 2022 23:51:56 -0400 Subject: [PATCH 85/95] reduce sizes --- xarray/testing/strategies.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 189002ac035..9d0549751c8 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -65,7 +65,7 @@ def names() -> st.SearchStrategy[str]: Requires the hypothesis package to be installed. """ - return st.text(st.characters(), min_size=1) + return st.text(st.characters(), min_size=1, max_size=5) def dimension_names( @@ -127,7 +127,7 @@ def dimension_sizes( """ if max_side is None: - max_side = min_side + 5 + max_side = min_side + 3 return st.dictionaries( keys=dim_names, @@ -141,10 +141,10 @@ def dimension_sizes( _small_arrays = np_arrays( shape=npst.array_shapes( max_side=2, - max_dims=3, + max_dims=2, ) ) -_attr_values = st.none() | st.booleans() | st.text(st.characters()) | _small_arrays +_attr_values = st.none() | st.booleans() | st.text(st.characters(), max_size=5) | _small_arrays def attrs() -> st.SearchStrategy[Mapping[Hashable, Any]]: From 9777c2afe00cd28b768610c6b9c3f9324091e080 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 10 Sep 2022 03:53:21 +0000 Subject: [PATCH 86/95] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/testing/strategies.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 9d0549751c8..ff1ef9196b6 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -144,7 +144,9 @@ def dimension_sizes( max_dims=2, ) ) -_attr_values = st.none() | st.booleans() | st.text(st.characters(), max_size=5) | _small_arrays +_attr_values = ( + 
st.none() | st.booleans() | st.text(st.characters(), max_size=5) | _small_arrays +) def attrs() -> st.SearchStrategy[Mapping[Hashable, Any]]: From 7841dd5c87ba493651bb8e995d2a494e536e9487 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Mon, 24 Jul 2023 10:12:54 -0400 Subject: [PATCH 87/95] fix some api links in docs --- doc/user-guide/testing.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 95fc2fceb99..2905ef375ef 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -152,7 +152,7 @@ over all other aspects, then use :py:func:`hypothesis.strategies.just()`. (This is technically another example of chaining strategies - :py:func:`hypothesis.strategies.just()` is simply a special strategy that just contains a single example.) -To fix the length of dimensions you can instead pass `dims` as a mapping of dimension names to lengths +To fix the length of dimensions you can instead pass ``dims`` as a mapping of dimension names to lengths (i.e. following xarray objects' ``.sizes()`` property), e.g. .. ipython:: python @@ -187,17 +187,17 @@ Here we have used one of hypothesis' built-in strategies :py:func:`hypothesis.st strategy which generates mappings of dimension names to lengths (i.e. the ``size`` of the xarray object we want). This particular strategy will always generate an ``x`` dimension of length 2, and a ``y`` dimension of length either 3 or 4, and will sometimes also generate a ``z`` dimension of length 2. -By feeding this strategy for dictionaries into the `dims` argument of xarray's `dataarrays` strategy, we can generate -arbitrary ``DataArray`` objects whose dimensions will always match these specifications. +By feeding this strategy for dictionaries into the ``dims`` argument of xarray's :py:func:`~st.dataarrays` strategy, +we can generate arbitrary :py:class:`~xarray.DataArray` objects whose dimensions will always match these specifications. 
Creating Duck-type Arrays ~~~~~~~~~~~~~~~~~~~~~~~~~ Xarray objects don't have to wrap numpy arrays, in fact they can wrap any array type which presents the same API as a -numpy array (so-called "duck array wrapping", see :ref:`internals.duck_arrays`). +numpy array (so-called "duck array wrapping", see :ref:`wrapping numpy-like arrays `). -Imagine we want to write a strategy which generates arbitrary `DataArray` objects, each of which wraps a +Imagine we want to write a strategy which generates arbitrary ``DataArray`` objects, each of which wraps a :py:class:`sparse.COO` array instead of a ``numpy.ndarray``. How could we do that? There are two ways: 1. Create a xarray object with numpy data and use ``.map()`` to convert the underlying array to a From a6fc06360d165514108fa45b41557c60887320a2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 2 Nov 2023 11:38:37 +0000 Subject: [PATCH 88/95] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/testing/__init__.py | 2 +- xarray/testing/strategies.py | 11 +++++------ xarray/tests/test_strategies.py | 2 -- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/xarray/testing/__init__.py b/xarray/testing/__init__.py index c9d1f9b1790..7f35a2b2be2 100644 --- a/xarray/testing/__init__.py +++ b/xarray/testing/__init__.py @@ -1,4 +1,4 @@ -from .testing import ( # noqa: F401 +from xarray.testing.testing import ( # noqa: F401 _assert_dataarray_invariants, _assert_dataset_invariants, _assert_indexes_invariants_checks, diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index ff1ef9196b6..cb31d46b3ed 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,4 +1,5 @@ -from typing import Any, Dict, Hashable, List, Mapping, Sequence, Tuple, Union +from collections.abc import Hashable, Mapping, Sequence +from typing import Any, Union import 
hypothesis.extra.numpy as npst import hypothesis.strategies as st @@ -40,7 +41,7 @@ def numeric_dtypes() -> st.SearchStrategy[np.dtype]: def np_arrays( *, shape: Union[ - Tuple[int, ...], st.SearchStrategy[Tuple[int, ...]] + tuple[int, ...], st.SearchStrategy[tuple[int, ...]] ] = npst.array_shapes(max_side=4), dtype: Union[np.dtype, st.SearchStrategy[np.dtype]] = numeric_dtypes(), ) -> st.SearchStrategy[np.ndarray]: @@ -72,7 +73,7 @@ def dimension_names( *, min_dims: int = 0, max_dims: int = 3, -) -> st.SearchStrategy[List[Hashable]]: +) -> st.SearchStrategy[list[Hashable]]: """ Generates an arbitrary list of valid dimension names. @@ -320,7 +321,6 @@ def coordinate_variables( if draw( st.booleans() ): # Allow for no coordinate variables - explicit possibility not to helps with shrinking - dim_names = list(dim_sizes.keys()) # Possibly generate 1D "dimension coordinates" - explicit possibility not to helps with shrinking @@ -337,7 +337,6 @@ def coordinate_variables( # Possibly generate ND "non-dimension coordinates" - explicit possibility not to helps with shrinking if draw(st.booleans()): - # can't have same name as a dimension valid_non_dim_coord_names = coord_names.filter(lambda n: n not in dim_names) non_dim_coords = draw( @@ -352,7 +351,7 @@ def coordinate_variables( def _sizes_from_dim_names( dims: Sequence[Hashable], -) -> st.SearchStrategy[Dict[Hashable, int]]: +) -> st.SearchStrategy[dict[Hashable, int]]: size_along_dim = st.integers(min_value=1, max_value=6) return st.fixed_dictionaries({d: size_along_dim for d in dims}) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 4c628be168d..aae43587c1e 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -172,7 +172,6 @@ def test_given_fixed_sizes_and_arbitrary_data(self, data): class TestCoordinateVariablesStrategy: @given(coordinate_variables(dim_sizes={"x": 2, "y": 3})) def test_alignable(self, coord_vars): - # TODO there must be a better way 
of checking align-ability than this for v in coord_vars.values(): if "x" in v.dims: @@ -320,7 +319,6 @@ def test_given_dims(self, data): @given(st.data()) def test_given_data_and_dims(self, data): - # pass dims as mapping dim_sizes = {"x": 3, "y": 4} data_vars = data.draw(data_variables(dim_sizes=dim_sizes)) From 0b13771f7ee190b23a2dfd80a53415bcc234ea01 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Mon, 1 Apr 2024 10:54:06 -0400 Subject: [PATCH 89/95] remove np_arrays strategy --- xarray/testing/strategies.py | 14 +++++++------- xarray/tests/test_strategies.py | 27 --------------------------- 2 files changed, 7 insertions(+), 34 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 2c776b4b7ad..94a6b933dcf 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -468,7 +468,7 @@ def _alignable_variables( Generates dicts of names mapping to variables with compatible (i.e. alignable) dimensions and sizes. """ - alignable_dim_sizes = draw(_unique_subset_of(dim_sizes)) if dim_sizes else {} + alignable_dim_sizes = draw(unique_subset_of(dim_sizes)) if dim_sizes else {} vars = variables(dims=st.just(alignable_dim_sizes)) # TODO don't hard code max number of variables @@ -508,12 +508,12 @@ def coordinate_variables( # Possibly generate 1D "dimension coordinates" - explicit possibility not to helps with shrinking if len(dim_names) > 0 and draw(st.booleans()): # first generate subset of dimension names - these set which dimension coords will be included - dim_coord_names_and_lengths = draw(_unique_subset_of(dim_sizes)) + dim_coord_names_and_lengths = draw(unique_subset_of(dim_sizes)) # then generate 1D variables for each name dim_coords = { - n: draw(variables(dims=st.just({n: l}))) - for n, l in dim_coord_names_and_lengths.items() + n: draw(variables(dims=st.just({n: length}))) + for n, length in dim_coord_names_and_lengths.items() } all_coords.update(dim_coords) @@ -586,7 +586,7 @@ def dataarrays( _data = 
draw(data) dim_names = draw(dimension_names(min_dims=_data.ndim, max_dims=_data.ndim)) dim_sizes: Mapping[Hashable, int] = { - n: l for n, l in zip(dim_names, _data.shape) + n: length for n, length in zip(dim_names, _data.shape) } coords = draw(coordinate_variables(dim_sizes=dim_sizes)) @@ -617,7 +617,7 @@ def dataarrays( "unique dimension names. Please only pass strategies which are guaranteed to " "draw compatible examples for data and dims." ) - dim_sizes = {n: l for n, l in zip(_dims, _data.shape)} + dim_sizes = {n: length for n, length in zip(_dims, _data.shape)} elif isinstance(_dims, Mapping): # should be a mapping of form {dim_names: lengths} dim_sizes = _dims @@ -637,7 +637,7 @@ def dataarrays( # nothing provided, so generate everything consistently by drawing dims to match data, and coords to match both _data = draw(np_arrays()) dim_names = draw(dimension_names(min_dims=_data.ndim, max_dims=_data.ndim)) - dim_sizes = {n: l for n, l in zip(dim_names, _data.shape)} + dim_sizes = {n: length for n, length in zip(dim_names, _data.shape)} coords = draw(coordinate_variables(dim_sizes=dim_sizes)) return xr.DataArray( diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index aa7e932cf65..cbb00f9aa53 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -23,7 +23,6 @@ datasets, dimension_names, dimension_sizes, - np_arrays, numeric_dtypes, supported_dtypes, unique_subset_of, @@ -34,32 +33,6 @@ ALLOWED_ATTRS_VALUES_TYPES = (int, bool, str, np.ndarray) -class TestNumpyArraysStrategy: - @given(np_arrays()) - def test_given_nothing(self, arr): - assert isinstance(arr, np.ndarray) - - @given(np_arrays(dtype=np.dtype("int32"))) - def test_fixed_dtype(self, arr): - assert arr.dtype == np.dtype("int32") - - @given(st.data()) - def test_arbitrary_valid_dtype(self, data): - valid_dtype = data.draw(numeric_dtypes()) - arr = data.draw(np_arrays(dtype=valid_dtype)) - assert arr.dtype == valid_dtype - - 
@given(np_arrays(shape=(2, 3))) - def test_fixed_shape(self, arr): - assert arr.shape == (2, 3) - - @given(st.data()) - def test_arbitrary_shape(self, data): - shape = data.draw(npst.array_shapes()) - arr = data.draw(np_arrays(shape=shape)) - assert arr.shape == shape - - class TestDimensionNamesStrategy: @given(dimension_names()) def test_types(self, dims): From b44a4a2ab6b42b404cd0e6b945af5d1431dbabeb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 1 Apr 2024 14:55:02 +0000 Subject: [PATCH 90/95] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/tests/test_strategies.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index cbb00f9aa53..bb57bbd21d3 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -23,7 +23,6 @@ datasets, dimension_names, dimension_sizes, - numeric_dtypes, supported_dtypes, unique_subset_of, variables, From cdcfbf4e4004a3a77cefe34318f6c893a5cc6105 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Mon, 1 Apr 2024 12:00:33 -0400 Subject: [PATCH 91/95] fix bad merge of whatsnew --- doc/whats-new.rst | 9 --------- 1 file changed, 9 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c26cfc3d756..542856081c7 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -450,15 +450,6 @@ New Features Accessible under :py:func:`testing.strategies`, and documented in a new page on testing in the User Guide. (:issue:`6911`, :pull:`6908`) By `Tom Nicholas `_. -- Enable taking the mean of dask-backed :py:class:`cftime.datetime` arrays - (:pull:`6556`, :pull:`6940`). By `Deepak Cherian - `_ and `Spencer Clark - `_. -- Allow creating Xarray objects where a multidimensional variable shares its name - with a dimension. Examples include output from finite volume models like FVCOM. 
- (:issue:`2233`, :pull:`7989`) - By `Deepak Cherian `_ and `Benoit Bovy `_. - - Use `opt_einsum `_ for :py:func:`xarray.dot` by default if installed. By `Deepak Cherian `_. (:issue:`7764`, :pull:`8373`). - Add ``DataArray.dt.total_seconds()`` method to match the Pandas API. (:pull:`8435`). From 0aab116ce9c90a4bd6d8b75be04c03c6e6cc80f3 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Mon, 1 Apr 2024 12:01:18 -0400 Subject: [PATCH 92/95] fix bad merge in strategies --- xarray/testing/strategies.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 94a6b933dcf..7899349c245 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -34,15 +34,6 @@ ] -# required to exclude weirder dtypes e.g. unicode, byte_string, array, or nested dtypes. -def numeric_dtypes() -> st.SearchStrategy[np.dtype]: - """ - Generates only those numpy dtypes which xarray can handle. - - Requires the hypothesis package to be installed. 
- """ - - class ArrayStrategyFn(Protocol[T_DuckArray]): def __call__( self, From 525a4b64e78f8cfa4809eec986fef181eb4b78b6 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 22 Aug 2024 07:56:54 -0600 Subject: [PATCH 93/95] Update xarray/testing/strategies.py Co-authored-by: Justus Magin --- xarray/testing/strategies.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index ee5588d7080..a0e59599b27 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -518,9 +518,8 @@ def coordinate_variables( all_coords = {} - if draw( - st.booleans() - ): # Allow for no coordinate variables - explicit possibility not to helps with shrinking + # Allow for no coordinate variables - explicit possibility not to helps with shrinking + if draw(st.booleans()): dim_names = list(dim_sizes.keys()) # Possibly generate 1D "dimension coordinates" - explicit possibility not to helps with shrinking From b343f4f1c5d57b31364ec5c3b76ad3a7673baf02 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 22 Aug 2024 07:59:19 -0600 Subject: [PATCH 94/95] one more --- xarray/testing/strategies.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index a0e59599b27..b15f9a3225a 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -688,9 +688,8 @@ def data_variables( var_names: Strategy generating strings Allowed names for data variables. Needed to avoid conflict with names of coordinate variables & dimensions. 
""" - if draw( - st.booleans() - ): # Allow for no coordinate variables - explicit possibility not to helps with shrinking + # Allow for no coordinate variables - explicit possibility not to helps with shrinking + if draw(st.booleans()): dim_names = list(dim_sizes.keys()) # can't have same name as a dimension From e6d8e645da88d15df4824e112b0e85e8a83a7d5d Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 22 Aug 2024 08:02:56 -0600 Subject: [PATCH 95/95] No implicit Optional --- xarray/testing/strategies.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index b15f9a3225a..1f7b3e7a819 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,7 +1,7 @@ from __future__ import annotations from collections.abc import Hashable, Iterable, Mapping, Sequence -from typing import TYPE_CHECKING, Any, Protocol, Union, overload +from typing import TYPE_CHECKING, Any, Optional, Protocol, Union, overload try: import hypothesis.strategies as st @@ -559,8 +559,10 @@ def _sizes_from_dim_names( def dataarrays( draw: st.DrawFn, *, - data: st.SearchStrategy[T_DuckArray] = None, - dims: st.SearchStrategy[Union[Sequence[Hashable], Mapping[Hashable, int]]] = None, + data: Optional[st.SearchStrategy[T_DuckArray]] = None, + dims: Optional[ + st.SearchStrategy[Union[Sequence[Hashable], Mapping[Hashable, int]]] + ] = None, name: st.SearchStrategy[Union[Hashable, None]] = names(), attrs: st.SearchStrategy[Mapping] = attrs(), ) -> xr.DataArray: @@ -708,8 +710,10 @@ def data_variables( def datasets( draw: st.DrawFn, *, - data_vars: st.SearchStrategy[Mapping[Hashable, xr.Variable]] = None, - dims: st.SearchStrategy[Union[Sequence[Hashable], Mapping[Hashable, int]]] = None, + data_vars: Optional[st.SearchStrategy[Mapping[Hashable, xr.Variable]]] = None, + dims: Optional[ + st.SearchStrategy[Union[Sequence[Hashable], Mapping[Hashable, int]]] + ] = None, attrs: 
st.SearchStrategy[Mapping] = attrs(), ) -> xr.Dataset: """