Skip to content

allow incomplete hypercubes in combine_by_coords #3649

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Dec 24, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ New Features

Bug fixes
~~~~~~~~~
- Fix :py:meth:`xarray.combine_by_coords` to allow for combining incomplete
hypercubes of Datasets (:issue:`3648`). By `Ian Bolliger
<https://github.com/bolliger32>`_.
- Fix :py:meth:`xarray.combine_by_coords` when combining cftime coordinates
which span long time intervals (:issue:`3535`). By `Spencer Clark
<https://github.com/spencerkclark>`_.
Expand Down
36 changes: 29 additions & 7 deletions xarray/core/combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,11 +115,12 @@ def _infer_concat_order_from_coords(datasets):
return combined_ids, concat_dims


def _check_shape_tile_ids(combined_tile_ids):
def _check_dimension_depth_tile_ids(combined_tile_ids):
"""
Check all tuples are the same length, i.e. check that all lists are
nested to the same depth.
"""
tile_ids = combined_tile_ids.keys()

# Check all tuples are the same length
# i.e. check that all lists are nested to the same depth
nesting_depths = [len(tile_id) for tile_id in tile_ids]
if not nesting_depths:
nesting_depths = [0]
Expand All @@ -128,8 +129,13 @@ def _check_shape_tile_ids(combined_tile_ids):
"The supplied objects do not form a hypercube because"
" sub-lists do not have consistent depths"
)
# return these just to be reused in _check_shape_tile_ids
return tile_ids, nesting_depths

# Check all lists along one dimension are same length

def _check_shape_tile_ids(combined_tile_ids):
"""Check all lists along one dimension are same length."""
tile_ids, nesting_depths = _check_dimension_depth_tile_ids(combined_tile_ids)
for dim in range(nesting_depths[0]):
indices_along_dim = [tile_id[dim] for tile_id in tile_ids]
occurrences = Counter(indices_along_dim)
Expand Down Expand Up @@ -536,7 +542,8 @@ def combine_by_coords(
coords : {'minimal', 'different', 'all' or list of str}, optional
As per the 'data_vars' kwarg, but for coordinate variables.
fill_value : scalar, optional
Value to use for newly missing values. If None, a ValueError is raised
when the passed Datasets do not form a complete hypercube.
join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
String indicating how to combine differing indexes
(excluding concat_dim) in objects
Expand Down Expand Up @@ -653,6 +660,15 @@ def combine_by_coords(
temperature (y, x) float64 1.654 10.63 7.015 2.543 ... 12.46 2.22 15.96
precipitation (y, x) float64 0.2136 0.9974 0.7603 ... 0.6125 0.4654 0.5953

>>> xr.combine_by_coords([x1, x2, x3])
<xarray.Dataset>
Dimensions: (x: 6, y: 4)
Coordinates:
* x (x) int64 10 20 30 40 50 60
* y (y) int64 0 1 2 3
Data variables:
temperature (y, x) float64 1.654 10.63 7.015 nan ... 12.46 2.22 15.96
precipitation (y, x) float64 0.2136 0.9974 0.7603 ... 0.6125 0.4654 0.5953
"""

# Group by data vars
Expand All @@ -667,7 +683,13 @@ def combine_by_coords(
list(datasets_with_same_vars)
)

_check_shape_tile_ids(combined_ids)
if fill_value is None:
# check that the datasets form a complete hypercube
_check_shape_tile_ids(combined_ids)
else:
# check only that all datasets have the same dimension depth for
# these vars
_check_dimension_depth_tile_ids(combined_ids)

# Concatenate along all of concat_dims one by one to create single ds
concatenated = _combine_nd(
Expand Down
16 changes: 16 additions & 0 deletions xarray/tests/test_combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -711,6 +711,22 @@ def test_check_for_impossible_ordering(self):
):
combine_by_coords([ds1, ds0])

def test_combine_by_coords_incomplete_hypercube(self):
    """Combine three tiles of a 2x2 (y, x) grid, leaving one corner missing.

    With the default ``fill_value`` the combine succeeds and the absent
    (y=1, x=1) cell is NaN; with ``fill_value=None`` a ValueError is raised.
    """
    # (y, x) positions of the tiles that are present; (1, 1) is absent.
    tile_positions = [(0, 0), (1, 0), (0, 1)]
    x1, x2, x3 = (
        Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [y], "x": [x]})
        for y, x in tile_positions
    )
    tiles = [x1, x2, x3]

    # Default fill_value: the missing corner is filled in.
    filled_grid = [[1, 1], [1, np.nan]]
    expected = Dataset(
        {"a": (("y", "x"), filled_grid)},
        coords={"y": [0, 1], "x": [0, 1]},
    )
    actual = combine_by_coords(tiles)
    assert_identical(expected, actual)

    # fill_value=None: the incomplete hypercube is rejected.
    with pytest.raises(ValueError):
        combine_by_coords(tiles, fill_value=None)


@pytest.mark.filterwarnings(
"ignore:In xarray version 0.15 `auto_combine` " "will be deprecated"
Expand Down