From f329057f39f4408175afcb409797548fbc3cfefc Mon Sep 17 00:00:00 2001 From: Ian Bolliger Date: Thu, 19 Dec 2019 21:55:24 +0000 Subject: [PATCH 1/6] allow incomplete hypercubes --- xarray/core/combine.py | 16 ++++++++++------ xarray/tests/test_combine.py | 12 ++++++++++++ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 65087b05cc0..0b361278e34 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -115,11 +115,10 @@ def _infer_concat_order_from_coords(datasets): return combined_ids, concat_dims -def _check_shape_tile_ids(combined_tile_ids): +def _check_dimension_depth_tile_ids(combined_tile_ids): + """Check all tuples are the same length + # i.e. check that all lists are nested to the same depth""" tile_ids = combined_tile_ids.keys() - - # Check all tuples are the same length - # i.e. check that all lists are nested to the same depth nesting_depths = [len(tile_id) for tile_id in tile_ids] if not nesting_depths: nesting_depths = [0] @@ -128,8 +127,13 @@ def _check_shape_tile_ids(combined_tile_ids): "The supplied objects do not form a hypercube because" " sub-lists do not have consistent depths" ) + # return these just to be reused in _check_shape_tile_ids + return tile_ids, nesting_depths - # Check all lists along one dimension are same length + +def _check_shape_tile_ids(combined_tile_ids): + """Check all lists along one dimension are same length.""" + tile_ids, nesting_depths = _check_dimension_depth_tile_ids(combined_tile_ids) for dim in range(nesting_depths[0]): indices_along_dim = [tile_id[dim] for tile_id in tile_ids] occurrences = Counter(indices_along_dim) @@ -667,7 +671,7 @@ def combine_by_coords( list(datasets_with_same_vars) ) - _check_shape_tile_ids(combined_ids) + _check_dimension_depth_tile_ids(combined_ids) # Concatenate along all of concat_dims one by one to create single ds concatenated = _combine_nd( diff --git a/xarray/tests/test_combine.py 
b/xarray/tests/test_combine.py index a29fe0190cf..94868719683 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -711,6 +711,18 @@ def test_check_for_impossible_ordering(self): ): combine_by_coords([ds1, ds0]) + def test_combine_by_coords_incomplete_hypercube(self): + # Want to check that this will return + x1 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [0]}) + x2 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [1], "x": [0]}) + x3 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [1]}) + actual = combine_by_coords([x1, x2, x3]) + expected = Dataset( + {"a": (("y", "x"), [[1, 1], [1, np.nan]])}, + coords={"y": [0, 1], "x": [0, 1]}, + ) + assert_identical(expected, actual) + @pytest.mark.filterwarnings( "ignore:In xarray version 0.15 `auto_combine` " "will be deprecated" From 04b8aa33b8dd09f481e5529c10e3d1834e2db4bb Mon Sep 17 00:00:00 2001 From: Ian Bolliger Date: Thu, 19 Dec 2019 21:57:09 +0000 Subject: [PATCH 2/6] reformat docstring --- xarray/core/combine.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 0b361278e34..a2cd37f7612 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -116,8 +116,10 @@ def _infer_concat_order_from_coords(datasets): def _check_dimension_depth_tile_ids(combined_tile_ids): - """Check all tuples are the same length - # i.e. check that all lists are nested to the same depth""" + """ + Check all tuples are the same length, i.e. check that all lists are + nested to the same depth. 
+ """ tile_ids = combined_tile_ids.keys() nesting_depths = [len(tile_id) for tile_id in tile_ids] if not nesting_depths: From a7231073a79e106e161be4c5732ecf53c3803bf3 Mon Sep 17 00:00:00 2001 From: Ian Bolliger Date: Thu, 19 Dec 2019 22:00:45 +0000 Subject: [PATCH 3/6] trailing whitespace --- xarray/core/combine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index a2cd37f7612..1ddfd7463fa 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -117,7 +117,7 @@ def _infer_concat_order_from_coords(datasets): def _check_dimension_depth_tile_ids(combined_tile_ids): """ - Check all tuples are the same length, i.e. check that all lists are + Check all tuples are the same length, i.e. check that all lists are nested to the same depth. """ tile_ids = combined_tile_ids.keys() From fb5707181c42b507d6ae0c04b7f65b2ab2c8158d Mon Sep 17 00:00:00 2001 From: Ian Bolliger Date: Thu, 19 Dec 2019 22:07:39 +0000 Subject: [PATCH 4/6] add to combine_by_coords docstring --- xarray/core/combine.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 1ddfd7463fa..ecdc4e16198 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -659,6 +659,15 @@ def combine_by_coords( temperature (y, x) float64 1.654 10.63 7.015 2.543 ... 12.46 2.22 15.96 precipitation (y, x) float64 0.2136 0.9974 0.7603 ... 0.6125 0.4654 0.5953 + >>> xr.combine_by_coords([x1, x2, x3]) + <xarray.Dataset> + Dimensions: (x: 6, y: 4) + Coordinates: + * x (x) int64 10 20 30 40 50 60 + * y (y) int64 0 1 2 3 + Data variables: + temperature (y, x) float64 1.654 10.63 7.015 nan ... 12.46 2.22 15.96 + precipitation (y, x) float64 0.2136 0.9974 0.7603 ... 
0.6125 0.4654 0.5953 """ # Group by data vars From 3f9fd03def7d2b9f76df6e6a307ab16289603174 Mon Sep 17 00:00:00 2001 From: Ian Bolliger Date: Thu, 19 Dec 2019 22:07:48 +0000 Subject: [PATCH 5/6] whats new --- doc/whats-new.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index fe05a4d2c21..4bf1e6fb860 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -38,6 +38,9 @@ New Features Bug fixes ~~~~~~~~~ +- Fix :py:meth:`xarray.combine_by_coords` to allow for combining incomplete + hypercubes of Datasets (:issue:`3648`). By `Ian Bolliger + <https://github.com/bolliger32>`_. - Fix :py:meth:`xarray.combine_by_coords` when combining cftime coordinates which span long time intervals (:issue:`3535`). By `Spencer Clark <https://github.com/spencerkclark>`_. From f5fe56e03980ecd95cd78e516db4b4de4799ec02 Mon Sep 17 00:00:00 2001 From: Ian Bolliger Date: Sat, 21 Dec 2019 02:03:04 +0000 Subject: [PATCH 6/6] change default fill_value --- xarray/core/combine.py | 11 +++++++++-- xarray/tests/test_combine.py | 6 +++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index ecdc4e16198..3f6e0e79351 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -542,7 +542,8 @@ def combine_by_coords( coords : {'minimal', 'different', 'all' or list of str}, optional As per the 'data_vars' kwarg, but for coordinate variables. fill_value : scalar, optional - Value to use for newly missing values + Value to use for newly missing values. If None, raises a ValueError if + the passed Datasets do not create a complete hypercube. 
join : {'outer', 'inner', 'left', 'right', 'exact'}, optional String indicating how to combine differing indexes (excluding concat_dim) in objects @@ -682,7 +683,13 @@ def combine_by_coords( list(datasets_with_same_vars) ) - _check_dimension_depth_tile_ids(combined_ids) + if fill_value is None: + # check that datasets form complete hypercube + _check_shape_tile_ids(combined_ids) + else: + # check only that all datasets have same dimension depth for these + # vars + _check_dimension_depth_tile_ids(combined_ids) # Concatenate along all of concat_dims one by one to create single ds concatenated = _combine_nd( diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 94868719683..d907e1c5e46 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -712,7 +712,7 @@ def test_check_for_impossible_ordering(self): combine_by_coords([ds1, ds0]) def test_combine_by_coords_incomplete_hypercube(self): - # Want to check that this will return + # test that this succeeds with default fill_value x1 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [0]}) x2 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [1], "x": [0]}) x3 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [1]}) @@ -723,6 +723,10 @@ def test_combine_by_coords_incomplete_hypercube(self): ) assert_identical(expected, actual) + # test that this fails if fill_value is None + with pytest.raises(ValueError): + combine_by_coords([x1, x2, x3], fill_value=None) + @pytest.mark.filterwarnings( "ignore:In xarray version 0.15 `auto_combine` " "will be deprecated"