From 6a2758e76ad22beeac1e0643820e4cd6ffefa241 Mon Sep 17 00:00:00 2001 From: Lukas Bindreiter Date: Mon, 10 Oct 2022 15:01:33 +0200 Subject: [PATCH 1/9] Update open_dataset backend to ensure compatibility with new explicit index model --- xarray/backends/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 3e34af08e27..39d147c52ee 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -234,7 +234,7 @@ def _get_mtime(filename_or_obj): def _protect_dataset_variables_inplace(dataset, cache): for name, variable in dataset.variables.items(): - if name not in variable.dims: + if name not in dataset.indexes: # no need to protect IndexVariable objects data = indexing.CopyOnWriteArray(variable._data) if cache: From 3c87f5d7ea6048916db73f954d869236a72a28b2 Mon Sep 17 00:00:00 2001 From: Lukas Bindreiter Date: Tue, 11 Oct 2022 14:14:13 +0200 Subject: [PATCH 2/9] Avoid generation of Indexes object --- xarray/backends/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 39d147c52ee..13bcf046ac3 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -234,7 +234,7 @@ def _get_mtime(filename_or_obj): def _protect_dataset_variables_inplace(dataset, cache): for name, variable in dataset.variables.items(): - if name not in dataset.indexes: + if name not in dataset._indexes: # no need to protect IndexVariable objects data = indexing.CopyOnWriteArray(variable._data) if cache: From 7695d0fe544942154227bd50c285310744d7e14d Mon Sep 17 00:00:00 2001 From: Lukas Bindreiter Date: Tue, 11 Oct 2022 14:23:45 +0200 Subject: [PATCH 3/9] Add test ensuring backend compatibility with multiindices --- xarray/tests/test_backends_api.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/xarray/tests/test_backends_api.py b/xarray/tests/test_backends_api.py index e14234bcaf9..496ce8ba466 100644 --- a/xarray/tests/test_backends_api.py +++ b/xarray/tests/test_backends_api.py @@ -3,6 +3,7 @@ from numbers import Number import numpy as np +import pandas as pd import pytest import xarray as xr @@ -48,6 +49,30 @@ def open_dataset( assert_identical(expected, actual) +def test_multiindex() -> None: + def _create_multiindex(**kwargs): + return pd.MultiIndex.from_arrays(list(kwargs.values()), names=kwargs.keys()) + + dataset = xr.Dataset() + dataset.coords["coord1"] = ["A", "B"] + dataset.coords["coord2"] = [1, 2] + dataset["multi_index"] = _create_multiindex( + coord1=["A", "A", "B", "B"], coord2=[1, 2, 1, 2] + ) + + class MultiindexBackend(xr.backends.BackendEntrypoint): + def open_dataset( + self, + filename_or_obj, + drop_variables=None, + **kwargs, + ) -> xr.Dataset: + return dataset.copy(deep=True) + + loaded = xr.open_dataset("fake_filename", engine=MultiindexBackend) + assert_identical(dataset, loaded) + + class PassThroughBackendEntrypoint(xr.backends.BackendEntrypoint): """Access an object passed to the `open_dataset` method.""" From 1ff1038c305b7a184e7cbae850b7c7c3cae64880 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 11 Oct 2022 10:18:35 -0600 Subject: [PATCH 4/9] Update xarray/tests/test_backends_api.py --- xarray/tests/test_backends_api.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xarray/tests/test_backends_api.py b/xarray/tests/test_backends_api.py index 496ce8ba466..98bdcc63219 100644 --- a/xarray/tests/test_backends_api.py +++ b/xarray/tests/test_backends_api.py @@ -50,6 +50,8 @@ def open_dataset( def test_multiindex() -> None: + # GH7139 + # Check that we properly handle backends that change index variables def _create_multiindex(**kwargs): return pd.MultiIndex.from_arrays(list(kwargs.values()), names=kwargs.keys()) From aed8cb7bd41322e3571da878cc7ad5ba0c5d6c65 Mon Sep 17 00:00:00 2001 From: Lukas Bindreiter Date: Wed, 12 Oct 2022 16:31:04 +0200 Subject: [PATCH 5/9] Use stack to construct multi index in test --- xarray/tests/test_backends_api.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/xarray/tests/test_backends_api.py b/xarray/tests/test_backends_api.py index 98bdcc63219..271ed55c84d 100644 --- a/xarray/tests/test_backends_api.py +++ b/xarray/tests/test_backends_api.py @@ -55,12 +55,8 @@ def test_multiindex() -> None: def _create_multiindex(**kwargs): return pd.MultiIndex.from_arrays(list(kwargs.values()), names=kwargs.keys()) - dataset = xr.Dataset() - dataset.coords["coord1"] = ["A", "B"] - dataset.coords["coord2"] = [1, 2] - dataset["multi_index"] = _create_multiindex( - coord1=["A", "A", "B", "B"], coord2=[1, 2, 1, 2] - ) + dataset = xr.Dataset(coords={"coord1": ["A", "B"], "coord2": [1, 2]}) + dataset = dataset.stack(z=["coord1", "coord2"]) class MultiindexBackend(xr.backends.BackendEntrypoint): def open_dataset( From 11d7b1824c7f281e4093d410b4a43dda27d6d6c8 Mon Sep 17 00:00:00 2001 From: Lukas Bindreiter Date: Wed, 12 Oct 2022 16:37:23 +0200 Subject: [PATCH 6/9] Mention open_dataset backend multi-index compatibility in whats-new --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 15da1221c90..0e2e5231b65 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -47,6 +47,8 @@ Bug fixes :py:meth:`DataArray.to_index` for multi-index levels (convert to single index). (:issue:`6836`, :pull:`7105`) By `Benoît Bovy `_. +- Support for open_dataset backends that return datasets containing multi-indexes (:issue:`7139`, :pull:`7150`) + By `Lukas Bindreiter ` Documentation ~~~~~~~~~~~~~ From 7818cfd60236fd58114e79f83de83ad2204d65cf Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Wed, 12 Oct 2022 16:54:17 +0200 Subject: [PATCH 7/9] remove _create_multiindex utility function --- xarray/tests/test_backends_api.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/xarray/tests/test_backends_api.py b/xarray/tests/test_backends_api.py index 271ed55c84d..7c59a18f48f 100644 --- a/xarray/tests/test_backends_api.py +++ b/xarray/tests/test_backends_api.py @@ -52,9 +52,6 @@ def open_dataset( def test_multiindex() -> None: # GH7139 # Check that we properly handle backends that change index variables - def _create_multiindex(**kwargs): - return pd.MultiIndex.from_arrays(list(kwargs.values()), names=kwargs.keys()) - dataset = xr.Dataset(coords={"coord1": ["A", "B"], "coord2": [1, 2]}) dataset = dataset.stack(z=["coord1", "coord2"]) From 739dab9487609559c7b11cde7fd74c88af84e5d5 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Wed, 12 Oct 2022 17:13:56 +0200 Subject: [PATCH 8/9] remove pandas import --- xarray/tests/test_backends_api.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/tests/test_backends_api.py b/xarray/tests/test_backends_api.py index 7c59a18f48f..efff86d7683 100644 --- a/xarray/tests/test_backends_api.py +++ b/xarray/tests/test_backends_api.py @@ -3,7 +3,6 @@ from numbers import Number import numpy as np -import pandas as pd import pytest import xarray as xr From ae850fa98a5267b82525fa43573179f44e491e41 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Wed, 12 Oct 2022 18:27:06 +0200 Subject: [PATCH 9/9] Update doc/whats-new.rst --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 0e2e5231b65..15d9210c3a6 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -48,7 +48,7 @@ Bug fixes (:issue:`6836`, :pull:`7105`) By `Benoît Bovy `_. - Support for open_dataset backends that return datasets containing multi-indexes (:issue:`7139`, :pull:`7150`) - By `Lukas Bindreiter ` + By `Lukas Bindreiter `_. Documentation ~~~~~~~~~~~~~