From 51a90c4e11f838d9bf21df0ae5998bf0db05503a Mon Sep 17 00:00:00 2001 From: Matthieu Ancellin Date: Fri, 13 Mar 2020 15:06:51 +0100 Subject: [PATCH 1/4] Fix bug for multi-index with categorical values. See issue #3674. --- xarray/core/indexes.py | 2 ++ xarray/tests/test_dataset.py | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 06bf08cefd2..dea1767d50c 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -22,6 +22,8 @@ def remove_unused_levels_categories(index): for i, level in enumerate(index.levels): if isinstance(level, pd.CategoricalIndex): level = level[index.codes[i]].remove_unused_categories() + else: + level = level[index.codes[i]] levels.append(level) index = pd.MultiIndex.from_arrays(levels, names=index.names) elif isinstance(index, pd.CategoricalIndex): diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index d2e8c6b7609..b1423da3c10 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1458,6 +1458,16 @@ def test_categorical_reindex(self): actual = ds.reindex(cat=["foo"])["cat"].values assert (actual == np.array(["foo"])).all() + def test_categorical_multiindex(self): + i1 = pd.Series([0, 0]) + cat = pd.CategoricalDtype(categories=['foo', 'baz', 'bar']) + i2 = pd.Series(['baz', 'bar'], dtype=cat) + + df = pd.DataFrame({'i1': i1, 'i2': i2, 'values': [1, 2]}).set_index(['i1', 'i2']) + actual = df.to_xarray() + print(actual) + assert actual['values'].shape == (1, 2) + def test_sel_drop(self): data = Dataset({"foo": ("x", [1, 2, 3])}, {"x": [0, 1, 2]}) expected = Dataset({"foo": 1}) From d5f2392db2e46d484543652996e5e45f6126dcc9 Mon Sep 17 00:00:00 2001 From: Matthieu Ancellin Date: Fri, 13 Mar 2020 15:17:38 +0100 Subject: [PATCH 2/4] Blacked. 
--- xarray/tests/test_dataset.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index b1423da3c10..c5b078e1cb6 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1460,13 +1460,15 @@ def test_categorical_reindex(self): def test_categorical_multiindex(self): i1 = pd.Series([0, 0]) - cat = pd.CategoricalDtype(categories=['foo', 'baz', 'bar']) - i2 = pd.Series(['baz', 'bar'], dtype=cat) + cat = pd.CategoricalDtype(categories=["foo", "baz", "bar"]) + i2 = pd.Series(["baz", "bar"], dtype=cat) - df = pd.DataFrame({'i1': i1, 'i2': i2, 'values': [1, 2]}).set_index(['i1', 'i2']) + df = pd.DataFrame({"i1": i1, "i2": i2, "values": [1, 2]}).set_index( + ["i1", "i2"] + ) actual = df.to_xarray() print(actual) - assert actual['values'].shape == (1, 2) + assert actual["values"].shape == (1, 2) def test_sel_drop(self): data = Dataset({"foo": ("x", [1, 2, 3])}, {"x": [0, 1, 2]}) From 035add0c31c83489df497f7a631e66fa3c51b811 Mon Sep 17 00:00:00 2001 From: Matthieu Ancellin Date: Fri, 13 Mar 2020 15:17:58 +0100 Subject: [PATCH 3/4] Add line in whats-new.rst. --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 80309dc4673..6c919bc55b3 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -55,6 +55,8 @@ New Features Bug fixes ~~~~~~~~~ +- Fix use of multi-index with categorical values (:issue:`3674`). + By `Matthieu Ancellin <https://github.com/mancellin>`_. - Fix :py:meth:`Dataset.swap_dims` and :py:meth:`DataArray.swap_dims` producing index with name reflecting the previous dimension name instead of the new one (:issue:`3748`, :pull:`3752`). By `Joseph K Aicher From b211654e5a324fcb366e4bf93701337d9a11535a Mon Sep 17 00:00:00 2001 From: Matthieu Ancellin Date: Fri, 13 Mar 2020 15:44:55 +0100 Subject: [PATCH 4/4] Remove forgotten print. 
--- xarray/tests/test_dataset.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index c5b078e1cb6..6a6c496591a 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1467,7 +1467,6 @@ def test_categorical_multiindex(self): ["i1", "i2"] ) actual = df.to_xarray() - print(actual) assert actual["values"].shape == (1, 2) def test_sel_drop(self):