From 762c961faa733e8aca41debf6485dccf80436fe1 Mon Sep 17 00:00:00 2001
From: Ghislain Picard <ghislain.picard@univ-grenoble-alpes.fr>
Date: Sun, 20 Sep 2020 22:40:19 +0200
Subject: [PATCH 1/8] Accept coordinates with MultiIndex (solve issue #3008)

---
 xarray/core/coordinates.py | 43 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 41 insertions(+), 2 deletions(-)

diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py
index 846e4044a2c..7bc7fec4695 100644
--- a/xarray/core/coordinates.py
+++ b/xarray/core/coordinates.py
@@ -13,6 +13,7 @@
     cast,
 )
 
+import numpy as np
 import pandas as pd
 
 from . import formatting, indexing
@@ -106,9 +107,47 @@ def to_index(self, ordered_dims: Sequence[Hashable] = None) -> pd.Index:
             (dim,) = ordered_dims
             return self._data.get_index(dim)  # type: ignore
         else:
+            from pandas.core.arrays.categorical import factorize_from_iterable
+
             indexes = [self._data.get_index(k) for k in ordered_dims]  # type: ignore
-            names = list(ordered_dims)
-            return pd.MultiIndex.from_product(indexes, names=names)
+
+            # compute the sizes of the repeat and tile for the cartesian product
+            # (taken from pandas.core.reshape.util)
+            lenX = np.fromiter((len(index) for index in indexes), dtype=np.intp)
+            cumprodX = np.cumproduct(lenX)
+
+            if cumprodX[-1] != 0:
+                # sizes of the repeats
+                b = cumprodX[-1] / cumprodX
+            else:
+                # if any factor is empty, the cartesian product is empty
+                b = np.zeros_like(cumprodX)
+
+            # sizes of the tiles
+            a = np.roll(cumprodX, 1)
+            a[0] = 1
+
+            # loop over the indexes
+            # for each MultiIndex or Index compute the cartesian product of the codes
+
+            code_list = []
+            level_list = []
+            names = []
+
+            for i, index in enumerate(indexes):
+                if isinstance(index, pd.MultiIndex):
+                    codes, levels = index.codes, index.levels
+                else:
+                    code, level = factorize_from_iterable(index)
+                    codes = [code]
+                    levels = [level]
+                    
+                # compute the cartesian product
+                code_list += [np.tile(np.repeat(code, b[i]), a[i]) for code in codes]
+                level_list += levels
+                names += index.names
+
+            return pd.MultiIndex(level_list, code_list, names=names)
 
     def update(self, other: Mapping[Hashable, Any]) -> None:
         other_vars = getattr(other, "variables", other)

From 1472c92ae7b238b34927b9b2d721a806aa5f2b15 Mon Sep 17 00:00:00 2001
From: Ghislain Picard <ghislain.picard@univ-grenoble-alpes.fr>
Date: Sun, 20 Sep 2020 22:43:08 +0200
Subject: [PATCH 2/8] formatting

---
 xarray/core/coordinates.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py
index 7bc7fec4695..c5cf5558f25 100644
--- a/xarray/core/coordinates.py
+++ b/xarray/core/coordinates.py
@@ -141,7 +141,7 @@ def to_index(self, ordered_dims: Sequence[Hashable] = None) -> pd.Index:
                     code, level = factorize_from_iterable(index)
                     codes = [code]
                     levels = [level]
-                    
+
                 # compute the cartesian product
                 code_list += [np.tile(np.repeat(code, b[i]), a[i]) for code in codes]
                 level_list += levels

From 31a698a2b7932c54872ba87fa28ef8d3b8990e2b Mon Sep 17 00:00:00 2001
From: Ghislain Picard <ghislain.picard@univ-grenoble-alpes.fr>
Date: Mon, 21 Sep 2020 21:02:23 +0200
Subject: [PATCH 3/8] change to pd.factorize and improve variable names

---
 xarray/core/coordinates.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py
index c5cf5558f25..5c27cdb6e7b 100644
--- a/xarray/core/coordinates.py
+++ b/xarray/core/coordinates.py
@@ -107,8 +107,6 @@ def to_index(self, ordered_dims: Sequence[Hashable] = None) -> pd.Index:
             (dim,) = ordered_dims
             return self._data.get_index(dim)  # type: ignore
         else:
-            from pandas.core.arrays.categorical import factorize_from_iterable
-
             indexes = [self._data.get_index(k) for k in ordered_dims]  # type: ignore
 
             # compute the sizes of the repeat and tile for the cartesian product
@@ -118,14 +116,14 @@ def to_index(self, ordered_dims: Sequence[Hashable] = None) -> pd.Index:
 
             if cumprodX[-1] != 0:
                 # sizes of the repeats
-                b = cumprodX[-1] / cumprodX
+                repeat_counts = cumprodX[-1] / cumprodX
             else:
                 # if any factor is empty, the cartesian product is empty
-                b = np.zeros_like(cumprodX)
+                repeat_counts = np.zeros_like(cumprodX)
 
             # sizes of the tiles
-            a = np.roll(cumprodX, 1)
-            a[0] = 1
+            tile_counts = np.roll(cumprodX, 1)
+            tile_counts[0] = 1
 
             # loop over the indexes
             # for each MultiIndex or Index compute the cartesian product of the codes
@@ -138,12 +136,12 @@ def to_index(self, ordered_dims: Sequence[Hashable] = None) -> pd.Index:
                 if isinstance(index, pd.MultiIndex):
                     codes, levels = index.codes, index.levels
                 else:
-                    code, level = factorize_from_iterable(index)
+                    code, level = pd.factorize(index)
                     codes = [code]
                     levels = [level]
 
                 # compute the cartesian product
-                code_list += [np.tile(np.repeat(code, b[i]), a[i]) for code in codes]
+                code_list += [np.tile(np.repeat(code, repeat_counts[i]), tile_counts[i]) for code in codes]
                 level_list += levels
                 names += index.names
 

From 4ea0b5337af7bb720b442833896a3b342a38c3aa Mon Sep 17 00:00:00 2001
From: Ghislain Picard <ghislain.picard@univ-grenoble-alpes.fr>
Date: Mon, 21 Sep 2020 21:02:45 +0200
Subject: [PATCH 4/8] add a test for multiindex

---
 xarray/tests/test_dataarray.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
index 5e0fe13ea52..08d7e3ccdd4 100644
--- a/xarray/tests/test_dataarray.py
+++ b/xarray/tests/test_dataarray.py
@@ -3520,6 +3520,21 @@ def test_to_dataframe(self):
         with raises_regex(ValueError, "unnamed"):
             arr.to_dataframe()
 
+    def test_to_dataframe_multiindex(self):
+        # regression test for #3008
+        arr_np = np.random.randn(4, 3)
+     
+        mindex = pd.MultiIndex.from_product([[1, 2], list('ab')], names=['A', 'B'])
+
+        arr = DataArray(arr_np, [('MI', mindex), ('C', [5, 6, 7])], name="foo")
+
+        actual = arr.to_dataframe()
+        assert_array_equal(actual['foo'].values, arr_np.flatten())
+        assert_array_equal(actual.index.names, list('ABC'))
+        assert_array_equal(actual.index.levels[0], [1, 2])
+        assert_array_equal(actual.index.levels[1], ['a', 'b'])
+        assert_array_equal(actual.index.levels[2], [5, 6, 7])
+
     def test_to_pandas_name_matches_coordinate(self):
         # coordinate with same name as array
         arr = DataArray([1, 2, 3], dims="x", name="x")

From 1febc77f734b92ba17e6e30f4a0bab0c24222716 Mon Sep 17 00:00:00 2001
From: Ghislain Picard <ghislain.picard@univ-grenoble-alpes.fr>
Date: Mon, 21 Sep 2020 21:11:52 +0200
Subject: [PATCH 5/8] formatting

---
 xarray/tests/test_dataarray.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
index 08d7e3ccdd4..e2f8fc1c938 100644
--- a/xarray/tests/test_dataarray.py
+++ b/xarray/tests/test_dataarray.py
@@ -3523,16 +3523,16 @@ def test_to_dataframe(self):
     def test_to_dataframe_multiindex(self):
         # regression test for #3008
         arr_np = np.random.randn(4, 3)
-     
-        mindex = pd.MultiIndex.from_product([[1, 2], list('ab')], names=['A', 'B'])
 
-        arr = DataArray(arr_np, [('MI', mindex), ('C', [5, 6, 7])], name="foo")
+        mindex = pd.MultiIndex.from_product([[1, 2], list("ab")], names=["A", "B"])
+
+        arr = DataArray(arr_np, [("MI", mindex), ("C", [5, 6, 7])], name="foo")
 
         actual = arr.to_dataframe()
-        assert_array_equal(actual['foo'].values, arr_np.flatten())
-        assert_array_equal(actual.index.names, list('ABC'))
+        assert_array_equal(actual["foo"].values, arr_np.flatten())
+        assert_array_equal(actual.index.names, list("ABC"))
         assert_array_equal(actual.index.levels[0], [1, 2])
-        assert_array_equal(actual.index.levels[1], ['a', 'b'])
+        assert_array_equal(actual.index.levels[1], ["a", "b"])
         assert_array_equal(actual.index.levels[2], [5, 6, 7])
 
     def test_to_pandas_name_matches_coordinate(self):

From aa88f796ac51a2942aabeb36e9c12762f82bda37 Mon Sep 17 00:00:00 2001
From: Ghislain Picard <ghislain.picard@univ-grenoble-alpes.fr>
Date: Sun, 27 Sep 2020 09:57:59 +0200
Subject: [PATCH 6/8] change variable names and formatting

---
 xarray/core/coordinates.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py
index 5c27cdb6e7b..08b7629bed9 100644
--- a/xarray/core/coordinates.py
+++ b/xarray/core/coordinates.py
@@ -111,18 +111,20 @@ def to_index(self, ordered_dims: Sequence[Hashable] = None) -> pd.Index:
 
             # compute the sizes of the repeat and tile for the cartesian product
             # (taken from pandas.core.reshape.util)
-            lenX = np.fromiter((len(index) for index in indexes), dtype=np.intp)
-            cumprodX = np.cumproduct(lenX)
+            index_lengths = np.fromiter(
+                (len(index) for index in indexes), dtype=np.intp
+            )
+            cumprod_lengths = np.cumprod(index_lengths)
 
-            if cumprodX[-1] != 0:
+            if cumprod_lengths[-1] != 0:
                 # sizes of the repeats
-                repeat_counts = cumprodX[-1] / cumprodX
+                repeat_counts = cumprod_lengths[-1] // cumprod_lengths
             else:
                 # if any factor is empty, the cartesian product is empty
-                repeat_counts = np.zeros_like(cumprodX)
+                repeat_counts = np.zeros_like(cumprod_lengths)
 
             # sizes of the tiles
-            tile_counts = np.roll(cumprodX, 1)
+            tile_counts = np.roll(cumprod_lengths, 1)
             tile_counts[0] = 1
 
             # loop over the indexes
@@ -141,7 +143,10 @@ def to_index(self, ordered_dims: Sequence[Hashable] = None) -> pd.Index:
                     levels = [level]
 
                 # compute the cartesian product
-                code_list += [np.tile(np.repeat(code, repeat_counts[i]), tile_counts[i]) for code in codes]
+                code_list += [
+                    np.tile(np.repeat(code, repeat_counts[i]), tile_counts[i])
+                    for code in codes
+                ]
                 level_list += levels
                 names += index.names
 

From 144c618d6f19d31ca0593e53c3886d312837f983 Mon Sep 17 00:00:00 2001
From: Ghislain Picard <ghislain.picard@univ-grenoble-alpes.fr>
Date: Sun, 27 Sep 2020 09:58:17 +0200
Subject: [PATCH 7/8] add test for to_dataframe with 0 length index

---
 xarray/tests/test_dataarray.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
index e2f8fc1c938..fcfef8ae217 100644
--- a/xarray/tests/test_dataarray.py
+++ b/xarray/tests/test_dataarray.py
@@ -3535,6 +3535,18 @@ def test_to_dataframe_multiindex(self):
         assert_array_equal(actual.index.levels[1], ["a", "b"])
         assert_array_equal(actual.index.levels[2], [5, 6, 7])
 
+    def test_to_dataframe_0length(self):
+        # regression test for #3008: a zero-length dimension next to a MultiIndex
+        arr_np = np.random.randn(4, 0)
+
+        mindex = pd.MultiIndex.from_product([[1, 2], list("ab")], names=["A", "B"])
+
+        arr = DataArray(arr_np, [("MI", mindex), ("C", [])], name="foo")
+
+        actual = arr.to_dataframe()
+        assert len(actual) == 0
+        assert_array_equal(actual.index.names, list("ABC"))
+
     def test_to_pandas_name_matches_coordinate(self):
         # coordinate with same name as array
         arr = DataArray([1, 2, 3], dims="x", name="x")

From eb34d2b09577df2561666ecf0b178ae89fd8780e Mon Sep 17 00:00:00 2001
From: Keewis <keewis@posteo.de>
Date: Sat, 20 Feb 2021 00:01:04 +0100
Subject: [PATCH 8/8] add a whats-new.rst [skip-ci]

---
 doc/whats-new.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 22902963d9c..f1a8e851b4b 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -141,6 +141,9 @@ Bug fixes
   a float64 array (:issue:`4898`, :pull:`4911`). By `Blair Bonnett <https://github.com/bcbnz>`_.
 - Fix decoding of vlen strings using h5py versions greater than 3.0.0 with h5netcdf backend (:issue:`4570`, :pull:`4893`).
   By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
+- Allow converting :py:class:`Dataset` or :py:class:`DataArray` objects with a ``MultiIndex``
+  and at least one other dimension to a ``pandas`` object (:issue:`3008`, :pull:`4442`).
+  By `ghislainp <https://github.com/ghislainp>`_.
 
 Documentation
 ~~~~~~~~~~~~~