From 2e485b73ea447562d14cd4ca32b462e05d7b4a11 Mon Sep 17 00:00:00 2001 From: ashtou Date: Sat, 23 May 2020 02:17:25 +0100 Subject: [PATCH 1/5] Fix MultiIndex melt when col_level is used #34129 --- pandas/core/reshape/melt.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index 1a315ff0ead52..d1f9a95878c18 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -72,7 +72,14 @@ def melt( "The following 'value_vars' are not present in " f"the DataFrame: {list(missing)}" ) - frame = frame.loc[:, id_vars + value_vars] + # use `iloc` instead of `loc` when `col_level` is specified + if col_level is not None: + nlevels = frame.columns.nlevels + iid_vars = [int(cols.index(elm) / nlevels) for elm in id_vars] + ivalue_vars = [int(cols.index(elm) / nlevels) for elm in value_vars] + frame = frame.iloc[:, iid_vars + ivalue_vars] + else: + frame = frame.loc[:, id_vars + value_vars] else: frame = frame.copy() From ffb41706764505a17010885501dc5026e40640f6 Mon Sep 17 00:00:00 2001 From: ashtou Date: Sat, 23 May 2020 04:24:09 +0100 Subject: [PATCH 2/5] Add test for MultiIndex melt with col_level>0 --- pandas/tests/reshape/test_melt.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py index 6a670e6c729e9..3eba79e953898 100644 --- a/pandas/tests/reshape/test_melt.py +++ b/pandas/tests/reshape/test_melt.py @@ -101,15 +101,25 @@ def test_vars_work_with_multiindex(self): tm.assert_frame_equal(result, expected) def test_single_vars_work_with_multiindex(self): - expected = DataFrame( + expected0 = DataFrame( { "A": {0: 1.067683, 1: -1.321405, 2: -0.807333}, "CAP": {0: "B", 1: "B", 2: "B"}, "value": {0: -1.110463, 1: 0.368915, 2: 0.08298}, } ) - result = self.df1.melt(["A"], ["B"], col_level=0) - tm.assert_frame_equal(result, expected) + result0 = self.df1.melt(["A"], ["B"], col_level=0) + tm.assert_frame_equal(result0, expected0) + + expected1 = DataFrame( + { + "a": {0: 1.067683, 1: -1.321405, 2: -0.807333}, + "low": {0: "b", 1: "b", 2: "b"}, + "value": {0: -1.110463, 1: 0.368915, 2: 0.08298}, + } + ) + result1 = self.df1.melt(["a"], ["b"], col_level=1) + tm.assert_frame_equal(result1, expected1) def test_tuple_vars_fail_with_multiindex(self): # melt should fail with an informative error message if From 4670ab5493adb236850f498a7699060dd995e538 Mon Sep 17 00:00:00 2001 From: ashtou Date: Sat, 23 May 2020 05:17:04 +0100 Subject: [PATCH 3/5] Add a whatsnew entry --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 41d519e0765dc..bab8b5cd99255 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -853,6 +853,7 @@ Reshaping - Bug in :func:`cut` raised an error when non-unique labels (:issue:`33141`) - Bug in :meth:`DataFrame.replace` casts columns to ``object`` dtype if items in ``to_replace`` not in values (:issue:`32988`) - Ensure only named functions can be used in :func:`eval()` (:issue:`32460`) +- Fixed bug in :func:`melt` where melting MultiIndex columns with ``col_level`` > 0 would raise a ``KeyError`` on ``id_vars`` (:issue:`34129`) Sparse ^^^^^^ From e00fcf0379a99b04a4a033abf1f3e8838465fa48 Mon Sep 17 00:00:00 2001 From: ashtou Date: Sat, 23 May 2020 14:54:48 +0100 Subject: [PATCH 4/5] Use `iloc` when `col_level` is specified --- pandas/core/reshape/melt.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index d1f9a95878c18..ae36b104ad28a 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -74,10 +74,10 @@ def melt( ) # use `iloc` instead of `loc` when `col_level` is specified if col_level is not None: - nlevels = frame.columns.nlevels - iid_vars = [int(cols.index(elm) / nlevels) for elm in id_vars] - ivalue_vars = [int(cols.index(elm) / nlevels) for elm in value_vars] - frame = frame.iloc[:, iid_vars + ivalue_vars] + idx = frame.columns.get_level_values(col_level).get_indexer( + id_vars + value_vars + ) + frame = frame.iloc[:, idx] else: frame = frame.loc[:, id_vars + value_vars] else: From c85279210e477461c2a419e741efda6b4c33df1b Mon Sep 17 00:00:00 2001 From: ashtou Date: Tue, 26 May 2020 13:59:40 +0100 Subject: [PATCH 5/5] Use iloc for selection in MultiIndex melt, Add test --- pandas/core/reshape/melt.py | 5 ++- pandas/tests/reshape/test_melt.py | 54 +++++++++++++++++++------------ 2 files changed, 36 insertions(+), 23 deletions(-) diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index ae36b104ad28a..7d22b86c5c07c 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -72,14 +72,13 @@ def melt( "The following 'value_vars' are not present in " f"the DataFrame: {list(missing)}" ) - # use `iloc` instead of `loc` when `col_level` is specified if col_level is not None: idx = frame.columns.get_level_values(col_level).get_indexer( id_vars + value_vars ) - frame = frame.iloc[:, idx] else: - frame = frame.loc[:, id_vars + value_vars] + idx = frame.columns.get_indexer(id_vars + value_vars) + frame = frame.iloc[:, idx] else: frame = frame.copy() diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py index 3eba79e953898..000a6354277ab 100644 --- a/pandas/tests/reshape/test_melt.py +++ b/pandas/tests/reshape/test_melt.py @@ -100,26 +100,40 @@ def test_vars_work_with_multiindex(self): result = self.df1.melt(id_vars=[("A", "a")], value_vars=[("B", "b")]) tm.assert_frame_equal(result, expected) - def test_single_vars_work_with_multiindex(self): - expected0 = DataFrame( - { - "A": {0: 1.067683, 1: -1.321405, 2: -0.807333}, - "CAP": {0: "B", 1: "B", 2: "B"}, - "value": {0: -1.110463, 1: 0.368915, 2: 0.08298}, - } - ) - result0 = self.df1.melt(["A"], ["B"], col_level=0) - tm.assert_frame_equal(result0, expected0) - - expected1 = DataFrame( - { - "a": {0: 1.067683, 1: -1.321405, 2: -0.807333}, - "low": {0: "b", 1: "b", 2: "b"}, - "value": {0: -1.110463, 1: 0.368915, 2: 0.08298}, - } - ) - result1 = self.df1.melt(["a"], ["b"], col_level=1) - tm.assert_frame_equal(result1, expected1) + @pytest.mark.parametrize( + "id_vars, value_vars, col_level, expected", + [ + ( + ["A"], + ["B"], + 0, + DataFrame( + { + "A": {0: 1.067683, 1: -1.321405, 2: -0.807333}, + "CAP": {0: "B", 1: "B", 2: "B"}, + "value": {0: -1.110463, 1: 0.368915, 2: 0.08298}, + } + ), + ), + ( + ["a"], + ["b"], + 1, + DataFrame( + { + "a": {0: 1.067683, 1: -1.321405, 2: -0.807333}, + "low": {0: "b", 1: "b", 2: "b"}, + "value": {0: -1.110463, 1: 0.368915, 2: 0.08298}, + } + ), + ), + ], + ) + def test_single_vars_work_with_multiindex( + self, id_vars, value_vars, col_level, expected + ): + result = self.df1.melt(id_vars, value_vars, col_level=col_level) + tm.assert_frame_equal(result, expected) def test_tuple_vars_fail_with_multiindex(self): # melt should fail with an informative error message if