From 3de98b4ffc79c08b489ff7a272d02dfe1471b495 Mon Sep 17 00:00:00 2001 From: Rob <124158982+rob-sil@users.noreply.github.com> Date: Sat, 30 Mar 2024 14:14:07 -0700 Subject: [PATCH 1/3] Let melt name variable columns for a multiindex --- doc/source/whatsnew/v2.2.2.rst | 1 + pandas/core/reshape/melt.py | 15 ++++++++++++--- pandas/tests/reshape/test_melt.py | 16 ++++++++++++++++ 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v2.2.2.rst b/doc/source/whatsnew/v2.2.2.rst index 0dac3660c76b2..09dfbeb58e5ed 100644 --- a/doc/source/whatsnew/v2.2.2.rst +++ b/doc/source/whatsnew/v2.2.2.rst @@ -15,6 +15,7 @@ Fixed regressions ~~~~~~~~~~~~~~~~~ - :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when the a column's type was a pandas nullable on with missing values (:issue:`56702`) - :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when the a column's type was a pyarrow nullable on with missing values (:issue:`57664`) +- :meth:`DataFrame.melt` would not accept multiple names in ``var_name`` when the columns were a :class:`MultiIndex` (:issue:`58033`) - Avoid issuing a spurious ``DeprecationWarning`` when a custom :class:`DataFrame` or :class:`Series` subclass method is called (:issue:`57553`) - Fixed regression in precision of :func:`to_datetime` with string and ``unit`` input (:issue:`57051`) diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index f51a833e5f906..f62aeeb3d9739 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -64,9 +64,10 @@ def melt( value_vars : scalar, tuple, list, or ndarray, optional Column(s) to unpivot. If not specified, uses all columns that are not set as `id_vars`. - var_name : scalar, default None + var_name : scalar, tuple, list, or ndarray, optional Name to use for the 'variable' column. If None it uses - ``frame.columns.name`` or 'variable'. + ``frame.columns.name`` or 'variable'. 
Must be a scalar if columns are not a + MultiIndex. value_name : scalar, default 'value' Name to use for the 'value' column, can't be an existing column label. col_level : scalar, optional @@ -217,7 +218,15 @@ def melt( frame.columns.name if frame.columns.name is not None else "variable" ] elif is_list_like(var_name): - raise ValueError(f"{var_name=} must be a scalar.") + if isinstance(frame.columns, MultiIndex): + var_name = list(var_name) + if len(var_name) > len(frame.columns): + raise ValueError( + f"{var_name=} has {len(var_name)} items, " + f"but the dataframe columns only have {len(frame.columns)} levels." + ) + else: + raise ValueError(f"{var_name=} must be a scalar.") else: var_name = [var_name] diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py index f224a45ca3279..5695fff57b23b 100644 --- a/pandas/tests/reshape/test_melt.py +++ b/pandas/tests/reshape/test_melt.py @@ -533,6 +533,22 @@ def test_melt_non_scalar_var_name_raises(self): with pytest.raises(ValueError, match=r".* must be a scalar."): df.melt(id_vars=["a"], var_name=[1, 2]) + def test_melt_multiindex_columns_var_name(self): + # GH 58033 + df = DataFrame({("A", "a"): [1], ("A", "b"): [2]}) + + expected = DataFrame( + [("A", "a", 1), ("A", "b", 2)], columns=["first", "second", "value"] + ) + + tm.assert_frame_equal(df.melt(var_name=["first", "second"]), expected) + tm.assert_frame_equal(df.melt(var_name=["first"]), expected[["first", "value"]]) + + with pytest.raises( + ValueError, match="but the dataframe columns only have 2 levels" + ): + df.melt(var_name=["first", "second", "third"]) + class TestLreshape: def test_pairs(self): From 82e4dbd314422b825cd6340d5a144cc052465eff Mon Sep 17 00:00:00 2001 From: Rob <124158982+rob-sil@users.noreply.github.com> Date: Tue, 21 May 2024 13:41:49 -0500 Subject: [PATCH 2/3] Respond to comments --- doc/source/whatsnew/v2.2.2.rst | 1 - doc/source/whatsnew/v3.0.0.rst | 1 + pandas/tests/reshape/test_melt.py | 4 ++++ 3 files changed, 5 
insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.2.rst b/doc/source/whatsnew/v2.2.2.rst index 09dfbeb58e5ed..0dac3660c76b2 100644 --- a/doc/source/whatsnew/v2.2.2.rst +++ b/doc/source/whatsnew/v2.2.2.rst @@ -15,7 +15,6 @@ Fixed regressions ~~~~~~~~~~~~~~~~~ - :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when the a column's type was a pandas nullable on with missing values (:issue:`56702`) - :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when the a column's type was a pyarrow nullable on with missing values (:issue:`57664`) -- :meth:`DataFrame.melt` would not accept multiple names in ``var_name`` when the columns were a :class:`MultiIndex` (:issue:`58033`) - Avoid issuing a spurious ``DeprecationWarning`` when a custom :class:`DataFrame` or :class:`Series` subclass method is called (:issue:`57553`) - Fixed regression in precision of :func:`to_datetime` with string and ``unit`` input (:issue:`57051`) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 74a19472ec835..f782d36c6ead1 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -327,6 +327,7 @@ Performance improvements Bug fixes ~~~~~~~~~ +- :meth:`DataFrame.melt` would not accept multiple names in ``var_name`` when the columns were a :class:`MultiIndex` (:issue:`58033`) - Fixed bug in :class:`SparseDtype` for equal comparison with na fill value. (:issue:`54770`) - Fixed bug in :meth:`.DataFrameGroupBy.median` where nat values gave an incorrect result. 
(:issue:`57926`) - Fixed bug in :meth:`DataFrame.cumsum` which was raising ``IndexError`` if dtype is ``timedelta64[ns]`` (:issue:`57956`) diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py index 5695fff57b23b..49200face66c5 100644 --- a/pandas/tests/reshape/test_melt.py +++ b/pandas/tests/reshape/test_melt.py @@ -544,6 +544,10 @@ def test_melt_multiindex_columns_var_name(self): tm.assert_frame_equal(df.melt(var_name=["first", "second"]), expected) tm.assert_frame_equal(df.melt(var_name=["first"]), expected[["first", "value"]]) + def test_melt_multiindex_columns_var_name_too_many(self): + # GH 58033 + df = DataFrame({("A", "a"): [1], ("A", "b"): [2]}) + with pytest.raises( ValueError, match="but the dataframe columns only have 2 levels" ): From 3a0d820f74679376eb88416075e3d639f6e32a28 Mon Sep 17 00:00:00 2001 From: Rob <124158982+rob-sil@users.noreply.github.com> Date: Tue, 21 May 2024 14:06:37 -0500 Subject: [PATCH 3/3] Check is_iterator --- pandas/core/reshape/melt.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index 4f2771aeffa85..294de2cf2fe1d 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -5,7 +5,10 @@ import numpy as np -from pandas.core.dtypes.common import is_list_like +from pandas.core.dtypes.common import ( + is_iterator, + is_list_like, +) from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.missing import notna @@ -219,7 +222,8 @@ def melt( ] elif is_list_like(var_name): if isinstance(frame.columns, MultiIndex): - var_name = list(var_name) + if is_iterator(var_name): + var_name = list(var_name) if len(var_name) > len(frame.columns): raise ValueError( f"{var_name=} has {len(var_name)} items, "