From 186bfffe4a579207ac216b433955c6a27b8e8c42 Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Fri, 22 Nov 2019 09:07:20 -0700 Subject: [PATCH 1/2] BUG: Fix melt with mixed int/str columns --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/reshape/melt.py | 5 ++-- pandas/tests/reshape/test_melt.py | 41 ++++++++++++++++++++++--------- 3 files changed, 34 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index b015f439935cb..8a60fd2062b61 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -535,6 +535,7 @@ Reshaping - Bug :meth:`Series.pct_change` where supplying an anchored frequency would throw a ValueError (:issue:`28664`) - Bug where :meth:`DataFrame.equals` returned True incorrectly in some cases when two DataFrames had the same columns in different orders (:issue:`28839`) - Bug in :meth:`DataFrame.replace` that caused non-numeric replacer's dtype not respected (:issue:`26632`) +- Bug in :func:`melt` where supplying mixed strings and numeric values for ``id_vars`` or ``value_vars`` would incorrectly raise a ``ValueError`` (:issue:`29718`) Sparse ^^^^^^ diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index 4cba52c5cd651..8e9edfa5f1409 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -11,6 +11,7 @@ from pandas.core.dtypes.missing import notna from pandas.core.arrays import Categorical +import pandas.core.common as com from pandas.core.frame import DataFrame, _shared_docs from pandas.core.indexes.base import Index from pandas.core.reshape.concat import concat @@ -47,7 +48,7 @@ def melt( else: # Check that `id_vars` are in frame id_vars = list(id_vars) - missing = Index(np.ravel(id_vars)).difference(cols) + missing = Index(com.flatten(id_vars)).difference(cols) if not missing.empty: raise KeyError( "The following 'id_vars' are not present" @@ -69,7 +70,7 @@ def melt( else: value_vars = list(value_vars) # Check that `value_vars` are in frame - missing = Index(np.ravel(value_vars)).difference(cols) + missing = Index(com.flatten(value_vars)).difference(cols) if not missing.empty: raise KeyError( "The following 'value_vars' are not present in" diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py index 4521f1bbf1a08..d11fadac94a3a 100644 --- a/pandas/tests/reshape/test_melt.py +++ b/pandas/tests/reshape/test_melt.py @@ -289,34 +289,53 @@ def test_melt_missing_columns_raises(self): df = pd.DataFrame(np.random.randn(5, 4), columns=list("abcd")) # Try to melt with missing `value_vars` column name - msg = "The following '{Var}' are not present in the DataFrame: {Col}" - with pytest.raises( - KeyError, match=msg.format(Var="value_vars", Col="\\['C'\\]") - ): + msg = r"The following '{var}' are not present in the DataFrame: \[{col}\]" + with pytest.raises(KeyError, match=msg.format(var="value_vars", col="'C'")): df.melt(["a", "b"], ["C", "d"]) # Try to melt with missing `id_vars` column name - with pytest.raises(KeyError, match=msg.format(Var="id_vars", Col="\\['A'\\]")): + with pytest.raises(KeyError, match=msg.format(var="id_vars", col="'A'")): df.melt(["A", "b"], ["c", "d"]) # Multiple missing with pytest.raises( - KeyError, - match=msg.format(Var="id_vars", Col="\\['not_here', 'or_there'\\]"), + KeyError, match=msg.format(var="id_vars", col="'not_here', 'or_there'"), ): df.melt(["a", "b", "not_here", "or_there"], ["c", "d"]) # Multiindex melt fails if column is missing from multilevel melt multi = df.copy() multi.columns = [list("ABCD"), list("abcd")] - with pytest.raises(KeyError, match=msg.format(Var="id_vars", Col="\\['E'\\]")): + with pytest.raises(KeyError, match=msg.format(var="id_vars", col="'E'")): multi.melt([("E", "a")], [("B", "b")]) # Multiindex fails if column is missing from single level melt - with pytest.raises( - KeyError, match=msg.format(Var="value_vars", Col="\\['F'\\]") - ): + with pytest.raises(KeyError, match=msg.format(var="value_vars", col="'F'")): multi.melt(["A"], ["F"], col_level=0) + # GH 29718: mixed int/str + df_mixed = DataFrame(columns=[0, "a"]) + with pytest.raises(KeyError, match=msg.format(var="id_vars", col="'0'")): + df_mixed.melt(id_vars=["0", "a"]) + + with pytest.raises(KeyError, match=msg.format(var="value_vars", col="'0'")): + df_mixed.melt(value_vars=["0", "a"]) + + def test_melt_mixed_int_str_id_vars(self): + # GH 29718 + df = DataFrame({0: ["foo"], "a": ["bar"], "b": [1], "d": [2]}) + result = melt(df, id_vars=[0, "a"], value_vars=["b", "d"]) + expected = DataFrame( + {0: ["foo"] * 2, "a": ["bar"] * 2, "variable": list("bd"), "value": [1, 2]} + ) + tm.assert_frame_equal(result, expected) + + def test_melt_mixed_int_str_value_vars(self): + # GH 29718 + df = DataFrame({0: ["foo"], "a": ["bar"]}) + result = melt(df, value_vars=[0, "a"]) + expected = DataFrame({"variable": [0, "a"], "value": ["foo", "bar"]}) + tm.assert_frame_equal(result, expected) + class TestLreshape: def test_pairs(self): From 212dc7f8b6e4a59230cd44393f14ac968bcfe481 Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Fri, 22 Nov 2019 09:20:09 -0700 Subject: [PATCH 2/2] unnecessary --- pandas/tests/reshape/test_melt.py | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py index d11fadac94a3a..d6946ea41ed84 100644 --- a/pandas/tests/reshape/test_melt.py +++ b/pandas/tests/reshape/test_melt.py @@ -289,37 +289,34 @@ def test_melt_missing_columns_raises(self): df = pd.DataFrame(np.random.randn(5, 4), columns=list("abcd")) # Try to melt with missing `value_vars` column name - msg = r"The following '{var}' are not present in the DataFrame: \[{col}\]" - with pytest.raises(KeyError, match=msg.format(var="value_vars", col="'C'")): + msg = "The following '{Var}' are not present in the DataFrame: {Col}" + with pytest.raises( + KeyError, match=msg.format(Var="value_vars", Col="\\['C'\\]") + ): df.melt(["a", "b"], ["C", "d"]) # Try to melt with missing `id_vars` column name - with pytest.raises(KeyError, match=msg.format(var="id_vars", col="'A'")): + with pytest.raises(KeyError, match=msg.format(Var="id_vars", Col="\\['A'\\]")): df.melt(["A", "b"], ["c", "d"]) # Multiple missing with pytest.raises( - KeyError, match=msg.format(var="id_vars", col="'not_here', 'or_there'"), + KeyError, + match=msg.format(Var="id_vars", Col="\\['not_here', 'or_there'\\]"), ): df.melt(["a", "b", "not_here", "or_there"], ["c", "d"]) # Multiindex melt fails if column is missing from multilevel melt multi = df.copy() multi.columns = [list("ABCD"), list("abcd")] - with pytest.raises(KeyError, match=msg.format(var="id_vars", col="'E'")): + with pytest.raises(KeyError, match=msg.format(Var="id_vars", Col="\\['E'\\]")): multi.melt([("E", "a")], [("B", "b")]) # Multiindex fails if column is missing from single level melt - with pytest.raises(KeyError, match=msg.format(var="value_vars", col="'F'")): + with pytest.raises( + KeyError, match=msg.format(Var="value_vars", Col="\\['F'\\]") + ): multi.melt(["A"], ["F"], col_level=0) - # GH 29718: mixed int/str - df_mixed = DataFrame(columns=[0, "a"]) - with pytest.raises(KeyError, match=msg.format(var="id_vars", col="'0'")): - df_mixed.melt(id_vars=["0", "a"]) - - with pytest.raises(KeyError, match=msg.format(var="value_vars", col="'0'")): - df_mixed.melt(value_vars=["0", "a"]) - def test_melt_mixed_int_str_id_vars(self): # GH 29718 df = DataFrame({0: ["foo"], "a": ["bar"], "b": [1], "d": [2]})