Skip to content

BUG: Fix MultiIndexed unstack failures at tuple names #30943

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Jan 20, 2020
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ Reshaping

-
- Bug in :meth:`DataFrame.pivot_table` when only MultiIndexed columns is set (:issue:`17038`)
- Bug in :meth:`DataFrame.unstack` and :meth:`Series.unstack` where unstacking failed when the MultiIndexed data had tuple level names (:issue:`19966`)
- Bug in :meth:`DataFrame.pivot_table` when ``margin`` is ``True`` and only ``column`` is defined (:issue:`31016`)
- Fix incorrect error message in :meth:`DataFrame.pivot` when ``columns`` is set to ``None``. (:issue:`30924`)
- Bug in :func:`crosstab` when inputs are two Series and have tuple names, the output will keep dummy MultiIndex as columns. (:issue:`18321`)
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,10 @@ def _unstack_multiple(data, clocs, fill_value=None):

index = data.index

# GH 19966: if clocs is itself one of the index level names (e.g. a tuple
# name), wrap it in a list so it is treated as a single level as a whole
if clocs in index.names:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a comment here on what is going on

clocs = [clocs]
clocs = [index._get_level_number(i) for i in clocs]

rlocs = [i for i in range(index.nlevels) if i not in clocs]
Expand Down
74 changes: 74 additions & 0 deletions pandas/tests/frame/test_reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,80 @@ def test_unstack_fill_frame_categorical(self):
)
tm.assert_frame_equal(result, expected)

def test_unstack_tuplename_in_multiindex(self):
# GH 19966
idx = pd.MultiIndex.from_product(
[["a", "b", "c"], [1, 2, 3]], names=[("A", "a"), ("B", "b")]
)
df = pd.DataFrame({"d": [1] * 9, "e": [2] * 9}, index=idx)
result = df.unstack(("A", "a"))

expected = pd.DataFrame(
[[1, 1, 1, 2, 2, 2], [1, 1, 1, 2, 2, 2], [1, 1, 1, 2, 2, 2]],
columns=pd.MultiIndex.from_tuples(
[
("d", "a"),
("d", "b"),
("d", "c"),
("e", "a"),
("e", "b"),
("e", "c"),
],
names=[None, ("A", "a")],
),
index=pd.Index([1, 2, 3], name=("B", "b")),
)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize(
    "unstack_idx, expected_values, expected_index, expected_columns",
    [
        # Unstack only the tuple-named level; "B" and "C" stay in the index.
        (
            ("A", "a"),
            [[1, 1, 2, 2]] * 4,
            pd.MultiIndex.from_tuples(
                [(1, 3), (1, 4), (2, 3), (2, 4)], names=["B", "C"]
            ),
            pd.MultiIndex.from_tuples(
                [("d", "a"), ("d", "b"), ("e", "a"), ("e", "b")],
                names=[None, ("A", "a")],
            ),
        ),
        # Unstack the tuple-named level together with "B"; only "C" remains.
        (
            (("A", "a"), "B"),
            [[1, 1, 1, 1, 2, 2, 2, 2]] * 2,
            pd.Index([3, 4], name="C"),
            pd.MultiIndex.from_tuples(
                [
                    (col, key, num)
                    for col in ("d", "e")
                    for key in ("a", "b")
                    for num in (1, 2)
                ],
                names=[None, ("A", "a"), "B"],
            ),
        ),
    ],
)
def test_unstack_mixed_type_name_in_multiindex(
    self, unstack_idx, expected_values, expected_index, expected_columns
):
    """Unstack a DataFrame whose index mixes tuple and string level names (GH 19966)."""
    level_names = [("A", "a"), "B", "C"]
    row_index = pd.MultiIndex.from_product(
        [["a", "b"], [1, 2], [3, 4]], names=level_names
    )
    frame = pd.DataFrame({"d": [1] * 8, "e": [2] * 8}, index=row_index)

    expected = pd.DataFrame(
        expected_values, index=expected_index, columns=expected_columns
    )
    tm.assert_frame_equal(frame.unstack(unstack_idx), expected)

def test_unstack_preserve_dtypes(self):
# Checks fix for #11847
df = pd.DataFrame(
Expand Down
61 changes: 1 addition & 60 deletions pandas/tests/series/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import pandas.util._test_decorators as td

import pandas as pd
from pandas import DataFrame, MultiIndex, Series
from pandas import DataFrame, Series
import pandas._testing as tm


Expand Down Expand Up @@ -160,65 +160,6 @@ def test_is_monotonic(self):
assert s.is_monotonic is False
assert s.is_monotonic_decreasing is True

def test_unstack(self):

index = MultiIndex(
levels=[["bar", "foo"], ["one", "three", "two"]],
codes=[[1, 1, 0, 0], [0, 1, 0, 2]],
)

s = Series(np.arange(4.0), index=index)
unstacked = s.unstack()

expected = DataFrame(
[[2.0, np.nan, 3.0], [0.0, 1.0, np.nan]],
index=["bar", "foo"],
columns=["one", "three", "two"],
)

tm.assert_frame_equal(unstacked, expected)

unstacked = s.unstack(level=0)
tm.assert_frame_equal(unstacked, expected.T)

index = MultiIndex(
levels=[["bar"], ["one", "two", "three"], [0, 1]],
codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
)
s = Series(np.random.randn(6), index=index)
exp_index = MultiIndex(
levels=[["one", "two", "three"], [0, 1]],
codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
)
expected = DataFrame({"bar": s.values}, index=exp_index).sort_index(level=0)
unstacked = s.unstack(0).sort_index()
tm.assert_frame_equal(unstacked, expected)

# GH5873
idx = pd.MultiIndex.from_arrays([[101, 102], [3.5, np.nan]])
ts = pd.Series([1, 2], index=idx)
left = ts.unstack()
right = DataFrame(
[[np.nan, 1], [2, np.nan]], index=[101, 102], columns=[np.nan, 3.5]
)
tm.assert_frame_equal(left, right)

idx = pd.MultiIndex.from_arrays(
[
["cat", "cat", "cat", "dog", "dog"],
["a", "a", "b", "a", "b"],
[1, 2, 1, 1, np.nan],
]
)
ts = pd.Series([1.0, 1.1, 1.2, 1.3, 1.4], index=idx)
right = DataFrame(
[[1.0, 1.3], [1.1, np.nan], [np.nan, 1.4], [1.2, np.nan]],
columns=["cat", "dog"],
)
tpls = [("a", 1), ("a", 2), ("b", np.nan), ("b", 1)]
right.index = pd.MultiIndex.from_tuples(tpls)
tm.assert_frame_equal(ts.unstack(level=0), right)

@pytest.mark.parametrize("func", [np.any, np.all])
@pytest.mark.parametrize("kwargs", [dict(keepdims=True), dict(out=object())])
@td.skip_if_np_lt("1.15")
Expand Down
120 changes: 120 additions & 0 deletions pandas/tests/series/test_reshaping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import numpy as np
import pytest

import pandas as pd
from pandas import DataFrame, MultiIndex, Series
import pandas._testing as tm


def test_unstack():
    """Check ``Series.unstack`` on two- and three-level indexes, including NaN labels."""
    # Two-level index with missing combinations: the gaps show up as NaN.
    two_level = MultiIndex(
        levels=[["bar", "foo"], ["one", "three", "two"]],
        codes=[[1, 1, 0, 0], [0, 1, 0, 2]],
    )
    ser = Series(np.arange(4.0), index=two_level)
    expected = DataFrame(
        [[2.0, np.nan, 3.0], [0.0, 1.0, np.nan]],
        index=["bar", "foo"],
        columns=["one", "three", "two"],
    )
    tm.assert_frame_equal(ser.unstack(), expected)
    # Unstacking the outer level instead gives the transposed frame.
    tm.assert_frame_equal(ser.unstack(level=0), expected.T)

    # Three-level index with a single outer label.
    inner_levels = [["one", "two", "three"], [0, 1]]
    inner_codes = [[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]]
    three_level = MultiIndex(
        levels=[["bar"], *inner_levels],
        codes=[[0, 0, 0, 0, 0, 0], *inner_codes],
    )
    ser = Series(np.random.randn(6), index=three_level)
    remaining = MultiIndex(levels=inner_levels, codes=inner_codes)
    expected = DataFrame({"bar": ser.values}, index=remaining).sort_index(level=0)
    tm.assert_frame_equal(ser.unstack(0).sort_index(), expected)

    # GH5873: NaN among the index labels
    nan_index = pd.MultiIndex.from_arrays([[101, 102], [3.5, np.nan]])
    int_ser = pd.Series([1, 2], index=nan_index)
    expected = DataFrame(
        [[np.nan, 1], [2, np.nan]], index=[101, 102], columns=[np.nan, 3.5]
    )
    tm.assert_frame_equal(int_ser.unstack(), expected)

    nan_index = pd.MultiIndex.from_arrays(
        [
            ["cat", "cat", "cat", "dog", "dog"],
            ["a", "a", "b", "a", "b"],
            [1, 2, 1, 1, np.nan],
        ]
    )
    float_ser = pd.Series([1.0, 1.1, 1.2, 1.3, 1.4], index=nan_index)
    expected = DataFrame(
        [[1.0, 1.3], [1.1, np.nan], [np.nan, 1.4], [1.2, np.nan]],
        columns=["cat", "dog"],
    )
    expected.index = pd.MultiIndex.from_tuples(
        [("a", 1), ("a", 2), ("b", np.nan), ("b", 1)]
    )
    tm.assert_frame_equal(float_ser.unstack(level=0), expected)


def test_unstack_tuplename_in_multiindex():
    """Unstack a Series level that is identified by a tuple name (GH 19966)."""
    outer_name = ("A", "a")
    inner_name = ("B", "b")
    row_index = pd.MultiIndex.from_product(
        [["a", "b", "c"], [1, 2, 3]], names=[outer_name, inner_name]
    )
    ser = pd.Series(1, index=row_index)

    # The expected columns are a one-level MultiIndex named by the tuple.
    expected_columns = pd.MultiIndex.from_tuples(
        [(key,) for key in ("a", "b", "c")], names=[outer_name]
    )
    expected = pd.DataFrame(
        [[1, 1, 1]] * 3,
        columns=expected_columns,
        index=pd.Index([1, 2, 3], name=inner_name),
    )

    tm.assert_frame_equal(ser.unstack(outer_name), expected)


@pytest.mark.parametrize(
    "unstack_idx, expected_values, expected_index, expected_columns",
    [
        # Unstack only the tuple-named level; "B" and "C" stay in the index.
        (
            ("A", "a"),
            [[1, 1]] * 4,
            pd.MultiIndex.from_tuples(
                [(1, 3), (1, 4), (2, 3), (2, 4)], names=["B", "C"]
            ),
            pd.MultiIndex.from_tuples([("a",), ("b",)], names=[("A", "a")]),
        ),
        # Unstack the tuple-named level together with "B"; only "C" remains.
        (
            (("A", "a"), "B"),
            [[1, 1, 1, 1]] * 2,
            pd.Index([3, 4], name="C"),
            pd.MultiIndex.from_tuples(
                [(key, num) for key in ("a", "b") for num in (1, 2)],
                names=[("A", "a"), "B"],
            ),
        ),
    ],
)
def test_unstack_mixed_type_name_in_multiindex(
    unstack_idx, expected_values, expected_index, expected_columns
):
    """Unstack a Series whose index mixes tuple and string level names (GH 19966)."""
    level_names = [("A", "a"), "B", "C"]
    row_index = pd.MultiIndex.from_product(
        [["a", "b"], [1, 2], [3, 4]], names=level_names
    )
    ser = pd.Series(1, index=row_index)

    expected = pd.DataFrame(
        expected_values, index=expected_index, columns=expected_columns
    )
    tm.assert_frame_equal(ser.unstack(unstack_idx), expected)