Skip to content

Commit 6576242

Browse files
committed
ENH: Groupby on multiindex with missing data in group keys raises IndexError (#20519)
* If all index values in some level are NA, fill with NaN
1 parent def01cf commit 6576242

File tree

3 files changed

+37
-2
lines changed

3 files changed

+37
-2
lines changed

doc/source/whatsnew/v1.0.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ Groupby/resample/rolling
174174
^^^^^^^^^^^^^^^^^^^^^^^^
175175

176176
-
177-
-
177+
- Bug in :meth:`DataFrame.groupby` raising ``IndexError`` when all values of a :class:`MultiIndex` level used as a group key are missing (:issue:`20519`)
178178
- Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`)
179179

180180
Reshaping

pandas/core/indexes/multi.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -1279,7 +1279,10 @@ def _get_grouper_for_level(self, mapper, level):
12791279
# Remove unobserved levels from level_index
12801280
level_index = level_index.take(uniques)
12811281

1282-
grouper = level_index.take(codes)
1282+
if len(level_index):
1283+
grouper = level_index.take(codes)
1284+
else:
1285+
grouper = level_index.take(codes,fill_value=True)
12831286

12841287
return grouper, codes, level_index
12851288

pandas/tests/groupby/test_grouping.py

+32
Original file line numberDiff line numberDiff line change
@@ -628,6 +628,38 @@ def test_groupby_empty(self):
628628
# check name
629629
assert s.groupby(s).grouper.names == ["name"]
630630

631+
def test_groupby_level_index_value_all_na(self):
632+
# issue 20519
633+
df = pd.DataFrame([["x", np.nan, 1]], columns=["A", "B", "C"]).set_index(
634+
["A", "B"]
635+
)
636+
result = df.groupby(level=["A", "B"]).sum()
637+
expected = DataFrame(
638+
data=[],
639+
index=MultiIndex(
640+
levels=[["x"],[]],
641+
codes=[[],[]],
642+
names=["A","B"]
643+
),
644+
columns=["C"]
645+
)
646+
tm.assert_frame_equal(result, expected, check_index_type=False, check_dtype=False)
647+
648+
df = pd.DataFrame(
649+
[[None, None, "x", 2], [np.nan, "y", np.nan, 4]], columns=["A", "B", "C", "D"]
650+
).set_index(["A", "B", "C"])
651+
result = df.groupby(level=["A", "B", "C"]).sum()
652+
expected = DataFrame(
653+
data=[],
654+
index=MultiIndex(
655+
levels=[[], ["y"], ["x"]],
656+
codes=[[], [], []],
657+
names=["A", "B", "C"]
658+
),
659+
columns=["D"]
660+
)
661+
tm.assert_frame_equal(result, expected, check_index_type=False, check_dtype=False)
662+
631663

632664
# get_group
633665
# --------------------------------

0 commit comments

Comments
 (0)