Skip to content

Commit 52cf65a

Browse files
committed
ENH: Groupby on multiindex with missing data in group keys raises IndexError (#20519)
* if all the values in a level of a MultiIndex were missing, fill with numpy nan
1 parent def01cf commit 52cf65a

File tree

3 files changed

+27
-2
lines changed

3 files changed

+27
-2
lines changed

doc/source/whatsnew/v1.0.0.rst

+3-1
Original file line numberDiff line numberDiff line change
@@ -174,8 +174,10 @@ Groupby/resample/rolling
174174
^^^^^^^^^^^^^^^^^^^^^^^^
175175

176176
-
177-
-
177+
- Bug in :meth:`DataFrame.groupby` with multiple groups where an ``IndexError`` would be raised if any group contained all NA values (:issue:`20519`)
178+
- Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue:`28192`)
178179
- Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`)
180+
- Bug in :meth:`DataFrameGroupBy.agg` not able to use lambda function with named aggregation (:issue:`27519`)
179181

180182
Reshaping
181183
^^^^^^^^^

pandas/core/indexes/multi.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -1279,7 +1279,10 @@ def _get_grouper_for_level(self, mapper, level):
12791279
# Remove unobserved levels from level_index
12801280
level_index = level_index.take(uniques)
12811281

1282-
grouper = level_index.take(codes)
1282+
if len(level_index):
1283+
grouper = level_index.take(codes)
1284+
else:
1285+
grouper = level_index.take(codes, fill_value=True)
12831286

12841287
return grouper, codes, level_index
12851288

pandas/tests/groupby/test_grouping.py

+20
Original file line numberDiff line numberDiff line change
@@ -628,6 +628,26 @@ def test_groupby_empty(self):
628628
# check name
629629
assert s.groupby(s).grouper.names == ["name"]
630630

631+
def test_groupby_level_index_value_all_na(self):
632+
# issue 20519
633+
df = pd.DataFrame(
634+
[["x", np.nan, 10], [None, np.nan, 20]], columns=["A", "B", "C"]
635+
).set_index(["A", "B"])
636+
result = df.groupby(level=["A", "B"]).sum()
637+
expected = DataFrame(
638+
data=[],
639+
index=MultiIndex(
640+
levels=[
641+
Index(["x"], dtype="object", name="A"),
642+
Index([], dtype="float64", name="B"),
643+
],
644+
codes=[[], []],
645+
),
646+
columns=["C"],
647+
dtype="int64",
648+
)
649+
tm.assert_frame_equal(result, expected)
650+
631651

632652
# get_group
633653
# --------------------------------

0 commit comments

Comments
 (0)