Skip to content

Commit 4b2a95e

Browse files
author
HH
committed
ENH: groupby missing data in index (pandas-dev#20519)
1 parent 6813d77 commit 4b2a95e

File tree

3 files changed

+29
-2
lines changed

3 files changed

+29
-2
lines changed

doc/source/whatsnew/v0.25.1.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ Indexing
8585
- Bug in partial-string indexing returning a NumPy array rather than a ``Series`` when indexing with a scalar like ``.loc['2015']`` (:issue:`27516`)
8686
- Break reference cycle involving :class:`Index` and other index classes to allow garbage collection of index objects without running the GC. (:issue:`27585`, :issue:`27840`)
8787
- Fix regression in assigning values to a single column of a DataFrame with a ``MultiIndex`` columns (:issue:`27841`).
88-
-
88+
- Bug in :meth:`DataFrame.groupby` where an ``IndexError`` was raised when grouping by index levels whose group keys are all NaN (:issue:`20519`)
8989

9090
Missing
9191
^^^^^^^

pandas/core/indexes/multi.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1279,7 +1279,10 @@ def _get_grouper_for_level(self, mapper, level):
12791279
# Remove unobserved levels from level_index
12801280
level_index = level_index.take(uniques)
12811281

1282-
grouper = level_index.take(codes)
1282+
if not len(level_index):
1283+
grouper = level_index
1284+
else:
1285+
grouper = level_index.take(codes)
12831286

12841287
return grouper, codes, level_index
12851288

pandas/tests/indexes/test_base.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2240,6 +2240,30 @@ def test_groupby(self):
22402240

22412241
tm.assert_dict_equal(result, expected)
22422242

2243+
def test_groupby_nan_index_value(self):
2244+
df = pd.DataFrame([["x", np.nan, 1]], columns=["A", "B", "C"]).set_index(
2245+
["A", "B"]
2246+
)
2247+
result = df.groupby(level=["A", "B"]).C.sum()
2248+
result = np.asarray(result)
2249+
2250+
s = Series([])
2251+
s.name = "C"
2252+
expected = s.astype("int64")
2253+
expected = np.asarray(expected)
2254+
tm.assert_numpy_array_equal(result, expected)
2255+
2256+
df = pd.DataFrame(
2257+
[["x", np.nan, 1, 2], [None, "y", 3, 4]], columns=["A", "B", "C", "D"]
2258+
).set_index(["A", "B", "C"])
2259+
result = df.groupby(level=["A", "B"]).D.sum()
2260+
s = Series([])
2261+
s.name = "D"
2262+
expected = s.astype("int64")
2263+
result = np.asarray(result)
2264+
expected = np.asarray(expected)
2265+
tm.assert_numpy_array_equal(result, expected)
2266+
22432267
@pytest.mark.parametrize(
22442268
"mi,expected",
22452269
[

0 commit comments

Comments
 (0)