diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index f4308684e286a..cd0a4d0c4c3c7 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -85,7 +85,7 @@ Indexing - Bug in partial-string indexing returning a NumPy array rather than a ``Series`` when indexing with a scalar like ``.loc['2015']`` (:issue:`27516`) - Break reference cycle involving :class:`Index` and other index classes to allow garbage collection of index objects without running the GC. (:issue:`27585`, :issue:`27840`) - Fix regression in assigning values to a single column of a DataFrame with a ``MultiIndex`` columns (:issue:`27841`). -- +- ``IndexError`` is no longer raised when group keys contain NaN values (:issue:`20519`) Missing ^^^^^^^ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5f17dde01d2c4..1e2796562f29f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -873,7 +873,13 @@ def _assert_take_fillable( values, indices, allow_fill=allow_fill, fill_value=na_value ) else: - taken = values.take(indices) + try: + taken = values.take(indices) + except IndexError: + if not values.tolist(): + return [] + else: + raise return taken _index_shared_docs[ diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index d1ed79118d2fa..1848c7e791f73 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -2240,6 +2240,25 @@ def test_groupby(self): tm.assert_dict_equal(result, expected) + def test_groupby_nan_index_value(self): + df = pd.DataFrame([["x", np.nan, 1]], columns=["A", "B", "C"]).set_index( + ["A", "B"] + ) + result = df.groupby(level=["A", "B"]).C.sum() + s = Series([]) + s.name = "C" + expected = s.astype("int64") + tm.assert_series_equal(result, expected) + + df = pd.DataFrame( + [["x", np.nan, 1, 2], [None, "y", 3, 4]], columns=["A", "B", "C", "D"] + ).set_index(["A", "B", "C"]) + result = df.groupby(level=["A", "B"]).D.sum() + s = 
Series([]) + s.name = "D" + expected = s.astype("int64") + tm.assert_series_equal(result, expected) + @pytest.mark.parametrize( "mi,expected", [