diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 19244ba858a4d..381a05a18b278 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -345,6 +345,7 @@ Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ - Bug in :meth:`SeriesGroupBy.value_counts` where unobserved categories in a grouped categorical series were not tallied (:issue:`38672`) +- Bug in :meth:`SeriesGroupBy.value_counts` where error was raised on an empty series (:issue:`39172`) - Bug in :meth:`.GroupBy.indices` would contain non-existent indices when null values were present in the groupby keys (:issue:`9304`) - Fixed bug in :meth:`DataFrameGroupBy.sum` and :meth:`SeriesGroupBy.sum` causing loss of precision through using Kahan summation (:issue:`38778`) - Fixed bug in :meth:`DataFrameGroupBy.cumsum`, :meth:`SeriesGroupBy.cumsum`, :meth:`DataFrameGroupBy.mean` and :meth:`SeriesGroupBy.mean` causing loss of precision through using Kahan summation (:issue:`38934`) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 7034542848704..a98ef15696339 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -730,11 +730,16 @@ def apply_series_value_counts(): ids, lab = ids[sorter], lab[sorter] # group boundaries are where group ids change - idx = np.r_[0, 1 + np.nonzero(ids[1:] != ids[:-1])[0]] + idchanges = 1 + np.nonzero(ids[1:] != ids[:-1])[0] + idx = np.r_[0, idchanges] + if not len(ids): + idx = idchanges # new values are where sorted labels change lchanges = llab(lab, slice(1, None)) != llab(lab, slice(None, -1)) inc = np.r_[True, lchanges] + if not len(lchanges): + inc = lchanges inc[idx] = True # group boundaries are also new values out = np.diff(np.nonzero(np.r_[inc, True])[0]) # value counts diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py index afb648d8527ca..8bb07b7163f2e 100644 --- a/pandas/tests/groupby/test_value_counts.py +++ b/pandas/tests/groupby/test_value_counts.py @@ -122,6 +122,27 @@ def test_series_groupby_value_counts_with_grouper(): tm.assert_series_equal(result, expected) +def test_series_groupby_value_counts_empty(): + # GH39172 + df = DataFrame(columns=["A", "B"]) + dfg = df.groupby("A") + + result = dfg["B"].value_counts() + expected = Series([], name="B", dtype=result.dtype) + expected.index = MultiIndex.from_arrays([[]] * 2, names=["A", "B"]) + + tm.assert_series_equal(result, expected) + + df = DataFrame(columns=["A", "B", "C"]) + dfg = df.groupby(["A", "B"]) + + result = dfg["C"].value_counts() + expected = Series([], name="C", dtype=result.dtype) + expected.index = MultiIndex.from_arrays([[]] * 3, names=["A", "B", "C"]) + + tm.assert_series_equal(result, expected) + + def test_series_groupby_value_counts_on_categorical(): # GH38672