diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 7d39dd27f716f..b531486162ee4 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1567,6 +1567,7 @@ Groupby/Resample/Rolling - Bug in :meth:`pandas.core.groupby.GroupBy.rank` with ``method='dense'`` and ``pct=True`` when a group has only one member would raise a ``ZeroDivisionError`` (:issue:`23666`). - Calling :meth:`pandas.core.groupby.GroupBy.rank` with empty groups and ``pct=True`` was raising a ``ZeroDivisionError`` (:issue:`22519`) - Bug in :meth:`DataFrame.resample` when resampling ``NaT`` in ``TimeDeltaIndex`` (:issue:`13223`). +- Bug in :meth:`DataFrame.groupby` where the ``observed`` argument was not respected when selecting a column and ``observed=False`` was always used instead (:issue:`23970`). Reshaping ^^^^^^^^^ diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 33a41ab1cabc4..c5142a4ee98cc 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1336,7 +1336,8 @@ def _gotitem(self, key, ndim, subset=None): return DataFrameGroupBy(subset, self.grouper, selection=key, grouper=self.grouper, exclusions=self.exclusions, - as_index=self.as_index) + as_index=self.as_index, + observed=self.observed) elif ndim == 1: if subset is None: subset = self.obj[key] diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 7eda113be0e36..a39600d114b89 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -863,6 +863,34 @@ def test_groupby_multiindex_categorical_datetime(): assert_frame_equal(result, expected) +@pytest.mark.parametrize("as_index, expected", [ + (True, pd.Series( + index=pd.MultiIndex.from_arrays( + [pd.Series([1, 1, 2], dtype='category'), + [1, 2, 2]], names=['a', 'b'] + ), + data=[1, 2, 3], name='x' + )), + (False, pd.DataFrame({ + 'a': pd.Series([1, 1, 2], dtype='category'), + 'b': [1, 2, 2], 
+ 'x': [1, 2, 3] + })) +]) +def test_groupby_agg_observed_true_single_column(as_index, expected): + # GH-23970 + df = pd.DataFrame({ + 'a': pd.Series([1, 1, 2], dtype='category'), + 'b': [1, 2, 2], + 'x': [1, 2, 3] + }) + + result = df.groupby( + ['a', 'b'], as_index=as_index, observed=True)['x'].sum() + + assert_equal(result, expected) + + @pytest.mark.parametrize('fill_value', [None, np.nan, pd.NaT]) def test_shift(fill_value): ct = pd.Categorical(['a', 'b', 'c', 'd'],