diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
index 7d39dd27f716f..b531486162ee4 100644
--- a/doc/source/whatsnew/v0.24.0.rst
+++ b/doc/source/whatsnew/v0.24.0.rst
@@ -1567,6 +1567,7 @@ Groupby/Resample/Rolling
 - Bug in :meth:`pandas.core.groupby.GroupBy.rank` with ``method='dense'`` and ``pct=True`` when a group has only one member would raise a ``ZeroDivisionError`` (:issue:`23666`).
 - Calling :meth:`pandas.core.groupby.GroupBy.rank` with empty groups and ``pct=True`` was raising a ``ZeroDivisionError`` (:issue:`22519`)
 - Bug in :meth:`DataFrame.resample` when resampling ``NaT`` in ``TimeDeltaIndex`` (:issue:`13223`).
+- Bug in :meth:`DataFrame.groupby` where the ``observed`` argument was not respected when selecting a column; ``observed=False`` was always used instead (:issue:`23970`).
 
 Reshaping
 ^^^^^^^^^
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 33a41ab1cabc4..c5142a4ee98cc 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1336,7 +1336,8 @@ def _gotitem(self, key, ndim, subset=None):
             return DataFrameGroupBy(subset, self.grouper, selection=key,
                                     grouper=self.grouper,
                                     exclusions=self.exclusions,
-                                    as_index=self.as_index)
+                                    as_index=self.as_index,
+                                    observed=self.observed)
         elif ndim == 1:
             if subset is None:
                 subset = self.obj[key]
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 7eda113be0e36..a39600d114b89 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -863,6 +863,34 @@ def test_groupby_multiindex_categorical_datetime():
     assert_frame_equal(result, expected)
 
 
+@pytest.mark.parametrize("as_index, expected", [
+    (True, pd.Series(
+        index=pd.MultiIndex.from_arrays(
+            [pd.Series([1, 1, 2], dtype='category'),
+                [1, 2, 2]], names=['a', 'b']
+        ),
+        data=[1, 2, 3], name='x'
+    )),
+    (False, pd.DataFrame({
+        'a': pd.Series([1, 1, 2], dtype='category'),
+        'b': [1, 2, 2],
+        'x': [1, 2, 3]
+    }))
+])
+def test_groupby_agg_observed_true_single_column(as_index, expected):
+    # GH-23970: selecting column 'x' must still honor observed=True
+    df = pd.DataFrame({
+        'a': pd.Series([1, 1, 2], dtype='category'),
+        'b': [1, 2, 2],
+        'x': [1, 2, 3]
+    })
+
+    result = df.groupby(
+        ['a', 'b'], as_index=as_index, observed=True)['x'].sum()
+
+    assert_equal(result, expected)
+
+
 @pytest.mark.parametrize('fill_value', [None, np.nan, pd.NaT])
 def test_shift(fill_value):
     ct = pd.Categorical(['a', 'b', 'c', 'd'],