diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index fc7019c486d9a..62069dfcb2262 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -850,6 +850,7 @@ update the ``ExtensionDtype._metadata`` tuple to match the signature of your
 - :func:`ExtensionArray.isna` is allowed to return an ``ExtensionArray`` (:issue:`22325`).
 - Support for reduction operations such as ``sum``, ``mean`` via opt-in base class method override (:issue:`22762`)
 - :meth:`Series.unstack` no longer converts extension arrays to object-dtype ndarrays. The output ``DataFrame`` will now have the same dtype as the input. This changes behavior for Categorical and Sparse data (:issue:`23077`).
+- Bug when grouping :meth:`Dataframe.groupby()` and aggregating on ``ExtensionArray`` it was not returning the actual ``ExtensionArray`` dtype (:issue:`23227`).
 
 .. _whatsnew_0240.api.incompatibilities:
 
@@ -1084,6 +1085,7 @@ Categorical
 - Bug when indexing with a boolean-valued ``Categorical``. Now a boolean-valued ``Categorical`` is treated as a boolean mask (:issue:`22665`)
 - Constructing a :class:`CategoricalIndex` with empty values and boolean categories was raising a ``ValueError`` after a change to dtype coercion (:issue:`22702`).
 - Bug in :meth:`Categorical.take` with a user-provided ``fill_value`` not encoding the ``fill_value``, which could result in a ``ValueError``, incorrect results, or a segmentation fault (:issue:`23296`).
+- Bug when resampling :meth:`Dataframe.resample()` and aggregating on categorical data, the categorical dtype was getting lost. (:issue:`23227`)
 
 Datetimelike
 ^^^^^^^^^^^^
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index e31929434b5d6..ea7507799fa9a 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -24,7 +24,8 @@ class providing the base-class of operations.
 from pandas.util._validators import validate_kwargs
 
 from pandas.core.dtypes.cast import maybe_downcast_to_dtype
-from pandas.core.dtypes.common import ensure_float, is_numeric_dtype, is_scalar
+from pandas.core.dtypes.common import (
+    ensure_float, is_extension_array_dtype, is_numeric_dtype, is_scalar)
 from pandas.core.dtypes.missing import isna, notna
 
 import pandas.core.algorithms as algorithms
@@ -754,7 +755,18 @@ def _try_cast(self, result, obj, numeric_only=False):
             dtype = obj.dtype
 
         if not is_scalar(result):
-            if numeric_only and is_numeric_dtype(dtype) or not numeric_only:
+            if is_extension_array_dtype(dtype):
+                # The function can return something of any type, so check
+                # if the type is compatible with the calling EA.
+                try:
+                    result = obj.values._from_sequence(result)
+                except Exception:
+                    # https://github.com/pandas-dev/pandas/issues/22850
+                    # pandas has no control over what 3rd-party ExtensionArrays
+                    # do in _values_from_sequence. We still want ops to work
+                    # though, so we catch any regular Exception.
+                    pass
+            elif numeric_only and is_numeric_dtype(dtype) or not numeric_only:
                 result = maybe_downcast_to_dtype(result, dtype)
 
         return result
diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py
index 41ec2d3026499..24bc8ffe2e5a5 100644
--- a/pandas/tests/arrays/test_integer.py
+++ b/pandas/tests/arrays/test_integer.py
@@ -650,9 +650,10 @@ def test_preserve_dtypes(op):
 
     # groupby
     result = getattr(df.groupby("A"), op)()
+
     expected = pd.DataFrame({
         "B": np.array([1.0, 3.0]),
-        "C": np.array([1, 3], dtype="int64")
+        "C": integer_array([1, 3], dtype="Int64")
     }, index=pd.Index(['a', 'b'], name='A'))
     tm.assert_frame_equal(result, expected)
 
@@ -673,9 +674,10 @@ def test_reduce_to_float(op):
 
     # groupby
     result = getattr(df.groupby("A"), op)()
+
     expected = pd.DataFrame({
         "B": np.array([1.0, 3.0]),
-        "C": np.array([1, 3], dtype="float64")
+        "C": integer_array([1, 3], dtype="Int64")
     }, index=pd.Index(['a', 'b'], name='A'))
     tm.assert_frame_equal(result, expected)
 
diff --git a/pandas/tests/sparse/test_groupby.py b/pandas/tests/sparse/test_groupby.py
index 1d2129312fb1b..d0ff2a02c4046 100644
--- a/pandas/tests/sparse/test_groupby.py
+++ b/pandas/tests/sparse/test_groupby.py
@@ -24,27 +24,39 @@ def test_first_last_nth(self):
         sparse_grouped = self.sparse.groupby('A')
         dense_grouped = self.dense.groupby('A')
 
+        sparse_grouped_first = sparse_grouped.first()
+        sparse_grouped_last = sparse_grouped.last()
+        sparse_grouped_nth = sparse_grouped.nth(1)
+
+        dense_grouped_first = dense_grouped.first().to_sparse()
+        dense_grouped_last = dense_grouped.last().to_sparse()
+        dense_grouped_nth = dense_grouped.nth(1).to_sparse()
+
         # TODO: shouldn't these all be spares or not?
-        tm.assert_frame_equal(sparse_grouped.first(),
-                              dense_grouped.first())
-        tm.assert_frame_equal(sparse_grouped.last(),
-                              dense_grouped.last())
-        tm.assert_frame_equal(sparse_grouped.nth(1),
-                              dense_grouped.nth(1).to_sparse())
+        tm.assert_frame_equal(sparse_grouped_first,
+                              dense_grouped_first)
+        tm.assert_frame_equal(sparse_grouped_last,
+                              dense_grouped_last)
+        tm.assert_frame_equal(sparse_grouped_nth,
+                              dense_grouped_nth)
 
     def test_aggfuncs(self):
         sparse_grouped = self.sparse.groupby('A')
         dense_grouped = self.dense.groupby('A')
 
-        tm.assert_frame_equal(sparse_grouped.mean(),
-                              dense_grouped.mean())
+        result = sparse_grouped.mean().to_sparse()
+        expected = dense_grouped.mean().to_sparse()
+
+        tm.assert_frame_equal(result, expected)
 
         # ToDo: sparse sum includes str column
         # tm.assert_frame_equal(sparse_grouped.sum(),
         #                       dense_grouped.sum())
 
-        tm.assert_frame_equal(sparse_grouped.count(),
-                              dense_grouped.count())
+        result = sparse_grouped.count().to_sparse()
+        expected = dense_grouped.count().to_sparse()
+
+        tm.assert_frame_equal(result, expected)
 
 
 @pytest.mark.parametrize("fill_value", [0, np.nan])
@@ -54,6 +66,5 @@ def test_groupby_includes_fill_value(fill_value):
                        'b': [fill_value, 1, fill_value, fill_value]})
     sdf = df.to_sparse(fill_value=fill_value)
     result = sdf.groupby('a').sum()
-    expected = df.groupby('a').sum()
-    tm.assert_frame_equal(result, expected,
-                          check_index_type=False)
+    expected = df.groupby('a').sum().to_sparse(fill_value=fill_value)
+    tm.assert_frame_equal(result, expected, check_index_type=False)
diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py
index 69a0613c95475..ed29e20fd5ca5 100644
--- a/pandas/tests/test_resample.py
+++ b/pandas/tests/test_resample.py
@@ -1576,6 +1576,7 @@ def test_resample_categorical_data_with_timedeltaindex(self):
                               'Group': ['A', 'A']},
                              index=pd.to_timedelta([0, 10], unit='s'))
         expected = expected.reindex(['Group_obj', 'Group'], axis=1)
+        expected['Group'] = expected['Group_obj'].astype('category')
         tm.assert_frame_equal(result, expected)
 
     def test_resample_daily_anchored(self):