From 5bec6b31bac1fc3a81676e63db32d2297422917b Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Fri, 24 May 2019 14:44:27 -0400
Subject: [PATCH 01/10] BUG: preserve categorical & sparse types when grouping
 / pivot

preserve dtypes when applying a ufunc to a sparse dtype

closes #18502
closes #23743
---
 doc/source/whatsnew/v0.25.0.rst              | 59 ++++++++++++++++++++
 pandas/core/dtypes/cast.py                   |  8 ++-
 pandas/core/frame.py                         | 45 +++++++++++++++
 pandas/core/generic.py                       | 24 ++------
 pandas/core/groupby/generic.py               | 11 +++-
 pandas/core/groupby/groupby.py               | 27 +++++++--
 pandas/core/groupby/ops.py                   |  6 +-
 pandas/core/internals/blocks.py              | 16 +++++-
 pandas/core/nanops.py                        |  9 +--
 pandas/core/series.py                        | 27 +++++++--
 pandas/tests/groupby/test_function.py        | 47 ++++++++--------
 pandas/tests/groupby/test_nth.py             | 19 ++++---
 pandas/tests/resample/test_datetime_index.py |  6 ++
 pandas/tests/sparse/frame/test_analytics.py  | 16 +++++-
 pandas/tests/sparse/series/test_analytics.py | 16 ++++++
 pandas/tests/sparse/test_groupby.py          | 10 ++--
 pandas/tests/sparse/test_pivot.py            | 16 +++++-
 17 files changed, 282 insertions(+), 80 deletions(-)
 create mode 100644 pandas/tests/sparse/series/test_analytics.py

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index a6b74865f6619..6a792500029e0 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -316,6 +316,65 @@ of ``object`` dtype. :attr:`Series.str` will now infer the dtype data *within* t
     s
     s.str.startswith(b'a')
 
+<<<<<<< HEAD
+=======
+.. _whatsnew_0250.api_breaking.ufuncs:
+
+ufuncs on Extension Dtype
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Operations with ``numpy`` ufuncs on Extension Arrays, including Sparse Dtypes, will now coerce the
+resulting dtypes to the same as the input dtype; previously this would coerce to a dense dtype. (:issue:`23743`)
+
+.. ipython:: python
+
+   df = pd.DataFrame({'A': pd.Series([1, np.nan, 3], dtype=pd.SparseDtype('float64', np.nan))})
+   df
+   df.dtypes
+
+*Previous Behavior*:
+
+.. code-block:: python
+
+   In [3]: np.sqrt(df).dtypes
+   Out[3]:
+   A    float64
+   dtype: object
+
+*New Behavior*:
+
+.. ipython:: python
+
+   np.sqrt(df).dtypes
+
+.. _whatsnew_0250.api_breaking.groupby_categorical:
+
+Categorical dtypes are preserved during groupby
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Previously, columns that were categorical, but not the groupby key(s), would be converted to ``object`` dtype during groupby operations. Pandas will now preserve these dtypes. (:issue:`18502`)
+
+.. ipython:: python
+
+   df = pd.DataFrame({'payload': [-1,-2,-1,-2],
+                      'col': pd.Categorical(["foo", "bar", "bar", "qux"], ordered=True)})
+   df
+   df.dtypes
+
+*Previous Behavior*:
+
+.. code-block:: python
+
+   In [5]: df.groupby('payload').first().col.dtype
+   Out[5]: dtype('O')
+
+*New Behavior*:
+
+.. ipython:: python
+
+   df.groupby('payload').first().col.dtype
+
+
 .. _whatsnew_0250.api_breaking.incompatible_index_unions:
 
 Incompatible Index Type Unions
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 2f66e9ed46aa0..0268f8fbdf467 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -605,7 +605,7 @@ def conv(r, dtype):
     return [conv(r, dtype) for r, dtype in zip(result, dtypes)]
 
 
-def astype_nansafe(arr, dtype, copy=True, skipna=False):
+def astype_nansafe(arr, dtype, copy=True, skipna=False, casting='unsafe'):
     """
     Cast the elements of an array to a given dtype a nan-safe manner.
 
@@ -616,8 +616,10 @@ def astype_nansafe(arr, dtype, copy=True, skipna=False):
     copy : bool, default True
         If False, a view will be attempted but may fail, if
         e.g. the item sizes don't align.
-    skipna: bool, default False
+    skipna : bool, default False
         Whether or not we should skip NaN when casting as a string-type.
+    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, default 'unsafe'
+        Casting rule forwarded to ``arr.astype`` when a copy is made.
 
     Raises
     ------
@@ -703,7 +705,7 @@ def astype_nansafe(arr, dtype, copy=True, skipna=False):
 
     if copy or is_object_dtype(arr) or is_object_dtype(dtype):
         # Explicit copy, or required since NumPy can't view from / to object.
-        return arr.astype(dtype, copy=True)
+        return arr.astype(dtype, copy=True, casting=casting)
 
     return arr.view(dtype)
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 6746844f4b1fa..ad5a04f8cb934 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2641,6 +2641,51 @@ def transpose(self, *args, **kwargs):
 
     T = property(transpose)
 
+    # ----------------------------------------------------------------------
+    # Array Interface
+
+    # This is also set in IndexOpsMixin
+    # GH#23114 Ensure ndarray.__op__(DataFrame) returns NotImplemented
+    __array_priority__ = 1000
+
+    def __array__(self, dtype=None):
+        return com.values_from_object(self)
+
+    def __array_wrap__(self, result: np.ndarray, context=None) -> 'DataFrame':
+        """
+        We are called post ufunc; reconstruct the original object and dtypes.
+
+        Parameters
+        ----------
+        result : np.ndarray
+        context
+
+        Returns
+        -------
+        DataFrame
+        """
+
+        d = self._construct_axes_dict(self._AXIS_ORDERS, copy=False)
+        result = self._constructor(result, **d)
+
+        # we try to cast extension array types back to the original
+        # TODO: this fails with duplicates, ugh
+        if self._data.any_extension_types:
+            result = result.astype(self.dtypes,
+                                   copy=False,
+                                   errors='ignore',
+                                   casting='same_kind')
+
+        return result.__finalize__(self)
+
+    # ideally we would define this to avoid the getattr checks, but
+    # is slower
+    # @property
+    # def __array_interface__(self):
+    #    """ provide numpy array interface method """
+    #    values = self.values
+    #    return dict(typestr=values.dtype.str,shape=values.shape,data=values)
+
     # ----------------------------------------------------------------------
     # Picklability
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 360576ffdb00a..b614298bb912c 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -1919,25 +1919,6 @@ def empty(self):
     # ----------------------------------------------------------------------
     # Array Interface
 
-    # This is also set in IndexOpsMixin
-    # GH#23114 Ensure ndarray.__op__(DataFrame) returns NotImplemented
-    __array_priority__ = 1000
-
-    def __array__(self, dtype=None):
-        return com.values_from_object(self)
-
-    def __array_wrap__(self, result, context=None):
-        d = self._construct_axes_dict(self._AXIS_ORDERS, copy=False)
-        return self._constructor(result, **d).__finalize__(self)
-
-    # ideally we would define this to avoid the getattr checks, but
-    # is slower
-    # @property
-    # def __array_interface__(self):
-    #    """ provide numpy array interface method """
-    #    values = self.values
-    #    return dict(typestr=values.dtype.str,shape=values.shape,data=values)
-
     def to_dense(self):
         """
         Return dense representation of NDFrame (as opposed to sparse).
@@ -5693,6 +5674,11 @@ def astype(self, dtype, copy=True, errors='raise', **kwargs):
                                          **kwargs)
             return self._constructor(new_data).__finalize__(self)
 
+        if not results:
+            if copy:
+                self = self.copy()
+            return self
+
         # GH 19920: retain column metadata after concat
         result = pd.concat(results, axis=1, copy=False)
         result.columns = self.columns
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 35ffa552913ae..20b7a595f49e9 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -156,12 +156,19 @@ def _cython_agg_blocks(self, how, alt=None, numeric_only=True,
 
                 obj = self.obj[data.items[locs]]
                 s = groupby(obj, self.grouper)
-                result = s.aggregate(lambda x: alt(x, axis=self.axis))
+                try:
+                    result = s.aggregate(lambda x: alt(x, axis=self.axis))
+                except Exception:
+                    # we may have an exception in trying to aggregate
+                    # continue and exclude the block
+                    pass
 
             finally:
 
+                dtype = block.values.dtype
+
                 # see if we can cast the block back to the original dtype
-                result = block._try_coerce_and_cast_result(result)
+                result = block._try_coerce_and_cast_result(result, dtype=dtype)
                 newb = block.make_block(result)
 
             new_items.append(locs)
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 43950f2f503c8..2c042d55dfa4b 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -786,6 +786,8 @@ def _try_cast(self, result, obj, numeric_only=False):
             elif is_extension_array_dtype(dtype):
                 # The function can return something of any type, so check
                 # if the type is compatible with the calling EA.
+
+                # return the same type (Series) as our caller
                 try:
                     result = obj._values._from_sequence(result, dtype=dtype)
                 except Exception:
@@ -1157,7 +1159,8 @@ def mean(self, *args, **kwargs):
         """
         nv.validate_groupby_func('mean', args, kwargs, ['numeric_only'])
         try:
-            return self._cython_agg_general('mean', **kwargs)
+            return self._cython_agg_general(
+                'mean', alt=lambda x, axis: Series(x).mean(**kwargs), **kwargs)
         except GroupByError:
             raise
         except Exception:  # pragma: no cover
@@ -1179,7 +1182,11 @@ def median(self, **kwargs):
             Median of values within each group.
         """
         try:
-            return self._cython_agg_general('median', **kwargs)
+            return self._cython_agg_general(
+                'median',
+                alt=lambda x,
+                axis: Series(x).median(**kwargs),
+                **kwargs)
         except GroupByError:
             raise
         except Exception:  # pragma: no cover
@@ -1235,7 +1242,10 @@ def var(self, ddof=1, *args, **kwargs):
         nv.validate_groupby_func('var', args, kwargs)
         if ddof == 1:
             try:
-                return self._cython_agg_general('var', **kwargs)
+                return self._cython_agg_general(
+                    'var',
+                    alt=lambda x, axis: Series(x).var(ddof=ddof, **kwargs),
+                    **kwargs)
             except Exception:
                 f = lambda x: x.var(ddof=ddof, **kwargs)
                 with _group_selection_context(self):
@@ -1263,7 +1273,6 @@ def sem(self, ddof=1):
         Series or DataFrame
             Standard error of the mean of values within each group.
         """
-
         return self.std(ddof=ddof) / np.sqrt(self.count())
 
     @Substitution(name='groupby')
@@ -1320,6 +1329,16 @@ def f(self, **kwargs):
                 except Exception:
                     result = self.aggregate(
                         lambda x: npfunc(x, axis=self.axis))
+
+                    # coerce the columns if we can
+                    if isinstance(result, DataFrame):
+                        for col in result.columns:
+                            result[col] = self._try_cast(
+                                result[col], self.obj[col])
+                    else:
+                        result = self._try_cast(
+                            result, self.obj)
+
                     if _convert:
                         result = result._convert(datetime=True)
                     return result
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 010047a8be4ed..38478be5a8e07 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -19,7 +19,7 @@
 from pandas.core.dtypes.common import (
     ensure_float64, ensure_int64, ensure_int_or_float, ensure_object,
     ensure_platform_int, is_bool_dtype, is_categorical_dtype, is_complex_dtype,
-    is_datetime64_any_dtype, is_integer_dtype, is_numeric_dtype,
+    is_datetime64_any_dtype, is_integer_dtype, is_numeric_dtype, is_sparse,
     is_timedelta64_dtype, needs_i8_conversion)
 from pandas.core.dtypes.missing import _maybe_fill, isna
 
@@ -451,9 +451,9 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1,
 
         # categoricals are only 1d, so we
         # are not setup for dim transforming
-        if is_categorical_dtype(values):
+        if is_categorical_dtype(values) or is_sparse(values):
             raise NotImplementedError(
-                "categoricals are not support in cython ops ATM")
+                "{} are not support in cython ops".format(values.dtype))
         elif is_datetime64_any_dtype(values):
             if how in ['add', 'prod', 'cumsum', 'cumprod']:
                 raise NotImplementedError(
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 4cc6c86417b3b..429b2b064c702 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -600,7 +600,8 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
                         values = self.get_values(dtype=dtype)
 
                     # _astype_nansafe works fine with 1-d only
-                    values = astype_nansafe(values.ravel(), dtype, copy=True)
+                    values = astype_nansafe(
+                        values.ravel(), dtype, copy=True, **kwargs)
 
                 # TODO(extension)
                 # should we make this attribute?
@@ -1767,6 +1768,19 @@ def _slice(self, slicer):
 
         return self.values[slicer]
 
+    def _try_cast_result(self, result, dtype=None):
+        """
+        If an operation produced plain values (for example floats),
+        try to cast the result back to our EA here if possible.
+        """
+        try:
+            result = self._holder._from_sequence(
+                np.asarray(result).ravel(), dtype=dtype)
+        except Exception:
+            pass
+
+        return result
+
     def formatting_values(self):
         # Deprecating the ability to override _formatting_values.
         # Do the warning here, it's only user in pandas, since we
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index 7923e463c7719..24a28bf0005cb 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -72,11 +72,12 @@ def _f(*args, **kwargs):
 
 class bottleneck_switch:
 
-    def __init__(self, **kwargs):
+    def __init__(self, name=None, **kwargs):
+        self.name = name
         self.kwargs = kwargs
 
     def __call__(self, alt):
-        bn_name = alt.__name__
+        bn_name = self.name or alt.__name__
 
         try:
             bn_func = getattr(bn, bn_name)
@@ -804,7 +805,8 @@ def nansem(values, axis=None, skipna=True, ddof=1, mask=None):
 
 
 def _nanminmax(meth, fill_value_typ):
-    @bottleneck_switch()
+
+    @bottleneck_switch(name='nan' + meth)
     def reduction(values, axis=None, skipna=True, mask=None):
 
         values, mask, dtype, dtype_max, fill_value = _get_values(
@@ -824,7 +826,6 @@ def reduction(values, axis=None, skipna=True, mask=None):
         result = _wrap_results(result, dtype, fill_value)
         return _maybe_null_out(result, axis, mask, values.shape)
 
-    reduction.__name__ = 'nan' + meth
     return reduction
 
 
diff --git a/pandas/core/series.py b/pandas/core/series.py
index c4a449154860f..485c7db92bb2d 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -762,12 +762,31 @@ def __array__(self, dtype=None):
             dtype = 'M8[ns]'
         return np.asarray(self.array, dtype)
 
-    def __array_wrap__(self, result, context=None):
+    def __array_wrap__(self, result: np.ndarray, context=None) -> 'Series':
         """
-        Gets called after a ufunc.
+        We are called post ufunc; reconstruct the original object and dtypes.
+
+        Parameters
+        ----------
+        result : np.ndarray
+        context
+
+        Returns
+        -------
+        Series
         """
-        return self._constructor(result, index=self.index,
-                                 copy=False).__finalize__(self)
+
+        result = self._constructor(result, index=self.index,
+                                   copy=False)
+
+        # we try to cast extension array types back to the original
+        if is_extension_array_dtype(self):
+            result = result.astype(self.dtype,
+                                   copy=False,
+                                   errors='ignore',
+                                   casting='same_kind')
+
+        return result.__finalize__(self)
 
     def __array_prepare__(self, result, context=None):
         """
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 3d9bfcd126377..379b6db2b650e 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -144,6 +144,7 @@ def test_arg_passthru():
         index=Index([1, 2], name='group'),
         columns=['int', 'float', 'category_int',
                  'datetime', 'datetimetz', 'timedelta'])
+
     for attr in ['mean', 'median']:
         f = getattr(df.groupby('group'), attr)
         result = f()
@@ -459,35 +460,35 @@ def test_groupby_cumprod():
     tm.assert_series_equal(actual, expected)
 
 
-def test_ops_general():
-    ops = [('mean', np.mean),
-           ('median', np.median),
-           ('std', np.std),
-           ('var', np.var),
-           ('sum', np.sum),
-           ('prod', np.prod),
-           ('min', np.min),
-           ('max', np.max),
-           ('first', lambda x: x.iloc[0]),
-           ('last', lambda x: x.iloc[-1]),
-           ('count', np.size), ]
+def scipy_sem(*args, **kwargs):
     try:
         from scipy.stats import sem
+        return sem(*args, ddof=1, **kwargs)
     except ImportError:
-        pass
-    else:
-        ops.append(('sem', sem))
+        pytest.skip("No Scipy installed")
+
+
+@pytest.mark.parametrize(
+    'op,targop',
+    [('mean', np.mean),
+     ('median', np.median),
+     ('std', np.std),
+     ('var', np.var),
+     ('sum', np.sum),
+     ('prod', np.prod),
+     ('min', np.min),
+     ('max', np.max),
+     ('first', lambda x: x.iloc[0]),
+     ('last', lambda x: x.iloc[-1]),
+     ('count', np.size),
+     ('sem', scipy_sem)])
+def test_ops_general(op, targop):
     df = DataFrame(np.random.randn(1000))
     labels = np.random.randint(0, 50, size=1000).astype(float)
 
-    for op, targop in ops:
-        result = getattr(df.groupby(labels), op)().astype(float)
-        expected = df.groupby(labels).agg(targop)
-        try:
-            tm.assert_frame_equal(result, expected)
-        except BaseException as exc:
-            exc.args += ('operation: %s' % op, )
-            raise
+    result = getattr(df.groupby(labels), op)().astype(float)
+    expected = df.groupby(labels).agg(targop)
+    tm.assert_frame_equal(result, expected)
 
 
 def test_max_nan_bug():
diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py
index 6a08a8d79b63e..b174fb0e0b6f9 100644
--- a/pandas/tests/groupby/test_nth.py
+++ b/pandas/tests/groupby/test_nth.py
@@ -282,18 +282,21 @@ def test_first_last_tz(data, expected_first, expected_last):
 ])
 def test_first_last_tz_multi_column(method, ts, alpha):
     # GH 21603
+    category_string = pd.Series(list('abc')).astype(
+        'category')
     df = pd.DataFrame({'group': [1, 1, 2],
-                       'category_string': pd.Series(list('abc')).astype(
-                           'category'),
+                       'category_string': category_string,
                        'datetimetz': pd.date_range('20130101', periods=3,
                                                    tz='US/Eastern')})
     result = getattr(df.groupby('group'), method)()
-    expepcted = pd.DataFrame({'category_string': [alpha, 'c'],
-                              'datetimetz': [ts,
-                                             Timestamp('2013-01-03',
-                                                       tz='US/Eastern')]},
-                             index=pd.Index([1, 2], name='group'))
-    assert_frame_equal(result, expepcted)
+    expected = pd.DataFrame(
+        {'category_string': pd.Categorical(
+            [alpha, 'c'], dtype=category_string.dtype),
+         'datetimetz': [ts,
+                        Timestamp('2013-01-03',
+                                  tz='US/Eastern')]},
+        index=pd.Index([1, 2], name='group'))
+    assert_frame_equal(result, expected)
 
 
 def test_nth_multi_index_as_expected():
diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py
index 5711174ef0c9f..830ba6062cc72 100644
--- a/pandas/tests/resample/test_datetime_index.py
+++ b/pandas/tests/resample/test_datetime_index.py
@@ -112,6 +112,12 @@ def test_resample_integerarray():
                       dtype="Int64")
     assert_series_equal(result, expected)
 
+    result = ts.resample('3T').mean()
+    expected = Series([1, 4, 7],
+                      index=pd.date_range('1/1/2000', periods=3, freq='3T'),
+                      dtype='Int64')
+    assert_series_equal(result, expected)
+
 
 def test_resample_basic_grouper(series):
     s = series
diff --git a/pandas/tests/sparse/frame/test_analytics.py b/pandas/tests/sparse/frame/test_analytics.py
index ae97682f297ad..7054f9a9dd65f 100644
--- a/pandas/tests/sparse/frame/test_analytics.py
+++ b/pandas/tests/sparse/frame/test_analytics.py
@@ -1,7 +1,8 @@
 import numpy as np
 import pytest
 
-from pandas import DataFrame, SparseDataFrame, SparseSeries
+from pandas import (
+    DataFrame, Series, SparseDataFrame, SparseDtype, SparseSeries)
 from pandas.util import testing as tm
 
 
@@ -39,3 +40,16 @@ def test_quantile_multi():
 
     tm.assert_frame_equal(result, dense_expected)
     tm.assert_sp_frame_equal(result, sparse_expected)
+
+
+@pytest.mark.parametrize('func', [np.exp, np.sqrt], ids=lambda x: x.__name__)
+def test_ufunc(func):
+    # GH 23743
+    # assert we preserve the incoming dtype on ufunc operation
+    df = DataFrame(
+        {'A': Series([1, np.nan, 3], dtype=SparseDtype('float64', np.nan))})
+    result = func(df)
+    expected = DataFrame(
+        {'A': Series(func([1, np.nan, 3]),
+                     dtype=SparseDtype('float64', np.nan))})
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/sparse/series/test_analytics.py b/pandas/tests/sparse/series/test_analytics.py
new file mode 100644
index 0000000000000..fe2eaf0e4de4c
--- /dev/null
+++ b/pandas/tests/sparse/series/test_analytics.py
@@ -0,0 +1,16 @@
+import numpy as np
+import pytest
+
+from pandas import Series, SparseDtype
+from pandas.util import testing as tm
+
+
+@pytest.mark.parametrize('func', [np.exp, np.sqrt], ids=lambda x: x.__name__)
+def test_ufunc(func):
+    # GH 23743
+    # assert we preserve the incoming dtype on ufunc operation
+    s = Series([1, np.nan, 3], dtype=SparseDtype('float64', np.nan))
+    result = func(s)
+    expected = Series(func([1, np.nan, 3]),
+                      dtype=SparseDtype('float64', np.nan))
+    tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/sparse/test_groupby.py b/pandas/tests/sparse/test_groupby.py
index 531a4360c78a2..bf6055bc12725 100644
--- a/pandas/tests/sparse/test_groupby.py
+++ b/pandas/tests/sparse/test_groupby.py
@@ -29,11 +29,10 @@ def test_first_last_nth(self):
         sparse_grouped_last = sparse_grouped.last()
         sparse_grouped_nth = sparse_grouped.nth(1)
 
-        dense_grouped_first = dense_grouped.first().to_sparse()
-        dense_grouped_last = dense_grouped.last().to_sparse()
-        dense_grouped_nth = dense_grouped.nth(1).to_sparse()
+        dense_grouped_first = pd.DataFrame(dense_grouped.first().to_sparse())
+        dense_grouped_last = pd.DataFrame(dense_grouped.last().to_sparse())
+        dense_grouped_nth = pd.DataFrame(dense_grouped.nth(1).to_sparse())
 
-        # TODO: shouldn't these all be spares or not?
         tm.assert_frame_equal(sparse_grouped_first,
                               dense_grouped_first)
         tm.assert_frame_equal(sparse_grouped_last,
@@ -69,5 +68,6 @@ def test_groupby_includes_fill_value(fill_value):
                        'b': [fill_value, 1, fill_value, fill_value]})
     sdf = df.to_sparse(fill_value=fill_value)
     result = sdf.groupby('a').sum()
-    expected = df.groupby('a').sum().to_sparse(fill_value=fill_value)
+    expected = pd.DataFrame(df.groupby('a').sum().to_sparse(
+        fill_value=fill_value))
     tm.assert_frame_equal(result, expected, check_index_type=False)
diff --git a/pandas/tests/sparse/test_pivot.py b/pandas/tests/sparse/test_pivot.py
index 114e7b4bacd94..5c070ba5a9a5b 100644
--- a/pandas/tests/sparse/test_pivot.py
+++ b/pandas/tests/sparse/test_pivot.py
@@ -47,10 +47,20 @@ def test_pivot_table(self):
         #                            values='E', aggfunc='sum')
         # tm.assert_frame_equal(res_sparse, res_dense)
 
-    def test_pivot_table_multi(self):
+    @pytest.mark.parametrize(
+        'func',
+        ['mean',
+         'std',
+         'var',
+         'sem',
+         'median',
+         'first',
+         'last'])
+    def test_pivot_table_multi(self, func):
+
         res_sparse = pd.pivot_table(self.sparse, index='A', columns='B',
-                                    values=['D', 'E'])
+                                    values=['D', 'E'], aggfunc=func)
         res_dense = pd.pivot_table(self.dense, index='A', columns='B',
-                                   values=['D', 'E'])
+                                   values=['D', 'E'], aggfunc=func)
         res_dense = res_dense.apply(lambda x: x.astype("Sparse[float64]"))
         tm.assert_frame_equal(res_sparse, res_dense)

From d1490a2ef5ad22aeb72a6bb31ca16491bb2b96d6 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Tue, 28 May 2019 22:09:08 -0400
Subject: [PATCH 02/10] lint

---
 doc/source/whatsnew/v0.25.0.rst | 9 ++++++---
 pandas/core/groupby/groupby.py  | 2 +-
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 6a792500029e0..5c21f446b84a7 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -328,7 +328,9 @@ resulting dtypes to same as the input dtype; previously this would coerce to a d
 
 .. ipython:: python
 
-   df = pd.DataFrame({'A': pd.Series([1, np.nan, 3], dtype=pd.SparseDtype('float64', np.nan))})
+   df = pd.DataFrame(
+       {'A': pd.Series([1, np.nan, 3],
+                       dtype=pd.SparseDtype('float64', np.nan))})
    df
    df.dtypes
 
@@ -356,8 +358,9 @@ Previously, columns that were categorical, but not the groupby key(s) would be c
 
 .. ipython:: python
 
-   df = pd.DataFrame({'payload': [-1,-2,-1,-2],
-                      'col': pd.Categorical(["foo", "bar", "bar", "qux"], ordered=True)})
+   df = pd.DataFrame(
+       {'payload': [-1, -2, -1, -2],
+        'col': pd.Categorical(["foo", "bar", "bar", "qux"], ordered=True)})
    df
    df.dtypes
 
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 2c042d55dfa4b..6274f45427db6 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1185,7 +1185,7 @@ def median(self, **kwargs):
             return self._cython_agg_general(
                 'median',
                 alt=lambda x,
-                axis: Series(x).median(**kwargs),
+                axis: Series(x).median(axis=axis, **kwargs),
                 **kwargs)
         except GroupByError:
             raise

From 561e960fab5dddcbf86e5b34fa434ad778d0e983 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Wed, 29 May 2019 20:54:00 -0400
Subject: [PATCH 03/10] review comments

---
 pandas/core/frame.py                         |  7 ++++---
 pandas/core/series.py                        |  7 ++++---
 pandas/tests/groupby/test_function.py        | 11 ++++-------
 pandas/tests/sparse/frame/test_analytics.py  |  2 +-
 pandas/tests/sparse/series/test_analytics.py |  2 +-
 pandas/tests/sparse/test_pivot.py            | 13 ++++++++-----
 6 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index ad5a04f8cb934..530590ea5dc45 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -16,7 +16,7 @@
 import sys
 import warnings
 from textwrap import dedent
-from typing import FrozenSet, List, Optional, Set, Type, Union
+from typing import FrozenSet, List, Optional, Tuple, Set, Type, Union
 
 import numpy as np
 import numpy.ma as ma
@@ -2651,14 +2651,15 @@ def transpose(self, *args, **kwargs):
     def __array__(self, dtype=None):
         return com.values_from_object(self)
 
-    def __array_wrap__(self, result: np.ndarray, context=None) -> 'DataFrame':
+    def __array_wrap__(self, result: np.ndarray,
+                       context: Optional[Tuple] = None) -> 'DataFrame':
         """
         We are called post ufunc; reconstruct the original object and dtypes.
 
         Parameters
         ----------
         result : np.ndarray
-        context
+        context : tuple, optional
 
         Returns
         -------
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 485c7db92bb2d..a15343f2806ba 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -6,6 +6,7 @@
 from shutil import get_terminal_size
 from textwrap import dedent
 import warnings
+from typing import Optional, Tuple
 
 import numpy as np
 
@@ -762,20 +763,20 @@ def __array__(self, dtype=None):
             dtype = 'M8[ns]'
         return np.asarray(self.array, dtype)
 
-    def __array_wrap__(self, result: np.ndarray, context=None) -> 'Series':
+    def __array_wrap__(self, result: np.ndarray,
+                       context: Optional[Tuple] = None) -> 'Series':
         """
         We are called post ufunc; reconstruct the original object and dtypes.
 
         Parameters
         ----------
         result : np.ndarray
-        context
+        context : tuple, optional
 
         Returns
         -------
         Series
         """
-
         result = self._constructor(result, index=self.index,
                                    copy=False)
 
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 379b6db2b650e..0ede4cab269f2 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -12,7 +12,7 @@
 from pandas import (
     DataFrame, Index, MultiIndex, Series, Timestamp, date_range, isna)
 import pandas.core.nanops as nanops
-from pandas.util import testing as tm
+from pandas.util import _test_decorators as td, testing as tm
 
 
 @pytest.mark.parametrize("agg_func", ['any', 'all'])
@@ -461,11 +461,8 @@ def test_groupby_cumprod():
 
 
 def scipy_sem(*args, **kwargs):
-    try:
-        from scipy.stats import sem
-        return sem(*args, ddof=1, **kwargs)
-    except ImportError:
-        pytest.skip("No Scipy installed")
+    from scipy.stats import sem
+    return sem(*args, ddof=1, **kwargs)
 
 
 @pytest.mark.parametrize(
@@ -481,7 +478,7 @@ def scipy_sem(*args, **kwargs):
      ('first', lambda x: x.iloc[0]),
      ('last', lambda x: x.iloc[-1]),
      ('count', np.size),
-     ('sem', scipy_sem)])
+     pytest.param('sem', scipy_sem, mark=td._skip_if_no_scipy)])
 def test_ops_general(op, targop):
     df = DataFrame(np.random.randn(1000))
     labels = np.random.randint(0, 50, size=1000).astype(float)
diff --git a/pandas/tests/sparse/frame/test_analytics.py b/pandas/tests/sparse/frame/test_analytics.py
index 7054f9a9dd65f..b187976703447 100644
--- a/pandas/tests/sparse/frame/test_analytics.py
+++ b/pandas/tests/sparse/frame/test_analytics.py
@@ -42,7 +42,7 @@ def test_quantile_multi():
     tm.assert_sp_frame_equal(result, sparse_expected)
 
 
-@pytest.mark.parametrize('func', [np.exp, np.sqrt], ids=lambda x: x.__name__)
+@pytest.mark.parametrize('func', [np.exp, np.sqrt], ids=str)
 def test_ufunc(func):
     # GH 23743
     # assert we preserve the incoming dtype on ufunc operation
diff --git a/pandas/tests/sparse/series/test_analytics.py b/pandas/tests/sparse/series/test_analytics.py
index fe2eaf0e4de4c..bac778f09fe66 100644
--- a/pandas/tests/sparse/series/test_analytics.py
+++ b/pandas/tests/sparse/series/test_analytics.py
@@ -5,7 +5,7 @@
 from pandas.util import testing as tm
 
 
-@pytest.mark.parametrize('func', [np.exp, np.sqrt], ids=lambda x: x.__name__)
+@pytest.mark.parametrize('func', [np.exp, np.sqrt], ids=str)
 def test_ufunc(func):
     # GH 23743
     # assert we preserve the incoming dtype on ufunc operation
diff --git a/pandas/tests/sparse/test_pivot.py b/pandas/tests/sparse/test_pivot.py
index 5c070ba5a9a5b..fd29c918a7c93 100644
--- a/pandas/tests/sparse/test_pivot.py
+++ b/pandas/tests/sparse/test_pivot.py
@@ -56,11 +56,14 @@ def test_pivot_table(self):
          'median',
          'first',
          'last'])
-    def test_pivot_table_multi(self, func):
+    @pytest.mark.parametrize('dropna', [True, False])
+    def test_pivot_table_multi(self, func, dropna):
 
-        res_sparse = pd.pivot_table(self.sparse, index='A', columns='B',
-                                    values=['D', 'E'], aggfunc=func)
-        res_dense = pd.pivot_table(self.dense, index='A', columns='B',
-                                   values=['D', 'E'], aggfunc=func)
+        res_sparse = pd.pivot_table(
+            self.sparse, index='A', columns='B',
+            values=['D', 'E'], aggfunc=func, dropna=dropna)
+        res_dense = pd.pivot_table(
+            self.dense, index='A', columns='B',
+            values=['D', 'E'], aggfunc=func, dropna=dropna)
         res_dense = res_dense.apply(lambda x: x.astype("Sparse[float64]"))
         tm.assert_frame_equal(res_sparse, res_dense)

From 28be4d9f21493c58dbe1d12b83166b327d20038b Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Wed, 29 May 2019 21:20:53 -0400
Subject: [PATCH 04/10] use marks

---
 pandas/tests/groupby/test_function.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 0ede4cab269f2..2a77b1b96a662 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -478,7 +478,9 @@ def scipy_sem(*args, **kwargs):
      ('first', lambda x: x.iloc[0]),
      ('last', lambda x: x.iloc[-1]),
      ('count', np.size),
-     pytest.param('sem', scipy_sem, mark=td._skip_if_no_scipy)])
+     pytest.param(
+         'sem', scipy_sem, marks=[pytest.mark.skipif(
+             td._skip_if_no_scipy(), reason='scipy not installed')])])
 def test_ops_general(op, targop):
     df = DataFrame(np.random.randn(1000))
     labels = np.random.randint(0, 50, size=1000).astype(float)

From d6db2ea2eb67a7a831727bb1be5e093c1abc80ff Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Sun, 2 Jun 2019 16:00:36 -0400
Subject: [PATCH 05/10] review comments

---
 doc/source/whatsnew/v0.25.0.rst   | 2 +-
 pandas/core/series.py             | 2 +-
 pandas/tests/sparse/test_pivot.py | 4 +++-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 5c21f446b84a7..6dacb9dae834a 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -323,7 +323,7 @@ of ``object`` dtype. :attr:`Series.str` will now infer the dtype data *within* t
 ufuncs on Extension Dtype
 ^^^^^^^^^^^^^^^^^^^^^^^^^
 
-Operations with ``numpy`` ufuncs on Extension Arrays, including Sparse Dtypes will now coerce the
+Operations with ``numpy`` ufuncs on Extension Arrays, including Sparse Dtypes will now preserve the
 resulting dtypes to same as the input dtype; previously this would coerce to a dense dtype. (:issue:`23743`)
 
 .. ipython:: python
diff --git a/pandas/core/series.py b/pandas/core/series.py
index a15343f2806ba..59ae60cfb38d7 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -5,8 +5,8 @@
 from io import StringIO
 from shutil import get_terminal_size
 from textwrap import dedent
-import warnings
 from typing import Optional, Tuple
+import warnings
 
 import numpy as np
 
diff --git a/pandas/tests/sparse/test_pivot.py b/pandas/tests/sparse/test_pivot.py
index fd29c918a7c93..f545ce0310a2a 100644
--- a/pandas/tests/sparse/test_pivot.py
+++ b/pandas/tests/sparse/test_pivot.py
@@ -2,6 +2,7 @@
 import pytest
 
 import pandas as pd
+from pandas import _np_version_under1p17
 import pandas.util.testing as tm
 
 
@@ -53,7 +54,8 @@ def test_pivot_table(self):
          'std',
          'var',
          'sem',
-         'median',
+         pytest.param('median', marks=pytest.mark.xfail(
+             not _np_version_under1p17, reason="fails on numpy > 1.16")),
          'first',
          'last'])
     @pytest.mark.parametrize('dropna', [True, False])

From 7c29393a4545010cc0fc069c70b5c040bea0e23f Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Sun, 2 Jun 2019 17:04:33 -0400
Subject: [PATCH 06/10] allow coercing casting

---
 pandas/core/arrays/sparse.py                 | 14 +++++++++++++-
 pandas/core/series.py                        |  5 +----
 pandas/tests/sparse/frame/test_analytics.py  | 12 ++++++++----
 pandas/tests/sparse/series/test_analytics.py | 12 ++++++++----
 4 files changed, 30 insertions(+), 13 deletions(-)

diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py
index 3dda6868a80da..dadbd5e23dce9 100644
--- a/pandas/core/arrays/sparse.py
+++ b/pandas/core/arrays/sparse.py
@@ -1926,8 +1926,20 @@ def make_sparse(arr, kind='block', fill_value=None, dtype=None, copy=False):
 
     index = _make_index(length, indices, kind)
     sparsified_values = arr[mask]
+
+    # careful about casting here
+    # as we could easily specify a type that cannot hold the resulting values
+    # e.g. integer when we have floats
     if dtype is not None:
-        sparsified_values = astype_nansafe(sparsified_values, dtype=dtype)
+        try:
+            sparsified_values = astype_nansafe(
+                sparsified_values, dtype=dtype, casting='same_kind')
+        except TypeError:
+            dtype = 'float64'
+            sparsified_values = astype_nansafe(
+                sparsified_values, dtype=dtype, casting='unsafe')
+
+
     # TODO: copy
     return sparsified_values, index, fill_value
 
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 59ae60cfb38d7..2432d801fe07e 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -782,10 +782,7 @@ def __array_wrap__(self, result: np.ndarray,
 
         # we try to cast extension array types back to the original
         if is_extension_array_dtype(self):
-            result = result.astype(self.dtype,
-                                   copy=False,
-                                   errors='ignore',
-                                   casting='same_kind')
+            result = result.astype(self.dtype, copy=False)
 
         return result.__finalize__(self)
 
diff --git a/pandas/tests/sparse/frame/test_analytics.py b/pandas/tests/sparse/frame/test_analytics.py
index b187976703447..a6d2225377dc3 100644
--- a/pandas/tests/sparse/frame/test_analytics.py
+++ b/pandas/tests/sparse/frame/test_analytics.py
@@ -42,14 +42,18 @@ def test_quantile_multi():
     tm.assert_sp_frame_equal(result, sparse_expected)
 
 
+@pytest.mark.parametrize(
+    'data, dtype',
+    [([1, np.nan, 3], SparseDtype('float64', np.nan)),
+     ([1, 2, 3], SparseDtype('int'))])
 @pytest.mark.parametrize('func', [np.exp, np.sqrt], ids=str)
-def test_ufunc(func):
+def test_ufunc(data, dtype, func):
     # GH 23743
     # assert we preserve the incoming dtype on ufunc operation
     df = DataFrame(
-        {'A': Series([1, np.nan, 3], dtype=SparseDtype('float64', np.nan))})
+        {'A': Series(data, dtype=dtype)})
     result = func(df)
     expected = DataFrame(
-        {'A': Series(func([1, np.nan, 3]),
-                     dtype=SparseDtype('float64', np.nan))})
+        {'A': Series(func(data),
+                     dtype=dtype)})
     tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/sparse/series/test_analytics.py b/pandas/tests/sparse/series/test_analytics.py
index bac778f09fe66..97a86a7dec94d 100644
--- a/pandas/tests/sparse/series/test_analytics.py
+++ b/pandas/tests/sparse/series/test_analytics.py
@@ -5,12 +5,16 @@
 from pandas.util import testing as tm
 
 
+@pytest.mark.parametrize(
+    'data, dtype',
+    [([1, np.nan, 3], SparseDtype('float64', np.nan)),
+     ([1, 2, 3], SparseDtype('int'))])
 @pytest.mark.parametrize('func', [np.exp, np.sqrt], ids=str)
-def test_ufunc(func):
+def test_ufunc(data, dtype, func):
     # GH 23743
     # assert we preserve the incoming dtype on ufunc operation
-    s = Series([1, np.nan, 3], dtype=SparseDtype('float64', np.nan))
+    s = Series(data, dtype=dtype)
     result = func(s)
-    expected = Series(func([1, np.nan, 3]),
-                      dtype=SparseDtype('float64', np.nan))
+    expected = Series(func(data),
+                      dtype=dtype)
     tm.assert_series_equal(result, expected)

From 0662f2b1007cafb9af5837fa61198ef7bd4631a1 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Sun, 2 Jun 2019 19:18:23 -0400
Subject: [PATCH 07/10] infer types

---
 pandas/core/arrays/sparse.py                 | 19 +++++++++----------
 pandas/core/internals/construction.py        |  5 ++++-
 pandas/core/sparse/frame.py                  | 14 ++++++++++----
 pandas/tests/sparse/frame/test_analytics.py  |  2 +-
 pandas/tests/sparse/series/test_analytics.py |  2 +-
 5 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py
index dadbd5e23dce9..68fd58e63ba02 100644
--- a/pandas/core/arrays/sparse.py
+++ b/pandas/core/arrays/sparse.py
@@ -1927,18 +1927,17 @@ def make_sparse(arr, kind='block', fill_value=None, dtype=None, copy=False):
     index = _make_index(length, indices, kind)
     sparsified_values = arr[mask]
 
-    # careful about casting here
-    # as we could easily specify a type that cannot hold the resulting values
-    # e.g. integer when we have floats
+    # careful about casting here as we could easily specify a type that
+    # cannot hold the resulting values, e.g. integer when we have floats
+    # if we don't have an object specified then use this as the cast
     if dtype is not None:
-        try:
-            sparsified_values = astype_nansafe(
-                sparsified_values, dtype=dtype, casting='same_kind')
-        except TypeError:
-            dtype = 'float64'
-            sparsified_values = astype_nansafe(
-                sparsified_values, dtype=dtype, casting='unsafe')
 
+        ok_to_cast = all(not (is_object_dtype(t) or is_bool_dtype(t))
+                         for t in (dtype, sparsified_values.dtype))
+        if ok_to_cast:
+            dtype = find_common_type([dtype, sparsified_values.dtype])
+        sparsified_values = astype_nansafe(
+            sparsified_values, dtype=dtype)
 
     # TODO: copy
     return sparsified_values, index, fill_value
diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index 2616f0aa97d0d..8e1609c1364fd 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -666,7 +666,10 @@ def sanitize_array(data, index, dtype=None, copy=False,
                 data = np.array(data, dtype=dtype, copy=False)
             subarr = np.array(data, dtype=object, copy=copy)
 
-    if is_object_dtype(subarr.dtype) and dtype != 'object':
+    if (not (is_extension_array_dtype(subarr.dtype) or
+             is_extension_array_dtype(dtype)) and
+            is_object_dtype(subarr.dtype) and
+            not is_object_dtype(dtype)):
         inferred = lib.infer_dtype(subarr, skipna=False)
         if inferred == 'period':
             try:
diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py
index 67ecbcbea67f9..778fff249817d 100644
--- a/pandas/core/sparse/frame.py
+++ b/pandas/core/sparse/frame.py
@@ -284,20 +284,26 @@ def _unpickle_sparse_frame_compat(self, state):
     def to_dense(self):
         return SparseFrameAccessor(self).to_dense()
 
-    def _apply_columns(self, func):
+    def _apply_columns(self, func, *args, **kwargs):
         """
         Get new SparseDataFrame applying func to each columns
         """
 
-        new_data = {col: func(series)
+        new_data = {col: func(series, *args, **kwargs)
                     for col, series in self.items()}
 
         return self._constructor(
             data=new_data, index=self.index, columns=self.columns,
             default_fill_value=self.default_fill_value).__finalize__(self)
 
-    def astype(self, dtype):
-        return self._apply_columns(lambda x: x.astype(dtype))
+    def astype(self, dtype, **kwargs):
+
+        def f(x, dtype, **kwargs):
+            if isinstance(dtype, (dict, Series)):
+                dtype = dtype[x.name]
+            return x.astype(dtype, **kwargs)
+
+        return self._apply_columns(f, dtype=dtype, **kwargs)
 
     def copy(self, deep=True):
         """
diff --git a/pandas/tests/sparse/frame/test_analytics.py b/pandas/tests/sparse/frame/test_analytics.py
index a6d2225377dc3..52fcf7c355cf2 100644
--- a/pandas/tests/sparse/frame/test_analytics.py
+++ b/pandas/tests/sparse/frame/test_analytics.py
@@ -55,5 +55,5 @@ def test_ufunc(data, dtype, func):
     result = func(df)
     expected = DataFrame(
         {'A': Series(func(data),
-                     dtype=dtype)})
+                     dtype=SparseDtype('float64', dtype.fill_value))})
     tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/sparse/series/test_analytics.py b/pandas/tests/sparse/series/test_analytics.py
index 97a86a7dec94d..bf04f5b52a371 100644
--- a/pandas/tests/sparse/series/test_analytics.py
+++ b/pandas/tests/sparse/series/test_analytics.py
@@ -16,5 +16,5 @@ def test_ufunc(data, dtype, func):
     s = Series(data, dtype=dtype)
     result = func(s)
     expected = Series(func(data),
-                      dtype=dtype)
+                      dtype=SparseDtype('float64', dtype.fill_value))
     tm.assert_series_equal(result, expected)

From c75461c191a1e3d90d3af200c50189dffb3c6fa4 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Sat, 8 Jun 2019 19:31:15 -0400
Subject: [PATCH 08/10] sparse masking

---
 pandas/tests/extension/test_sparse.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py
index faf1905ea1763..b259318371c01 100644
--- a/pandas/tests/extension/test_sparse.py
+++ b/pandas/tests/extension/test_sparse.py
@@ -154,6 +154,32 @@ def test_reindex(self, data, na_value):
         self._check_unsupported(data)
         super().test_reindex(data, na_value)
 
+    def test_getitem_mask(self, data):
+        # Empty mask, raw array
+        mask = np.zeros(len(data), dtype=bool)
+        result = data[mask]
+        assert len(result) == 0
+        assert isinstance(result, type(data))
+
+        # Empty mask, in series
+        mask = np.zeros(len(data), dtype=bool)
+        result = pd.Series(data)[mask]
+        assert len(result) == 0
+
+        # we change int -> float because of the masking
+        assert result.dtype == SparseDtype('float64', data.dtype.fill_value)
+
+        # non-empty mask, raw array
+        mask[0] = True
+        result = data[mask]
+        assert len(result) == 1
+        assert isinstance(result, type(data))
+
+        # non-empty mask, in series
+        result = pd.Series(data)[mask]
+        assert len(result) == 1
+        assert result.dtype == data.dtype
+
 
 # Skipping TestSetitem, since we don't implement it.
 

From 86090bf6b5e02fd55e063aaf39d44e53354ab284 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Sun, 9 Jun 2019 19:12:28 -0400
Subject: [PATCH 09/10] fix float casting

---
 pandas/core/arrays/sparse.py             | 24 +++++++++++++++-------
 pandas/tests/arrays/sparse/test_array.py |  1 +
 pandas/tests/extension/test_sparse.py    | 26 ------------------------
 3 files changed, 18 insertions(+), 33 deletions(-)

diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py
index 68fd58e63ba02..490df5b250f74 100644
--- a/pandas/core/arrays/sparse.py
+++ b/pandas/core/arrays/sparse.py
@@ -25,7 +25,8 @@
     infer_dtype_from_scalar)
 from pandas.core.dtypes.common import (
     is_array_like, is_bool_dtype, is_datetime64_any_dtype, is_dtype_equal,
-    is_integer, is_object_dtype, is_scalar, is_string_dtype, pandas_dtype)
+    is_float_dtype, is_integer, is_integer_dtype, is_object_dtype, is_scalar,
+    is_string_dtype, pandas_dtype)
 from pandas.core.dtypes.dtypes import register_extension_dtype
 from pandas.core.dtypes.generic import (
     ABCIndexClass, ABCSeries, ABCSparseArray, ABCSparseSeries)
@@ -1927,15 +1928,24 @@ def make_sparse(arr, kind='block', fill_value=None, dtype=None, copy=False):
     index = _make_index(length, indices, kind)
     sparsified_values = arr[mask]
 
-    # careful about casting here as we could easily specify a type that
-    # cannot hold the resulting values, e.g. integer when we have floats
-    # if we don't have an object specified then use this as the cast
     if dtype is not None:
 
-        ok_to_cast = all(not (is_object_dtype(t) or is_bool_dtype(t))
-                         for t in (dtype, sparsified_values.dtype))
-        if ok_to_cast:
+        # Be careful about casting here: the requested dtype may be unable to
+        # hold the resulting values, e.g. integer when we have floats.
+        # If the cast is not safe, convert the dtype instead; note that if
+        # there are NaNs in the source array this will raise.
+
+        # TODO: ideally this would be done by 'safe' casting in astype_nansafe
+        # but alas too many cases rely upon this working in the current way
+        # and casting='safe' doesn't really work in numpy properly
+        if is_integer_dtype(dtype) and is_float_dtype(sparsified_values.dtype):
+            result = astype_nansafe(
+                sparsified_values, dtype=dtype)
+            if np.allclose(result, sparsified_values, rtol=0):
+                return result, index, fill_value
+
             dtype = find_common_type([dtype, sparsified_values.dtype])
+
         sparsified_values = astype_nansafe(
             sparsified_values, dtype=dtype)
 
diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py
index 231b5a92dbb3a..69259c66d61dd 100644
--- a/pandas/tests/arrays/sparse/test_array.py
+++ b/pandas/tests/arrays/sparse/test_array.py
@@ -472,6 +472,7 @@ def test_astype(self):
         # float -> float
         arr = SparseArray([None, None, 0, 2])
         result = arr.astype("Sparse[float32]")
+
         expected = SparseArray([None, None, 0, 2], dtype=np.dtype('float32'))
         tm.assert_sp_array_equal(result, expected)
 
diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py
index b259318371c01..faf1905ea1763 100644
--- a/pandas/tests/extension/test_sparse.py
+++ b/pandas/tests/extension/test_sparse.py
@@ -154,32 +154,6 @@ def test_reindex(self, data, na_value):
         self._check_unsupported(data)
         super().test_reindex(data, na_value)
 
-    def test_getitem_mask(self, data):
-        # Empty mask, raw array
-        mask = np.zeros(len(data), dtype=bool)
-        result = data[mask]
-        assert len(result) == 0
-        assert isinstance(result, type(data))
-
-        # Empty mask, in series
-        mask = np.zeros(len(data), dtype=bool)
-        result = pd.Series(data)[mask]
-        assert len(result) == 0
-
-        # we change int -> float because of the masking
-        assert result.dtype == SparseDtype('float64', data.dtype.fill_value)
-
-        # non-empty mask, raw array
-        mask[0] = True
-        result = data[mask]
-        assert len(result) == 1
-        assert isinstance(result, type(data))
-
-        # non-empty mask, in series
-        result = pd.Series(data)[mask]
-        assert len(result) == 1
-        assert result.dtype == data.dtype
-
 
 # Skipping TestSetitem, since we don't implement it.
 

From 4bd486eb718d416a47aac1deb7907012e64da860 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Sun, 9 Jun 2019 20:36:50 -0400
Subject: [PATCH 10/10] review comments

---
 doc/source/whatsnew/v0.25.0.rst       |  2 +-
 pandas/core/groupby/groupby.py        | 35 +++++++++++++++------------
 pandas/core/internals/blocks.py       |  8 ++++++
 pandas/tests/groupby/test_function.py |  3 +--
 4 files changed, 29 insertions(+), 19 deletions(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 6dacb9dae834a..2e4959b44aeeb 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -323,7 +323,7 @@ of ``object`` dtype. :attr:`Series.str` will now infer the dtype data *within* t
 ufuncs on Extension Dtype
 ^^^^^^^^^^^^^^^^^^^^^^^^^
 
-Operations with ``numpy`` ufuncs on Extension Arrays, including Sparse Dtypes will now preserve the
+Operations with ``numpy`` ufuncs on DataFrames with Extension Arrays, including Sparse Dtypes, will now preserve the
 resulting dtypes to same as the input dtype; previously this would coerce to a dense dtype. (:issue:`23743`)
 
 .. ipython:: python
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 6274f45427db6..e067185e7ce94 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1299,7 +1299,7 @@ def _add_numeric_operations(cls):
         """
 
         def groupby_function(name, alias, npfunc,
-                             numeric_only=True, _convert=False,
+                             numeric_only=True,
                              min_count=-1):
 
             _local_template = """
@@ -1321,27 +1321,30 @@ def f(self, **kwargs):
                     kwargs['min_count'] = min_count
 
                 self._set_group_selection()
+
+                # try a cython aggregation if we can
                 try:
                     return self._cython_agg_general(
                         alias, alt=npfunc, **kwargs)
                 except AssertionError as e:
                     raise SpecificationError(str(e))
                 except Exception:
-                    result = self.aggregate(
-                        lambda x: npfunc(x, axis=self.axis))
-
-                    # coerce the columns if we can
-                    if isinstance(result, DataFrame):
-                        for col in result.columns:
-                            result[col] = self._try_cast(
-                                result[col], self.obj[col])
-                    else:
-                        result = self._try_cast(
-                            result, self.obj)
-
-                    if _convert:
-                        result = result._convert(datetime=True)
-                    return result
+                    pass
+
+                # apply a non-cython aggregation
+                result = self.aggregate(
+                    lambda x: npfunc(x, axis=self.axis))
+
+                # coerce the resulting columns if we can
+                if isinstance(result, DataFrame):
+                    for col in result.columns:
+                        result[col] = self._try_cast(
+                            result[col], self.obj[col])
+                else:
+                    result = self._try_cast(
+                        result, self.obj)
+
+                return result
 
             set_function_name(f, name, cls)
 
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 429b2b064c702..dfb5c458b0d77 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -1772,8 +1772,16 @@ def _try_cast_result(self, result, dtype=None):
         """
         if we have an operation that operates on for example floats
         we want to try to cast back to our EA here if possible
+
+        ``result`` may be a 2-D numpy array, e.g. the result of
+        a numeric operation, but it must have shape (1, X) because
+        we by definition operate on ExtensionBlocks one at a time.
+
+        ``result`` may also be an ExtensionArray itself, in which case
+        it is already a 1-D array.
         """
         try:
+
             result = self._holder._from_sequence(
                 np.asarray(result).ravel(), dtype=dtype)
         except Exception:
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 2a77b1b96a662..355da1151d878 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -479,8 +479,7 @@ def scipy_sem(*args, **kwargs):
      ('last', lambda x: x.iloc[-1]),
      ('count', np.size),
      pytest.param(
-         'sem', scipy_sem, marks=[pytest.mark.skipif(
-             td._skip_if_no_scipy(), reason='scipy not installed')])])
+         'sem', scipy_sem, marks=td.skip_if_no_scipy)])
 def test_ops_general(op, targop):
     df = DataFrame(np.random.randn(1000))
     labels = np.random.randint(0, 50, size=1000).astype(float)