diff --git a/doc/source/reference/indexing.rst b/doc/source/reference/indexing.rst
index 42ebf648f299f..6d27e225b681e 100644
--- a/doc/source/reference/indexing.rst
+++ b/doc/source/reference/indexing.rst
@@ -403,6 +403,13 @@ Conversion
    DatetimeIndex.to_series
    DatetimeIndex.to_frame
 
+Methods
+~~~~~~~
+.. autosummary::
+   :toctree: api/
+
+   DatetimeIndex.mean
+
 TimedeltaIndex
 --------------
 .. autosummary::
@@ -435,6 +442,13 @@ Conversion
    TimedeltaIndex.ceil
    TimedeltaIndex.to_frame
 
+Methods
+~~~~~~~
+.. autosummary::
+   :toctree: api/
+
+   TimedeltaIndex.mean
+
 .. currentmodule:: pandas
 
 PeriodIndex
diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 0e8cd95084a8d..6794c0c01e653 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -82,6 +82,7 @@ Other Enhancements
 - :meth:`DataFrame.query` and :meth:`DataFrame.eval` now supports quoting column names with backticks to refer to names with spaces (:issue:`6508`)
 - :func:`merge_asof` now gives a more clear error message when merge keys are categoricals that are not equal (:issue:`26136`)
 - :meth:`pandas.core.window.Rolling` supports exponential (or Poisson) window type (:issue:`21303`)
+- :class:`DatetimeIndex` and :class:`TimedeltaIndex` now have a ``mean`` method (:issue:`24757`)
 -
 
 .. _whatsnew_0250.api_breaking:
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index c32f8642dc2ed..61594be8ec385 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -1382,7 +1382,7 @@ def _ensure_localized(self, arg, ambiguous='raise', nonexistent='raise',
     def _reduce(self, name, axis=0, skipna=True, **kwargs):
         op = getattr(self, name, None)
         if op:
-            return op(axis=axis, skipna=skipna, **kwargs)
+            return op(skipna=skipna, **kwargs)
         else:
             return super()._reduce(name, skipna, **kwargs)
 
@@ -1438,6 +1438,54 @@ def max(self, axis=None, skipna=True, *args, **kwargs):
         # Don't have to worry about NA `result`, since no NA went in.
         return self._box_func(result)
 
+    def mean(self, skipna=True):
+        """
+        Return the mean value of the Array.
+
+        .. versionadded:: 0.25.0
+
+        Parameters
+        ----------
+        skipna : bool, default True
+            Whether to ignore any NaT elements.
+
+        Returns
+        -------
+        scalar (Timestamp or Timedelta)
+
+        See Also
+        --------
+        numpy.ndarray.mean
+        Series.mean : Return the mean value in a Series.
+
+        Notes
+        -----
+        mean is only defined for Datetime and Timedelta dtypes, not for Period.
+        """
+        if is_period_dtype(self):
+            # See discussion in GH#24757
+            raise TypeError(
+                "mean is not implemented for {cls} since the meaning is "
+                "ambiguous. An alternative is "
+                "obj.to_timestamp(how='start').mean()"
+                .format(cls=type(self).__name__))
+
+        mask = self.isna()
+        if skipna:
+            values = self[~mask]
+        elif mask.any():
+            return NaT
+        else:
+            values = self
+
+        if not len(values):
+            # short-circuit for an empty mean, mirroring min / max
+            return NaT
+
+        result = nanops.nanmean(values.view('i8'), skipna=skipna)
+        # Don't have to worry about NA `result`, since no NA went in.
+        return self._box_func(result)
+
 
 # -------------------------------------------------------------------
 # Shared Constructor Helpers
diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
index 092cec00228cd..6491a98812b8f 100644
--- a/pandas/core/indexes/datetimelike.py
+++ b/pandas/core/indexes/datetimelike.py
@@ -73,6 +73,7 @@ class DatetimeIndexOpsMixin(ExtensionOpsMixin):
     _maybe_mask_results = ea_passthrough(
         DatetimeLikeArrayMixin._maybe_mask_results)
     __iter__ = ea_passthrough(DatetimeLikeArrayMixin.__iter__)
+    mean = ea_passthrough(DatetimeLikeArrayMixin.mean)
 
     @property
     def freq(self):
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
index 1bf3cb86811cb..f122a6525237c 100644
--- a/pandas/core/indexes/datetimes.py
+++ b/pandas/core/indexes/datetimes.py
@@ -203,6 +203,7 @@ class DatetimeIndex(DatetimeIndexOpsMixin, Int64Index, DatetimeDelegateMixin):
     to_frame
     month_name
     day_name
+    mean
 
     See Also
     --------
diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py
index 0574a4b41c920..f5362c0b6bb5d 100644
--- a/pandas/core/indexes/timedeltas.py
+++ b/pandas/core/indexes/timedeltas.py
@@ -129,6 +129,7 @@ class TimedeltaIndex(DatetimeIndexOpsMixin, dtl.TimelikeOps, Int64Index,
     floor
     ceil
     to_frame
+    mean
 
     See Also
     --------
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 8fb6ad3e3ccc5..37f74b35ba2a8 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -3729,6 +3729,10 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
         elif is_datetime64_dtype(delegate):
             # use DatetimeIndex implementation to handle skipna correctly
             delegate = DatetimeIndex(delegate)
+        elif is_timedelta64_dtype(delegate) and hasattr(TimedeltaIndex, name):
+            # use TimedeltaIndex to handle skipna correctly
+            # TODO: remove hasattr check after TimedeltaIndex has `std` method
+            delegate = TimedeltaIndex(delegate)
 
         # dispatch to numpy arrays
         elif isinstance(delegate, np.ndarray):
diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
index 487ff7932ec5f..568b229435434 100644
--- a/pandas/tests/frame/test_analytics.py
+++ b/pandas/tests/frame/test_analytics.py
@@ -1205,6 +1205,47 @@ def test_mean_corner(self, float_frame, float_string_frame):
         means = float_frame.mean(0)
         assert means['bool'] == float_frame['bool'].values.mean()
 
+    def test_mean_datetimelike(self):
+        # GH#24757 check that datetimelike columns are excluded by default
+        # and handled correctly with numeric_only=True
+
+        df = pd.DataFrame({
+            'A': np.arange(3),
+            'B': pd.date_range('2016-01-01', periods=3),
+            'C': pd.timedelta_range('1D', periods=3),
+            'D': pd.period_range('2016', periods=3, freq='A')
+        })
+        result = df.mean(numeric_only=True)
+        expected = pd.Series({'A': 1.})
+        tm.assert_series_equal(result, expected)
+
+        result = df.mean()
+        expected = pd.Series({
+            'A': 1.,
+            'C': df.loc[1, 'C']
+        })
+        tm.assert_series_equal(result, expected)
+
+    @pytest.mark.xfail(reason="casts to object-dtype and then tries to "
+                              "add timestamps",
+                       raises=TypeError, strict=True)
+    def test_mean_datetimelike_numeric_only_false(self):
+        df = pd.DataFrame({
+            'A': np.arange(3),
+            'B': pd.date_range('2016-01-01', periods=3),
+            'C': pd.timedelta_range('1D', periods=3),
+            'D': pd.period_range('2016', periods=3, freq='A')
+        })
+
+        result = df.mean(numeric_only=False)
+        expected = pd.Series({
+            'A': 1,
+            'B': df.loc[1, 'B'],
+            'C': df.loc[1, 'C'],
+            'D': df.loc[1, 'D']
+        })
+        tm.assert_series_equal(result, expected)
+
     def test_stats_mixed_type(self, float_string_frame):
         # don't blow up
         float_string_frame.std(1)
diff --git a/pandas/tests/reductions/test_stat_reductions.py b/pandas/tests/reductions/test_stat_reductions.py
index 223904048dd99..b0fd2f290031e 100644
--- a/pandas/tests/reductions/test_stat_reductions.py
+++ b/pandas/tests/reductions/test_stat_reductions.py
@@ -10,9 +10,78 @@
 
 import pandas as pd
 from pandas import DataFrame, Series
+from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray
 import pandas.util.testing as tm
 
 
+class TestDatetimeLikeStatReductions:
+
+    @pytest.mark.parametrize('box', [Series, pd.Index, DatetimeArray])
+    def test_dt64_mean(self, tz_naive_fixture, box):
+        tz = tz_naive_fixture
+
+        dti = pd.date_range('2001-01-01', periods=11, tz=tz)
+        # shuffle so that we are not just working with monotone-increasing
+        dti = dti.take([4, 1, 3, 10, 9, 7, 8, 5, 0, 2, 6])
+        dtarr = dti._data
+
+        obj = box(dtarr)
+        assert obj.mean() == pd.Timestamp('2001-01-06', tz=tz)
+        assert obj.mean(skipna=False) == pd.Timestamp('2001-01-06', tz=tz)
+
+        # dtarr[-2] holds the date 2001-01-03
+        dtarr[-2] = pd.NaT
+
+        obj = box(dtarr)
+        assert obj.mean() == pd.Timestamp('2001-01-06 07:12:00', tz=tz)
+        assert obj.mean(skipna=False) is pd.NaT
+
+    @pytest.mark.parametrize('box', [Series, pd.Index, PeriodArray])
+    def test_period_mean(self, box):
+        # GH#24757
+        dti = pd.date_range('2001-01-01', periods=11)
+        # shuffle so that we are not just working with monotone-increasing
+        dti = dti.take([4, 1, 3, 10, 9, 7, 8, 5, 0, 2, 6])
+
+        # use hourly frequency to avoid rounding errors in expected results
+        # TODO: flesh this out with different frequencies
+        parr = dti._data.to_period('H')
+        obj = box(parr)
+        with pytest.raises(TypeError, match="ambiguous"):
+            obj.mean()
+        with pytest.raises(TypeError, match="ambiguous"):
+            obj.mean(skipna=True)
+
+        # parr[-2] holds the period for 2001-01-03
+        parr[-2] = pd.NaT
+
+        with pytest.raises(TypeError, match="ambiguous"):
+            obj.mean()
+        with pytest.raises(TypeError, match="ambiguous"):
+            obj.mean(skipna=True)
+
+    @pytest.mark.parametrize('box', [Series, pd.Index, TimedeltaArray])
+    def test_td64_mean(self, box):
+        tdi = pd.TimedeltaIndex([0, 3, -2, -7, 1, 2, -1, 3, 5, -2, 4],
+                                unit='D')
+
+        tdarr = tdi._data
+        obj = box(tdarr)
+
+        result = obj.mean()
+        expected = np.array(tdarr).mean()
+        assert result == expected
+
+        tdarr[0] = pd.NaT
+        assert obj.mean(skipna=False) is pd.NaT
+
+        result2 = obj.mean(skipna=True)
+        assert result2 == tdi[1:].mean()
+
+        # exact equality fails by 1 nanosecond
+        assert result2.round('us') == (result * 11. / 10).round('us')
+
+
 class TestSeriesStatReductions:
     # Note: the name TestSeriesStatReductions indicates these tests
     # were moved from a series-specific test file, _not_ that these tests are
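
For reference, a brief usage sketch of the behaviour this patch adds. It is not part of the diff itself: the index values below are illustrative, and the expected results follow from the docstring and tests above (NaT is skipped by default, skipna=False returns NaT when a missing value is present, and Period dtype raises TypeError).

import pandas as pd

# DatetimeIndex / TimedeltaIndex gain a mean() that skips NaT by default
dti = pd.DatetimeIndex(['2001-01-01', '2001-01-03', pd.NaT])
dti.mean()               # Timestamp('2001-01-02 00:00:00'); the NaT is ignored
dti.mean(skipna=False)   # NaT, because a missing value is present

tdi = pd.TimedeltaIndex(['1 days', '3 days'])
tdi.mean()               # Timedelta('2 days 00:00:00')
pd.Series(tdi).mean()    # same result; Series dispatches to the TimedeltaIndex implementation

# Period dtype stays unsupported and raises, per the discussion in GH#24757
try:
    pd.period_range('2001', periods=3, freq='A').mean()
except TypeError as err:
    print(err)           # message suggests obj.to_timestamp(how='start').mean() instead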