Skip to content

BUG: allow std to work with timedeltas (GH8471) #8476

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 5, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/v0.15.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -638,7 +638,7 @@ TimedeltaIndex/Scalar
We introduce a new scalar type ``Timedelta``, which is a subclass of ``datetime.timedelta`` and behaves in a similar manner,
but allows compatibility with ``np.timedelta64`` types as well as a host of custom representations, parsing options, and attributes.
This type is very similar to how ``Timestamp`` works for ``datetimes``: it is a convenient API wrapper (a "box") around the underlying type. See the :ref:`docs <timedeltas.timedeltas>`.
(:issue:`3009`, :issue:`4533`, :issue:`8209`, :issue:`8187`, :issue:`8190`, :issue:`7869`, :issue:`7661`, :issue:`8345`)
(:issue:`3009`, :issue:`4533`, :issue:`8209`, :issue:`8187`, :issue:`8190`, :issue:`7869`, :issue:`7661`, :issue:`8345`, :issue:`8471`)

.. warning::

Expand Down
86 changes: 34 additions & 52 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3950,60 +3950,42 @@ def mad(self, axis=None, skipna=None, level=None, **kwargs):
return np.abs(demeaned).mean(axis=axis, skipna=skipna)
cls.mad = mad

@Substitution(outname='variance',
desc="Return unbiased variance over requested "
"axis.\n\nNormalized by N-1 by default. "
"This can be changed using the ddof argument")
@Appender(_num_doc)
def var(self, axis=None, skipna=None, level=None, ddof=1, **kwargs):
if skipna is None:
skipna = True
if axis is None:
axis = self._stat_axis_number
if level is not None:
return self._agg_by_level('var', axis=axis, level=level,
skipna=skipna, ddof=ddof)
def _make_stat_function_ddof(name, desc, f):

return self._reduce(nanops.nanvar, axis=axis, skipna=skipna,
ddof=ddof)
cls.var = var

@Substitution(outname='stdev',
desc="Return unbiased standard deviation over requested "
"axis.\n\nNormalized by N-1 by default. "
"This can be changed using the ddof argument")
@Appender(_num_doc)
def std(self, axis=None, skipna=None, level=None, ddof=1, **kwargs):
if skipna is None:
skipna = True
if axis is None:
axis = self._stat_axis_number
if level is not None:
return self._agg_by_level('std', axis=axis, level=level,
skipna=skipna, ddof=ddof)
result = self.var(axis=axis, skipna=skipna, ddof=ddof)
if getattr(result, 'ndim', 0) > 0:
return result.apply(np.sqrt)
return np.sqrt(result)
cls.std = std

@Substitution(outname='standarderror',
desc="Return unbiased standard error of the mean over "
"requested axis.\n\nNormalized by N-1 by default. "
"This can be changed using the ddof argument")
@Appender(_num_doc)
def sem(self, axis=None, skipna=None, level=None, ddof=1, **kwargs):
if skipna is None:
skipna = True
if axis is None:
axis = self._stat_axis_number
if level is not None:
return self._agg_by_level('sem', axis=axis, level=level,
skipna=skipna, ddof=ddof)
@Substitution(outname=name, desc=desc)
@Appender(_num_doc)
def stat_func(self, axis=None, skipna=None, level=None, ddof=1,
              **kwargs):
    # NOTE(review): deliberately no docstring in the body -- the two
    # decorators above presumably assemble it from ``_num_doc``, ``name``
    # and ``desc``; confirm before adding one here.

    # Fill in the effective defaults for arguments the caller left unset.
    skipna = True if skipna is None else skipna
    axis = self._stat_axis_number if axis is None else axis

    if level is None:
        # Plain reduction with the reducer captured from the enclosing scope.
        return self._reduce(f, axis=axis, skipna=skipna, ddof=ddof)

    # A level was requested: delegate to the per-level aggregation, looked
    # up by this statistic's name.
    return self._agg_by_level(name, axis=axis, level=level,
                              skipna=skipna, ddof=ddof)
stat_func.__name__ = name
return stat_func

return self._reduce(nanops.nansem, axis=axis, skipna=skipna,
ddof=ddof)
cls.sem = sem
cls.sem = _make_stat_function_ddof(
'sem',
"Return unbiased standard error of the mean over "
"requested axis.\n\nNormalized by N-1 by default. "
"This can be changed using the ddof argument",
nanops.nansem)
cls.var = _make_stat_function_ddof(
'var',
"Return unbiased variance over requested "
"axis.\n\nNormalized by N-1 by default. "
"This can be changed using the ddof argument",
nanops.nanvar)
cls.std = _make_stat_function_ddof(
'std',
"Return unbiased standard deviation over requested "
"axis.\n\nNormalized by N-1 by default. "
"This can be changed using the ddof argument",
nanops.nanstd)

@Substitution(outname='compounded',
desc="Return the compound percentage of the values for "
Expand Down
36 changes: 23 additions & 13 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ def _wrap_results(result, dtype):
if not isinstance(result, np.ndarray):
result = lib.Timedelta(result)
else:
result = result.view(dtype)
result = result.astype('i8').view(dtype)

return result

Expand Down Expand Up @@ -295,7 +295,7 @@ def get_median(x):
if values.ndim > 1:
# there's a non-empty array to apply over otherwise numpy raises
if notempty:
return np.apply_along_axis(get_median, axis, values)
return _wrap_results(np.apply_along_axis(get_median, axis, values), dtype)

# must return the correct shape, but median is not defined for the
# empty set so return nans of shape "everything but the passed axis"
Expand All @@ -305,7 +305,7 @@ def get_median(x):
dims = np.arange(values.ndim)
ret = np.empty(shp[dims != axis])
ret.fill(np.nan)
return ret
return _wrap_results(ret, dtype)

# otherwise return a scalar value
return _wrap_results(get_median(values) if notempty else np.nan, dtype)
Expand All @@ -329,15 +329,8 @@ def _get_counts_nanvar(mask, axis, ddof):
return count, d


@disallow('M8','m8')
@bottleneck_switch(ddof=1)
def nanvar(values, axis=None, skipna=True, ddof=1):

# we are going to allow timedelta64[ns] here
# but NOT going to coerce them to the Timedelta type
# as this could cause overflow
# so var cannot be computed (but std can!)

def _nanvar(values, axis=None, skipna=True, ddof=1):
# private nanvar calculator
mask = isnull(values)
if not _is_floating_dtype(values):
values = values.astype('f8')
Expand All @@ -352,6 +345,23 @@ def nanvar(values, axis=None, skipna=True, ddof=1):
XX = _ensure_numeric((values ** 2).sum(axis))
return np.fabs((XX - X ** 2 / count) / d)

@disallow('M8')
@bottleneck_switch(ddof=1)
def nanstd(values, axis=None, skipna=True, ddof=1):
    """
    Return the standard deviation of ``values``.

    The variance is obtained from the private ``_nanvar`` helper, its
    square root is taken, and the result is handed to ``_wrap_results``
    together with the dtype of the input.

    Parameters
    ----------
    values : array-like exposing ``.dtype``
    axis : int, optional
        Forwarded to ``_nanvar``.
    skipna : bool, default True
        Forwarded to ``_nanvar``.
    ddof : int, default 1
        Forwarded to ``_nanvar``.

    Notes
    -----
    NOTE(review): only ``'M8'`` is passed to ``disallow`` here (``nanvar``
    also passes ``'m8'``), so timedelta64 input is presumably accepted and
    converted back by ``_wrap_results`` -- confirm against those helpers.
    """
    variance = _nanvar(values, axis=axis, skipna=skipna, ddof=ddof)
    return _wrap_results(np.sqrt(variance), values.dtype)

@disallow('M8','m8')
@bottleneck_switch(ddof=1)
def nanvar(values, axis=None, skipna=True, ddof=1):
    """
    Return the variance of ``values`` by delegating to ``_nanvar``.

    Parameters
    ----------
    values : array-like
    axis : int, optional
    skipna : bool, default True
    ddof : int, default 1
        All arguments are forwarded unchanged to ``_nanvar``.

    Notes
    -----
    The result is not coerced to the Timedelta type, as that could
    overflow; consequently var is not computed for timedelta64[ns] data,
    while std (see ``nanstd``) can be.
    """
    variance = _nanvar(values, axis=axis, skipna=skipna, ddof=ddof)
    return variance

@disallow('M8','m8')
def nansem(values, axis=None, skipna=True, ddof=1):
var = nanvar(values, axis, skipna, ddof=ddof)
Expand Down Expand Up @@ -517,7 +527,7 @@ def nankurt(values, axis=None, skipna=True):
return result


@disallow('M8')
@disallow('M8','m8')
def nanprod(values, axis=None, skipna=True):
mask = isnull(values)
if skipna and not _is_any_int_dtype(values):
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/test_nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,10 @@ def test_nanvar(self):
self.check_funs_ddof(nanops.nanvar, np.var,
allow_complex=False, allow_date=False, allow_tdelta=False)

def test_nanstd(self):
    # Compare nanops.nanstd with np.std through the shared ddof checker;
    # complex and date inputs are switched off, timedelta input is on.
    self.check_funs_ddof(
        nanops.nanstd,
        np.std,
        allow_complex=False,
        allow_date=False,
        allow_tdelta=True
    )

def test_nansem(self):
tm.skip_if_no_package('scipy.stats')
self.check_funs_ddof(nanops.nansem, np.var,
Expand Down
21 changes: 17 additions & 4 deletions pandas/tseries/tests/test_timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,9 @@ def test_timedelta_ops(self):
expected = to_timedelta(timedelta(seconds=9))
self.assertEqual(result, expected)

result = td.to_frame().mean()
self.assertEqual(result[0], expected)

result = td.quantile(.1)
expected = Timedelta(np.timedelta64(2600,'ms'))
self.assertEqual(result, expected)
Expand All @@ -487,18 +490,28 @@ def test_timedelta_ops(self):
expected = to_timedelta('00:00:08')
self.assertEqual(result, expected)

result = td.to_frame().median()
self.assertEqual(result[0], expected)

# GH 6462
# consistency in returned values for sum
result = td.sum()
expected = to_timedelta('00:01:21')
self.assertEqual(result, expected)

# you can technically do a std, but var overflows
# so this is tricky
self.assertRaises(TypeError, lambda : td.std())
result = td.to_frame().sum()
self.assertEqual(result[0], expected)

# std
result = td.std()
expected = to_timedelta(Series(td.dropna().values).std())
self.assertEqual(result, expected)

result = td.to_frame().std()
self.assertEqual(result[0], expected)

# invalid ops
for op in ['skew','kurt','sem','var']:
for op in ['skew','kurt','sem','var','prod']:
self.assertRaises(TypeError, lambda : getattr(td,op)())

def test_timedelta_ops_scalar(self):
Expand Down