Skip to content

BUG: Series.resample fails on NaT index #39229

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jan 26, 2021
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,7 @@ Groupby/resample/rolling
- Fixed bug in :meth:`DataFrameGroupBy.sum` and :meth:`SeriesGroupBy.sum` causing loss of precision through using Kahan summation (:issue:`38778`)
- Fixed bug in :meth:`DataFrameGroupBy.cumsum`, :meth:`SeriesGroupBy.cumsum`, :meth:`DataFrameGroupBy.mean` and :meth:`SeriesGroupBy.mean` causing loss of precision through using Kahan summation (:issue:`38934`)
- Bug in :meth:`.Resampler.aggregate` and :meth:`DataFrame.transform` raising ``TypeError`` instead of ``SpecificationError`` when missing keys had mixed dtypes (:issue:`39025`)
- Bug in :meth:`Series.resample` raising when the index consisted entirely of ``NaT`` (:issue:`39227`)

Reshaping
^^^^^^^^^
Expand Down
16 changes: 12 additions & 4 deletions pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,8 @@ def _wrap_result(self, result):

if isinstance(result, ABCSeries) and result.empty:
obj = self.obj
result.index = _asfreq_compat(obj.index, freq=self.freq)
# When index is all NaT, result is empty but index is not
result.index = _asfreq_compat(obj.index[:0], freq=self.freq)
result.name = getattr(obj, "name", None)

return result
Expand Down Expand Up @@ -1653,10 +1654,17 @@ def _get_period_bins(self, ax: PeriodIndex):
nat_count = np.sum(memb._isnan)
memb = memb[~memb._isnan]

# if index contains no valid (non-NaT) values, return empty index
if not len(memb):
binner = labels = PeriodIndex(data=[], freq=self.freq, name=ax.name)
return binner, [], labels
if len(ax) == 0:
# if index is empty, return empty bins
data = []
bins = np.array([], dtype=np.int64)
else:
# if index is all NaT, return a single bin
data = [NaT]
bins = np.array([len(ax)])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wait, why do we have NaT in an index? I.e., why don't we have 0 bins?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Below this is the logic for when there are NaT along with non-NaT (lines 1712-1718 here):

        if nat_count > 0:
            # NaT handling as in pandas._libs.lib.generate_bins_dt64()
            # shift bins by the number of NaT
            bins += nat_count
            bins = np.insert(bins, 0, nat_count)
            binner = binner.insert(0, NaT)
            labels = labels.insert(0, NaT)

The added logic agrees with this in the case where the index is all NaT.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. Can you unify this logic (maybe into a function)?

binner = labels = PeriodIndex(data=data, freq=self.freq, name=ax.name)
return binner, bins, labels

freq_mult = self.freq.n

Expand Down
26 changes: 25 additions & 1 deletion pandas/tests/resample/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np
import pytest

from pandas import DataFrame, Series
from pandas import DataFrame, NaT, PeriodIndex, Series
import pandas._testing as tm
from pandas.core.groupby.groupby import DataError
from pandas.core.groupby.grouper import Grouper
Expand Down Expand Up @@ -110,6 +110,30 @@ def test_resample_empty_series(freq, empty_series_dti, resample_method):
tm.assert_series_equal(result, expected, check_dtype=False)


@all_ts
@pytest.mark.parametrize("freq", ["M", "D", "H"])
def test_resample_nat_index_series(request, freq, series, resample_method):
# GH39227
# Resampling a Series whose index is a PeriodIndex made up only of NaT:
# the result of each resample method is compared against an empty object.

# The monthly case is marked as an expected failure; the cause is not known
# (see the xfail reason string below).
if freq == "M":
request.node.add_marker(pytest.mark.xfail(reason="Don't know why this fails"))

# Work on a copy of the fixture and replace its index with an all-NaT
# PeriodIndex of the same length and the parametrized frequency.
s = series.copy()
s.index = PeriodIndex([NaT] * len(s), freq=freq)
# Look up the resample method by name on the resampler and call it.
result = getattr(s.resample(freq), resample_method)()

if resample_method == "ohlc":
# "ohlc" is compared as a frame: expect an empty DataFrame with the four
# open/high/low/close columns and an empty slice of the NaT index.
expected = DataFrame(
[], index=s.index[:0].copy(), columns=["open", "high", "low", "close"]
)
tm.assert_frame_equal(result, expected, check_dtype=False)
else:
# Every other method is compared against an empty slice of the input Series.
expected = s[:0].copy()
tm.assert_series_equal(result, expected, check_dtype=False)
# Additionally compare the index and its freq explicitly.
tm.assert_index_equal(result.index, expected.index)
assert result.index.freq == expected.index.freq


@all_ts
@pytest.mark.parametrize("freq", ["M", "D", "H"])
@pytest.mark.parametrize("resample_method", ["count", "size"])
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/resample/test_period_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -787,9 +787,9 @@ def test_resample_with_nat(self, periods, values, freq, expected_values):
def test_resample_with_only_nat(self):
# GH 13224
pi = PeriodIndex([pd.NaT] * 3, freq="S")
frame = DataFrame([2, 3, 5], index=pi)
frame = DataFrame([2, 3, 5], index=pi, columns=["a"])
expected_index = PeriodIndex(data=[], freq=pi.freq)
expected = DataFrame(index=expected_index)
expected = DataFrame(index=expected_index, columns=["a"], dtype="int64")
result = frame.resample("1s").mean()
tm.assert_frame_equal(result, expected)

Expand Down