From 1f90a1dd6ac385db0da7364c77aa56ac0cc7bf69 Mon Sep 17 00:00:00 2001 From: sinhrks Date: Sun, 29 Jun 2014 18:42:34 +0900 Subject: [PATCH] BUG: PeriodIndex.min/max returns int --- doc/source/v0.14.1.txt | 1 + pandas/core/base.py | 31 ++++++++++++++++ pandas/tests/test_base.py | 77 +++++++++++++++++++++++++++++++++------ pandas/tseries/index.py | 28 -------------- pandas/tseries/period.py | 4 ++ 5 files changed, 101 insertions(+), 40 deletions(-) diff --git a/doc/source/v0.14.1.txt b/doc/source/v0.14.1.txt index 3160b35386fa2..dbd8b15333d50 100644 --- a/doc/source/v0.14.1.txt +++ b/doc/source/v0.14.1.txt @@ -185,6 +185,7 @@ Bug Fixes - Bug in ``DatetimeIndex.asobject`` doesn't preserve ``name`` (:issue:`7299`) - Bug in multi-index slicing with datetimelike ranges (strings and Timestamps), (:issue:`7429`) - Bug in ``Index.min`` and ``max`` doesn't handle ``nan`` and ``NaT`` properly (:issue:`7261`) +- Bug in ``PeriodIndex.min/max`` results in ``int`` (:issue:`7609`) - Bug in ``resample`` where ``fill_method`` was ignored if you passed ``how`` (:issue:`7261`) - Bug in ``TimeGrouper`` doesn't exclude column specified by ``key`` (:issue:`7227`) - Bug in ``DataFrame`` and ``Series`` bar and barh plot raises ``TypeError`` when ``bottom`` diff --git a/pandas/core/base.py b/pandas/core/base.py index cc676b9682277..aff2713ee85f5 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -402,3 +402,34 @@ def tolist(self): """ return list(self.asobject) + def min(self, axis=None): + """ + Overridden ndarray.min to return an object + """ + import pandas.tslib as tslib + mask = self.asi8 == tslib.iNaT + masked = self[~mask] + if len(masked) == 0: + return self._na_value + elif self.is_monotonic: + return masked[0] + else: + min_stamp = masked.asi8.min() + return self._box_func(min_stamp) + + def max(self, axis=None): + """ + Overridden ndarray.max to return an object + """ + import pandas.tslib as tslib + mask = self.asi8 == tslib.iNaT + masked = self[~mask] + if len(masked) == 0: + return self._na_value + elif self.is_monotonic: + return masked[-1] + else: + max_stamp = masked.asi8.max() + return self._box_func(max_stamp) + + diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 291b10c70c83c..f41e745013f08 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -8,6 +8,7 @@ from pandas.util.testing import assertRaisesRegexp, assert_isinstance from pandas import Series, Index, Int64Index, DatetimeIndex, PeriodIndex from pandas import _np_version_under1p7 +import pandas.tslib as tslib import nose import pandas.util.testing as tm @@ -202,7 +203,10 @@ def test_ops(self): for op in ['max','min']: for o in self.objs: result = getattr(o,op)() - expected = getattr(o.values,op)() + if not isinstance(o, PeriodIndex): + expected = getattr(o.values, op)() + else: + expected = pd.Period(ordinal=getattr(o.values, op)(), freq=o.freq) try: self.assertEqual(result, expected) except ValueError: @@ -232,17 +236,6 @@ def test_nanops(self): # check DatetimeIndex non-monotonic path self.assertEqual(getattr(obj, op)(), datetime(2011, 11, 1)) - # explicitly create DatetimeIndex - obj = DatetimeIndex([]) - self.assertTrue(pd.isnull(getattr(obj, op)())) - - obj = DatetimeIndex([pd.NaT]) - self.assertTrue(pd.isnull(getattr(obj, op)())) - - obj = DatetimeIndex([pd.NaT, pd.NaT, pd.NaT]) - self.assertTrue(pd.isnull(getattr(obj, op)())) - - def test_value_counts_unique_nunique(self): for o in self.objs: klass = type(o) @@ -552,6 +545,33 @@ def test_asobject_tolist(self): self.assertEqual(result.name, expected.name) self.assertEqual(idx.tolist(), expected_list) + def test_minmax(self): + for tz in [None, 'Asia/Tokyo', 'US/Eastern']: + # monotonic + idx1 = pd.DatetimeIndex([pd.NaT, '2011-01-01', '2011-01-02', + '2011-01-03'], tz=tz) + self.assertTrue(idx1.is_monotonic) + + # non-monotonic + idx2 = pd.DatetimeIndex(['2011-01-01', pd.NaT, '2011-01-03', + '2011-01-02', pd.NaT], tz=tz) + self.assertFalse(idx2.is_monotonic) + + for idx in [idx1, idx2]: + self.assertEqual(idx.min(), pd.Timestamp('2011-01-01', tz=tz)) + self.assertEqual(idx.max(), pd.Timestamp('2011-01-03', tz=tz)) + + for op in ['min', 'max']: + # Return NaT + obj = DatetimeIndex([]) + self.assertTrue(pd.isnull(getattr(obj, op)())) + + obj = DatetimeIndex([pd.NaT]) + self.assertTrue(pd.isnull(getattr(obj, op)())) + + obj = DatetimeIndex([pd.NaT, pd.NaT, pd.NaT]) + self.assertTrue(pd.isnull(getattr(obj, op)())) + class TestPeriodIndexOps(Ops): _allowed = '_allow_period_index_ops' @@ -597,6 +617,39 @@ def test_asobject_tolist(self): self.assertTrue(result_list[2].ordinal, pd.tslib.iNaT) self.assertTrue(result_list[2].freq, 'D') + def test_minmax(self): + + # monotonic + idx1 = pd.PeriodIndex([pd.NaT, '2011-01-01', '2011-01-02', + '2011-01-03'], freq='D') + self.assertTrue(idx1.is_monotonic) + + # non-monotonic + idx2 = pd.PeriodIndex(['2011-01-01', pd.NaT, '2011-01-03', + '2011-01-02', pd.NaT], freq='D') + self.assertFalse(idx2.is_monotonic) + + for idx in [idx1, idx2]: + self.assertEqual(idx.min(), pd.Period('2011-01-01', freq='D')) + self.assertEqual(idx.max(), pd.Period('2011-01-03', freq='D')) + + for op in ['min', 'max']: + # Return NaT + obj = PeriodIndex([], freq='M') + result = getattr(obj, op)() + self.assertEqual(result.ordinal, tslib.iNaT) + self.assertEqual(result.freq, 'M') + + obj = PeriodIndex([pd.NaT], freq='M') + result = getattr(obj, op)() + self.assertEqual(result.ordinal, tslib.iNaT) + self.assertEqual(result.freq, 'M') + + obj = PeriodIndex([pd.NaT, pd.NaT, pd.NaT], freq='M') + result = getattr(obj, op)() + self.assertEqual(result.ordinal, tslib.iNaT) + self.assertEqual(result.freq, 'M') + if __name__ == '__main__': import nose diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index ff585d80af830..ac002b86f3de9 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -1783,34 +1783,6 @@ def indexer_between_time(self, start_time, end_time, include_start=True, return mask.nonzero()[0] - def min(self, axis=None): - """ - Overridden ndarray.min to return a Timestamp - """ - mask = self.asi8 == tslib.iNaT - masked = self[~mask] - if len(masked) == 0: - return tslib.NaT - elif self.is_monotonic: - return masked[0] - else: - min_stamp = masked.asi8.min() - return Timestamp(min_stamp, tz=self.tz) - - def max(self, axis=None): - """ - Overridden ndarray.max to return a Timestamp - """ - mask = self.asi8 == tslib.iNaT - masked = self[~mask] - if len(masked) == 0: - return tslib.NaT - elif self.is_monotonic: - return masked[-1] - else: - max_stamp = masked.asi8.max() - return Timestamp(max_stamp, tz=self.tz) - def to_julian_date(self): """ Convert DatetimeIndex to Float64Index of Julian Dates. diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index d41438bbfd208..5ded7161130bc 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -712,6 +712,10 @@ def _simple_new(cls, values, name, freq=None, **kwargs): result.freq = freq return result + @property + def _na_value(self): + return self._box_func(tslib.iNaT) + def __contains__(self, key): if not isinstance(key, Period) or key.freq != self.freq: if isinstance(key, compat.string_types):