From 9305c36ca536aa7877517c1cdfb9df9481a01194 Mon Sep 17 00:00:00 2001 From: sinhrks Date: Sat, 9 Jul 2016 11:35:12 +0900 Subject: [PATCH] COMPAT: Period(NaT) now returns pd.NaT --- doc/source/whatsnew/v0.19.0.txt | 39 +++ pandas/src/period.pyx | 269 ++++++++++--------- pandas/tests/indexes/test_datetimelike.py | 9 +- pandas/tseries/period.py | 49 ++-- pandas/tseries/tests/test_base.py | 26 +- pandas/tseries/tests/test_period.py | 304 +++++++++++++--------- pandas/tseries/tests/test_tslib.py | 7 + pandas/tslib.pyx | 5 +- 8 files changed, 406 insertions(+), 302 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index bef02a06135de..4092694ca6cd1 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -445,6 +445,45 @@ Furthermore: - Passing duplicated ``percentiles`` will now raise a ``ValueError``. - Bug in ``.describe()`` on a DataFrame with a mixed-dtype column index, which would previously raise a ``TypeError`` (:issue:`13288`) +.. _whatsnew_0190.api.periodnat: + +``Period('NaT')`` now returns ``pd.NaT`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, ``Period`` has its own ``Period('NaT')`` representation different from ``pd.NaT``. Now ``Period('NaT')`` has been changed to return ``pd.NaT``. (:issue:`12759`) + +Previous Behavior: + +.. code-block:: ipython + + In [5]: pd.Period('NaT', freq='D') + Out[5]: Period('NaT', 'D') + +New Behavior: + +.. ipython:: python + + pd.Period('NaT') + + +To be compat with ``Period`` addition and subtraction, ``pd.NaT`` now supports addition and subtraction with ``int``. Previously it raises ``ValueError``. + +Previous Behavior: + +.. code-block:: ipython + + In [5]: pd.NaT + 1 + ... + ValueError: Cannot add integral value to Timestamp without freq. + +New Behavior: + +.. ipython:: python + + pd.NaT + 1 + pd.NaT - 1 + + .. _whatsnew_0190.deprecations: Deprecations diff --git a/pandas/src/period.pyx b/pandas/src/period.pyx index af2e295ae0cfc..37f265ede07e7 100644 --- a/pandas/src/period.pyx +++ b/pandas/src/period.pyx @@ -472,7 +472,11 @@ def extract_ordinals(ndarray[object] values, freq): except AttributeError: p = Period(p, freq=freq) - ordinals[i] = p.ordinal + if p is tslib.NaT: + # input may contain NaT-like string + ordinals[i] = tslib.iNaT + else: + ordinals[i] = p.ordinal return ordinals @@ -665,24 +669,8 @@ class IncompatibleFrequency(ValueError): pass -cdef class Period(object): - """ - Represents an period of time +cdef class _Period(object): - Parameters - ---------- - value : Period or compat.string_types, default None - The time period represented (e.g., '4Q2005') - freq : str, default None - One of pandas period strings or corresponding objects - year : int, default None - month : int, default 1 - quarter : int, default None - day : int, default 1 - hour : int, default 0 - minute : int, default 0 - second : int, default 0 - """ cdef public: int64_t ordinal object freq @@ -711,97 +699,22 @@ cdef class Period(object): @classmethod def _from_ordinal(cls, ordinal, freq): """ fast creation from an ordinal and freq that are already validated! """ - self = Period.__new__(cls) - self.ordinal = ordinal - self.freq = cls._maybe_convert_freq(freq) - return self - - def __init__(self, value=None, freq=None, ordinal=None, - year=None, month=1, quarter=None, day=1, - hour=0, minute=0, second=0): - # freq points to a tuple (base, mult); base is one of the defined - # periods such as A, Q, etc. Every five minutes would be, e.g., - # ('T', 5) but may be passed in as a string like '5T' - - # ordinal is the period offset from the gregorian proleptic epoch - - if ordinal is not None and value is not None: - raise ValueError(("Only value or ordinal but not both should be " - "given but not both")) - elif ordinal is not None: - if not lib.is_integer(ordinal): - raise ValueError("Ordinal must be an integer") - if freq is None: - raise ValueError('Must supply freq for ordinal value') - - elif value is None: - if freq is None: - raise ValueError("If value is None, freq cannot be None") - ordinal = _ordinal_from_fields(year, month, quarter, day, - hour, minute, second, freq) - - elif isinstance(value, Period): - other = value - if freq is None or frequencies.get_freq_code(freq) == frequencies.get_freq_code(other.freq): - ordinal = other.ordinal - freq = other.freq - else: - converted = other.asfreq(freq) - ordinal = converted.ordinal - - elif is_null_datetimelike(value) or value in tslib._nat_strings: - ordinal = tslib.iNaT - if freq is None: - raise ValueError("If value is NaT, freq cannot be None " - "because it cannot be inferred") - - elif isinstance(value, compat.string_types) or lib.is_integer(value): - if lib.is_integer(value): - value = str(value) - value = value.upper() - dt, _, reso = parse_time_string(value, freq) - - if freq is None: - try: - freq = frequencies.Resolution.get_freq(reso) - except KeyError: - raise ValueError("Invalid frequency or could not infer: %s" % reso) - - elif isinstance(value, datetime): - dt = value - if freq is None: - raise ValueError('Must supply freq for datetime value') - elif isinstance(value, np.datetime64): - dt = Timestamp(value) - if freq is None: - raise ValueError('Must supply freq for datetime value') - elif isinstance(value, date): - dt = datetime(year=value.year, month=value.month, day=value.day) - if freq is None: - raise ValueError('Must supply freq for datetime value') - else: - msg = "Value must be Period, string, integer, or datetime" - raise ValueError(msg) - - base, mult = frequencies.get_freq_code(freq) - - if ordinal is None: - self.ordinal = get_period_ordinal(dt.year, dt.month, dt.day, - dt.hour, dt.minute, dt.second, - dt.microsecond, 0, base) + if ordinal == tslib.iNaT: + return tslib.NaT else: + self = _Period.__new__(cls) self.ordinal = ordinal - - self.freq = self._maybe_convert_freq(freq) + self.freq = cls._maybe_convert_freq(freq) + return self def __richcmp__(self, other, op): if isinstance(other, Period): if other.freq != self.freq: msg = _DIFFERENT_FREQ.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) - if self.ordinal == tslib.iNaT or other.ordinal == tslib.iNaT: - return _nat_scalar_rules[op] return PyObject_RichCompareBool(self.ordinal, other.ordinal, op) + elif other is tslib.NaT: + return _nat_scalar_rules[op] # index/series like elif hasattr(other, '_typ'): return NotImplemented @@ -824,10 +737,7 @@ cdef class Period(object): offset_nanos = tslib._delta_to_nanoseconds(offset) if nanos % offset_nanos == 0: - if self.ordinal == tslib.iNaT: - ordinal = self.ordinal - else: - ordinal = self.ordinal + (nanos // offset_nanos) + ordinal = self.ordinal + (nanos // offset_nanos) return Period(ordinal=ordinal, freq=self.freq) msg = 'Input cannot be converted to Period(freq={0})' raise IncompatibleFrequency(msg.format(self.freqstr)) @@ -835,10 +745,7 @@ cdef class Period(object): freqstr = frequencies.get_standard_freq(other) base = frequencies.get_base_alias(freqstr) if base == self.freq.rule_code: - if self.ordinal == tslib.iNaT: - ordinal = self.ordinal - else: - ordinal = self.ordinal + other.n + ordinal = self.ordinal + other.n return Period(ordinal=ordinal, freq=self.freq) msg = _DIFFERENT_FREQ.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) @@ -853,10 +760,7 @@ cdef class Period(object): elif other is tslib.NaT: return tslib.NaT elif lib.is_integer(other): - if self.ordinal == tslib.iNaT: - ordinal = self.ordinal - else: - ordinal = self.ordinal + other * self.freq.n + ordinal = self.ordinal + other * self.freq.n return Period(ordinal=ordinal, freq=self.freq) else: # pragma: no cover return NotImplemented @@ -872,17 +776,12 @@ cdef class Period(object): neg_other = -other return self + neg_other elif lib.is_integer(other): - if self.ordinal == tslib.iNaT: - ordinal = self.ordinal - else: - ordinal = self.ordinal - other * self.freq.n + ordinal = self.ordinal - other * self.freq.n return Period(ordinal=ordinal, freq=self.freq) elif isinstance(other, Period): if other.freq != self.freq: msg = _DIFFERENT_FREQ.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) - if self.ordinal == tslib.iNaT or other.ordinal == tslib.iNaT: - return Period(ordinal=tslib.iNaT, freq=self.freq) return self.ordinal - other.ordinal elif getattr(other, '_typ', None) == 'periodindex': return -other.__sub__(self) @@ -914,16 +813,13 @@ cdef class Period(object): base1, mult1 = frequencies.get_freq_code(self.freq) base2, mult2 = frequencies.get_freq_code(freq) - if self.ordinal == tslib.iNaT: - ordinal = self.ordinal + # mult1 can't be negative or 0 + end = how == 'E' + if end: + ordinal = self.ordinal + mult1 - 1 else: - # mult1 can't be negative or 0 - end = how == 'E' - if end: - ordinal = self.ordinal + mult1 - 1 - else: - ordinal = self.ordinal - ordinal = period_asfreq(ordinal, base1, base2, end) + ordinal = self.ordinal + ordinal = period_asfreq(ordinal, base1, base2, end) return Period(ordinal=ordinal, freq=freq) @@ -933,12 +829,9 @@ cdef class Period(object): @property def end_time(self): - if self.ordinal == tslib.iNaT: - ordinal = self.ordinal - else: - # freq.n can't be negative or 0 - # ordinal = (self + self.freq.n).start_time.value - 1 - ordinal = (self + 1).start_time.value - 1 + # freq.n can't be negative or 0 + # ordinal = (self + self.freq.n).start_time.value - 1 + ordinal = (self + 1).start_time.value - 1 return Timestamp(ordinal) def to_timestamp(self, freq=None, how='start', tz=None): @@ -1199,8 +1092,114 @@ cdef class Period(object): return period_format(self.ordinal, base, fmt) -def _ordinal_from_fields(year, month, quarter, day, hour, minute, - second, freq): +class Period(_Period): + """ + Represents an period of time + + Parameters + ---------- + value : Period or compat.string_types, default None + The time period represented (e.g., '4Q2005') + freq : str, default None + One of pandas period strings or corresponding objects + year : int, default None + month : int, default 1 + quarter : int, default None + day : int, default 1 + hour : int, default 0 + minute : int, default 0 + second : int, default 0 + """ + + def __new__(cls, value=None, freq=None, ordinal=None, + year=None, month=None, quarter=None, day=None, + hour=None, minute=None, second=None): + # freq points to a tuple (base, mult); base is one of the defined + # periods such as A, Q, etc. Every five minutes would be, e.g., + # ('T', 5) but may be passed in as a string like '5T' + + # ordinal is the period offset from the gregorian proleptic epoch + + cdef _Period self + + if ordinal is not None and value is not None: + raise ValueError(("Only value or ordinal but not both should be " + "given but not both")) + elif ordinal is not None: + if not lib.is_integer(ordinal): + raise ValueError("Ordinal must be an integer") + if freq is None: + raise ValueError('Must supply freq for ordinal value') + + elif value is None: + if (year is None and month is None and quarter is None and + day is None and hour is None and minute is None and second is None): + ordinal = tslib.iNaT + else: + if freq is None: + raise ValueError("If value is None, freq cannot be None") + + # set defaults + month = 1 if month is None else month + day = 1 if day is None else day + hour = 0 if hour is None else hour + minute = 0 if minute is None else minute + second = 0 if second is None else second + + ordinal = _ordinal_from_fields(year, month, quarter, day, + hour, minute, second, freq) + + elif isinstance(value, Period): + other = value + if freq is None or frequencies.get_freq_code(freq) == frequencies.get_freq_code(other.freq): + ordinal = other.ordinal + freq = other.freq + else: + converted = other.asfreq(freq) + ordinal = converted.ordinal + + elif is_null_datetimelike(value) or value in tslib._nat_strings: + ordinal = tslib.iNaT + + elif isinstance(value, compat.string_types) or lib.is_integer(value): + if lib.is_integer(value): + value = str(value) + value = value.upper() + dt, _, reso = parse_time_string(value, freq) + + if freq is None: + try: + freq = frequencies.Resolution.get_freq(reso) + except KeyError: + raise ValueError("Invalid frequency or could not infer: %s" % reso) + + elif isinstance(value, datetime): + dt = value + if freq is None: + raise ValueError('Must supply freq for datetime value') + elif isinstance(value, np.datetime64): + dt = Timestamp(value) + if freq is None: + raise ValueError('Must supply freq for datetime value') + elif isinstance(value, date): + dt = datetime(year=value.year, month=value.month, day=value.day) + if freq is None: + raise ValueError('Must supply freq for datetime value') + else: + msg = "Value must be Period, string, integer, or datetime" + raise ValueError(msg) + + if ordinal is None: + base, mult = frequencies.get_freq_code(freq) + ordinal = get_period_ordinal(dt.year, dt.month, dt.day, + dt.hour, dt.minute, dt.second, + dt.microsecond, 0, base) + + return cls._from_ordinal(ordinal, freq) + + +def _ordinal_from_fields(year, month, quarter, day, + hour, minute, second, freq): base, mult = frequencies.get_freq_code(freq) if quarter is not None: year, month = _quarter_to_myear(year, quarter, freq) diff --git a/pandas/tests/indexes/test_datetimelike.py b/pandas/tests/indexes/test_datetimelike.py index 9eba481a66685..5c21f71d64660 100644 --- a/pandas/tests/indexes/test_datetimelike.py +++ b/pandas/tests/indexes/test_datetimelike.py @@ -741,14 +741,7 @@ def test_astype(self): result = idx.astype(object) expected = Index([Period('2016-05-16', freq='D')] + [Period(NaT, freq='D')] * 3, dtype='object') - # Hack because of lack of support for Period null checking (GH12759) - tm.assert_index_equal(result[:1], expected[:1]) - result_arr = np.asarray([p.ordinal for p in result], dtype=np.int64) - expected_arr = np.asarray([p.ordinal for p in expected], - dtype=np.int64) - tm.assert_numpy_array_equal(result_arr, expected_arr) - # TODO: When GH12759 is resolved, change the above hack to: - # tm.assert_index_equal(result, expected) # now, it raises. + tm.assert_index_equal(result, expected) result = idx.astype(int) expected = Int64Index([16937] + [-9223372036854775808] * 3, diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 45f634050a5d8..dffb71cff526a 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -92,13 +92,14 @@ def wrapper(self, other): result[mask] = nat_result return result + elif other is tslib.NaT: + result = np.empty(len(self.values), dtype=bool) + result.fill(nat_result) else: other = Period(other, freq=self.freq) func = getattr(self.values, opname) result = func(other.ordinal) - if other.ordinal == tslib.iNaT: - result.fill(nat_result) mask = self.values == tslib.iNaT if mask.any(): result[mask] = nat_result @@ -235,7 +236,7 @@ def _from_arraylike(cls, data, freq, tz): data = _ensure_int64(data) if freq is None: raise ValueError('freq not specified') - data = np.array([Period(x, freq=freq).ordinal for x in data], + data = np.array([Period(x, freq=freq) for x in data], dtype=np.int64) except (TypeError, ValueError): data = _ensure_object(data) @@ -322,15 +323,18 @@ def _na_value(self): return self._box_func(tslib.iNaT) def __contains__(self, key): - if not isinstance(key, Period) or key.freq != self.freq: - if isinstance(key, compat.string_types): - try: - self.get_loc(key) - return True - except Exception: - return False + if isinstance(key, Period): + if key.freq != self.freq: + return False + else: + return key.ordinal in self._engine + else: + try: + self.get_loc(key) + return True + except Exception: + return False return False - return key.ordinal in self._engine def __array_wrap__(self, result, context=None): """ @@ -622,17 +626,13 @@ def _sub_period(self, other): msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) - if other.ordinal == tslib.iNaT: - new_data = np.empty(len(self)) - new_data.fill(np.nan) - else: - asi8 = self.asi8 - new_data = asi8 - other.ordinal + asi8 = self.asi8 + new_data = asi8 - other.ordinal - if self.hasnans: - mask = asi8 == tslib.iNaT - new_data = new_data.astype(np.float64) - new_data[mask] = np.nan + if self.hasnans: + mask = asi8 == tslib.iNaT + new_data = new_data.astype(np.float64) + new_data[mask] = np.nan # result must be Int64Index or Float64Index return Index(new_data, name=self.name) @@ -740,8 +740,10 @@ def get_loc(self, key, method=None, tolerance=None): # we cannot construct the Period # as we have an invalid type raise KeyError(key) + try: - return Index.get_loc(self, key.ordinal, method, tolerance) + ordinal = tslib.iNaT if key is tslib.NaT else key.ordinal + return Index.get_loc(self, ordinal, method, tolerance) except KeyError: raise KeyError(key) @@ -1044,8 +1046,7 @@ def _get_ordinal_range(start, end, periods, freq, mult=1): if is_start_per and is_end_per and start.freq != end.freq: raise ValueError('Start and end must have same freq') - if ((is_start_per and start.ordinal == tslib.iNaT) or - (is_end_per and end.ordinal == tslib.iNaT)): + if (start is tslib.NaT or end is tslib.NaT): raise ValueError('Start and end must not be NaT') if freq is None: diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py index 68cea17ba3fc9..958a10c329a46 100644 --- a/pandas/tseries/tests/test_base.py +++ b/pandas/tseries/tests/test_base.py @@ -1587,17 +1587,16 @@ def test_asobject_tolist(self): result = idx.asobject self.assertTrue(isinstance(result, Index)) self.assertEqual(result.dtype, object) + tm.assert_index_equal(result, expected) for i in [0, 1, 3]: - self.assertTrue(result[i], expected[i]) - self.assertTrue(result[2].ordinal, pd.tslib.iNaT) - self.assertTrue(result[2].freq, 'D') + self.assertEqual(result[i], expected[i]) + self.assertIs(result[2], pd.NaT) self.assertEqual(result.name, expected.name) result_list = idx.tolist() for i in [0, 1, 3]: - self.assertTrue(result_list[i], expected_list[i]) - self.assertTrue(result_list[2].ordinal, pd.tslib.iNaT) - self.assertTrue(result_list[2].freq, 'D') + self.assertEqual(result_list[i], expected_list[i]) + self.assertIs(result_list[2], pd.NaT) def test_minmax(self): @@ -1623,18 +1622,15 @@ def test_minmax(self): # Return NaT obj = PeriodIndex([], freq='M') result = getattr(obj, op)() - self.assertEqual(result.ordinal, tslib.iNaT) - self.assertEqual(result.freq, 'M') + self.assertIs(result, tslib.NaT) obj = PeriodIndex([pd.NaT], freq='M') result = getattr(obj, op)() - self.assertEqual(result.ordinal, tslib.iNaT) - self.assertEqual(result.freq, 'M') + self.assertIs(result, tslib.NaT) obj = PeriodIndex([pd.NaT, pd.NaT, pd.NaT], freq='M') result = getattr(obj, op)() - self.assertEqual(result.ordinal, tslib.iNaT) - self.assertEqual(result.freq, 'M') + self.assertIs(result, tslib.NaT) def test_numpy_minmax(self): pr = pd.period_range(start='2016-01-15', end='2016-01-20') @@ -1735,9 +1731,9 @@ def test_representation_to_series(self): 2 2013 dtype: object""" - exp6 = """0 2011-01-01 09:00 -1 2012-02-01 10:00 -2 NaT + exp6 = """0 2011-01-01 09:00 +1 2012-02-01 10:00 +2 NaT dtype: object""" exp7 = """0 2013Q1 diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index 591fa19aad585..fd5939909746f 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -36,14 +36,17 @@ def test_quarterly_negative_ordinals(self): p = Period(ordinal=-1, freq='Q-DEC') self.assertEqual(p.year, 1969) self.assertEqual(p.quarter, 4) + self.assertIsInstance(p, Period) p = Period(ordinal=-2, freq='Q-DEC') self.assertEqual(p.year, 1969) self.assertEqual(p.quarter, 3) + self.assertIsInstance(p, Period) p = Period(ordinal=-2, freq='M') self.assertEqual(p.year, 1969) self.assertEqual(p.month, 11) + self.assertIsInstance(p, Period) def test_period_cons_quarterly(self): # bugs in scikits.timeseries @@ -67,6 +70,7 @@ def test_period_cons_annual(self): stamp = exp.to_timestamp('D', how='end') + timedelta(days=30) p = Period(stamp, freq=freq) self.assertEqual(p, exp + 1) + self.assertIsInstance(p, Period) def test_period_cons_weekly(self): for num in range(10, 17): @@ -77,34 +81,46 @@ def test_period_cons_weekly(self): result = Period(daystr, freq=freq) expected = Period(daystr, freq='D').asfreq(freq) self.assertEqual(result, expected) + self.assertIsInstance(result, Period) + + def test_period_from_ordinal(self): + p = pd.Period('2011-01', freq='M') + res = pd.Period._from_ordinal(p.ordinal, freq='M') + self.assertEqual(p, res) + self.assertIsInstance(res, Period) def test_period_cons_nat(self): p = Period('NaT', freq='M') - self.assertEqual(p.ordinal, tslib.iNaT) - self.assertEqual(p.freq, 'M') - self.assertEqual((p + 1).ordinal, tslib.iNaT) - self.assertEqual((1 + p).ordinal, tslib.iNaT) + self.assertIs(p, pd.NaT) p = Period('nat', freq='W-SUN') - self.assertEqual(p.ordinal, tslib.iNaT) - self.assertEqual(p.freq, 'W-SUN') - self.assertEqual((p + 1).ordinal, tslib.iNaT) - self.assertEqual((1 + p).ordinal, tslib.iNaT) + self.assertIs(p, pd.NaT) p = Period(tslib.iNaT, freq='D') - self.assertEqual(p.ordinal, tslib.iNaT) - self.assertEqual(p.freq, 'D') - self.assertEqual((p + 1).ordinal, tslib.iNaT) - self.assertEqual((1 + p).ordinal, tslib.iNaT) + self.assertIs(p, pd.NaT) p = Period(tslib.iNaT, freq='3D') - self.assertEqual(p.ordinal, tslib.iNaT) - self.assertEqual(p.freq, offsets.Day(3)) - self.assertEqual(p.freqstr, '3D') - self.assertEqual((p + 1).ordinal, tslib.iNaT) - self.assertEqual((1 + p).ordinal, tslib.iNaT) + self.assertIs(p, pd.NaT) + + p = Period('NaT') + self.assertIs(p, pd.NaT) + + p = Period(tslib.iNaT) + self.assertIs(p, pd.NaT) + + def test_cons_null_like(self): + # check Timestamp compat + self.assertIs(Timestamp('NaT'), pd.NaT) + self.assertIs(Period('NaT'), pd.NaT) + + self.assertIs(Timestamp(None), pd.NaT) + self.assertIs(Period(None), pd.NaT) - self.assertRaises(ValueError, Period, 'NaT') + self.assertIs(Timestamp(float('nan')), pd.NaT) + self.assertIs(Period(float('nan')), pd.NaT) + + self.assertIs(Timestamp(np.nan), pd.NaT) + self.assertIs(Period(np.nan), pd.NaT) def test_period_cons_mult(self): p1 = Period('2011-01', freq='3M') @@ -197,13 +213,6 @@ def test_timestamp_tz_arg_dateutil_from_string(self): freq='M').to_timestamp(tz='dateutil/Europe/Brussels') self.assertEqual(p.tz, gettz('Europe/Brussels')) - def test_timestamp_nat_tz(self): - t = Period('NaT', freq='M').to_timestamp() - self.assertTrue(t is tslib.NaT) - - t = Period('NaT', freq='M').to_timestamp(tz='Asia/Tokyo') - self.assertTrue(t is tslib.NaT) - def test_timestamp_mult(self): p = pd.Period('2011-01', freq='M') self.assertEqual(p.to_timestamp(how='S'), pd.Timestamp('2011-01-01')) @@ -213,12 +222,6 @@ def test_timestamp_mult(self): self.assertEqual(p.to_timestamp(how='S'), pd.Timestamp('2011-01-01')) self.assertEqual(p.to_timestamp(how='E'), pd.Timestamp('2011-03-31')) - def test_timestamp_nat_mult(self): - for freq in ['M', '3M']: - p = pd.Period('NaT', freq=freq) - self.assertTrue(p.to_timestamp(how='S') is pd.NaT) - self.assertTrue(p.to_timestamp(how='E') is pd.NaT) - def test_period_constructor(self): i1 = Period('1/1/2005', freq='M') i2 = Period('Jan 2005') @@ -552,9 +555,6 @@ def _ex(p): result = p.to_timestamp('5S', how='start') self.assertEqual(result, expected) - p = Period('NaT', freq='W') - self.assertTrue(p.to_timestamp() is tslib.NaT) - def test_start_time(self): freq_lst = ['A', 'Q', 'M', 'D', 'H', 'T', 'S'] xp = datetime(2012, 1, 1) @@ -566,9 +566,6 @@ def test_start_time(self): self.assertEqual(Period('2012', freq='W').start_time, datetime(2011, 12, 26)) - p = Period('NaT', freq='W') - self.assertTrue(p.start_time is tslib.NaT) - def test_end_time(self): p = Period('2012', freq='A') @@ -607,9 +604,6 @@ def _ex(*args): xp = _ex(2012, 1, 16) self.assertEqual(xp, p.end_time) - p = Period('NaT', freq='W') - self.assertTrue(p.end_time is tslib.NaT) - def test_anchor_week_end_time(self): def _ex(*args): return Timestamp(Timestamp(datetime(*args)).value - 1) @@ -758,15 +752,14 @@ def test_properties_secondly(self): def test_properties_nat(self): p_nat = Period('NaT', freq='M') t_nat = pd.Timestamp('NaT') + self.assertIs(p_nat, t_nat) + # confirm Period('NaT') work identical with Timestamp('NaT') for f in ['year', 'month', 'day', 'hour', 'minute', 'second', 'week', 'dayofyear', 'quarter', 'days_in_month']: self.assertTrue(np.isnan(getattr(p_nat, f))) self.assertTrue(np.isnan(getattr(t_nat, f))) - for f in ['weekofyear', 'dayofweek', 'weekday', 'qyear']: - self.assertTrue(np.isnan(getattr(p_nat, f))) - def test_pnow(self): dt = datetime.now() @@ -789,7 +782,7 @@ def test_constructor_corner(self): self.assertRaises(ValueError, Period, 1.6, freq='D') self.assertRaises(ValueError, Period, ordinal=1.6, freq='D') self.assertRaises(ValueError, Period, ordinal=2, value=1, freq='D') - self.assertRaises(ValueError, Period) + self.assertIs(Period(None), pd.NaT) self.assertRaises(ValueError, Period, month=1) p = Period('2007-01-01', freq='D') @@ -1526,12 +1519,6 @@ def test_conv_secondly(self): self.assertEqual(ival_S.asfreq('S'), ival_S) - def test_asfreq_nat(self): - p = Period('NaT', freq='A') - result = p.asfreq('M') - self.assertEqual(result.ordinal, tslib.iNaT) - self.assertEqual(result.freq, 'M') - def test_asfreq_mult(self): # normal freq to mult freq p = Period(freq='A', year=2007) @@ -1603,21 +1590,6 @@ def test_asfreq_mult(self): self.assertEqual(result.ordinal, expected.ordinal) self.assertEqual(result.freq, expected.freq) - def test_asfreq_mult_nat(self): - # normal freq to mult freq - for p in [Period('NaT', freq='A'), Period('NaT', freq='3A'), - Period('NaT', freq='2M'), Period('NaT', freq='3D')]: - for freq in ['3A', offsets.YearEnd(3)]: - result = p.asfreq(freq) - expected = Period('NaT', freq='3A') - self.assertEqual(result.ordinal, pd.tslib.iNaT) - self.assertEqual(result.freq, expected.freq) - - result = p.asfreq(freq, how='S') - expected = Period('NaT', freq='3A') - self.assertEqual(result.ordinal, pd.tslib.iNaT) - self.assertEqual(result.freq, expected.freq) - class TestPeriodIndex(tm.TestCase): def setUp(self): @@ -1995,6 +1967,19 @@ def test_getitem_datetime(self): rs = ts[dt1:dt4] tm.assert_series_equal(rs, ts) + def test_getitem_nat(self): + idx = pd.PeriodIndex(['2011-01', 'NaT', '2011-02'], freq='M') + self.assertEqual(idx[0], pd.Period('2011-01', freq='M')) + self.assertIs(idx[1], tslib.NaT) + + s = pd.Series([0, 1, 2], index=idx) + self.assertEqual(s[pd.NaT], 1) + + s = pd.Series(idx, index=idx) + self.assertEqual(s[pd.Period('2011-01', freq='M')], + pd.Period('2011-01', freq='M')) + self.assertIs(s[pd.NaT], tslib.NaT) + def test_slice_with_negative_step(self): ts = Series(np.arange(20), period_range('2014-01', periods=20, freq='M')) @@ -2038,6 +2023,19 @@ def test_contains(self): self.assertFalse(Period('2007-01', freq='D') in rng) self.assertFalse(Period('2007-01', freq='2M') in rng) + def test_contains_nat(self): + idx = period_range('2007-01', freq='M', periods=10) + self.assertFalse(pd.NaT in idx) + self.assertFalse(None in idx) + self.assertFalse(float('nan') in idx) + self.assertFalse(np.nan in idx) + + idx = pd.PeriodIndex(['2011-01', 'NaT', '2011-02'], freq='M') + self.assertTrue(pd.NaT in idx) + self.assertTrue(None in idx) + self.assertTrue(float('nan') in idx) + self.assertTrue(np.nan in idx) + def test_sub(self): rng = period_range('2007-01', periods=50) @@ -3292,6 +3290,17 @@ def test_get_loc_msg(self): except KeyError as inst: self.assertEqual(inst.args[0], bad_period) + def test_get_loc_nat(self): + didx = DatetimeIndex(['2011-01-01', 'NaT', '2011-01-03']) + pidx = PeriodIndex(['2011-01-01', 'NaT', '2011-01-03'], freq='M') + + # check DatetimeIndex compat + for idx in [didx, pidx]: + self.assertEqual(idx.get_loc(pd.NaT), 1) + self.assertEqual(idx.get_loc(None), 1) + self.assertEqual(idx.get_loc(float('nan')), 1) + self.assertEqual(idx.get_loc(np.nan), 1) + def test_append_concat(self): # #1815 d1 = date_range('12/31/1990', '12/31/1999', freq='A-DEC') @@ -3576,95 +3585,87 @@ def test_add_offset_nat(self): for freq in ['A', '2A', '3A']: p = Period('NaT', freq=freq) for o in [offsets.YearEnd(2)]: - self.assertEqual((p + o).ordinal, tslib.iNaT) - self.assertEqual((o + p).ordinal, tslib.iNaT) + self.assertIs(p + o, tslib.NaT) + self.assertIs(o + p, tslib.NaT) for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(365, 'D'), timedelta(365)]: - with tm.assertRaises(period.IncompatibleFrequency): - p + o + self.assertIs(p + o, tslib.NaT) if isinstance(o, np.timedelta64): with tm.assertRaises(TypeError): o + p else: - with tm.assertRaises(period.IncompatibleFrequency): - o + p + self.assertIs(o + p, tslib.NaT) for freq in ['M', '2M', '3M']: p = Period('NaT', freq=freq) for o in [offsets.MonthEnd(2), offsets.MonthEnd(12)]: - self.assertEqual((p + o).ordinal, tslib.iNaT) + self.assertIs(p + o, tslib.NaT) if isinstance(o, np.timedelta64): with tm.assertRaises(TypeError): o + p else: - self.assertEqual((o + p).ordinal, tslib.iNaT) + self.assertIs(o + p, tslib.NaT) for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(365, 'D'), timedelta(365)]: - with tm.assertRaises(period.IncompatibleFrequency): - p + o + self.assertIs(p + o, tslib.NaT) if isinstance(o, np.timedelta64): with tm.assertRaises(TypeError): o + p else: - with tm.assertRaises(period.IncompatibleFrequency): - o + p + self.assertIs(o + p, tslib.NaT) + # freq is Tick for freq in ['D', '2D', '3D']: p = Period('NaT', freq=freq) for o in [offsets.Day(5), offsets.Hour(24), np.timedelta64(2, 'D'), np.timedelta64(3600 * 24, 's'), timedelta(-2), timedelta(hours=48)]: - self.assertEqual((p + o).ordinal, tslib.iNaT) + self.assertIs(p + o, tslib.NaT) if isinstance(o, np.timedelta64): with tm.assertRaises(TypeError): o + p else: - self.assertEqual((o + p).ordinal, tslib.iNaT) + self.assertIs(o + p, tslib.NaT) for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(4, 'h'), timedelta(hours=23)]: - - with tm.assertRaises(period.IncompatibleFrequency): - p + o + self.assertIs(p + o, tslib.NaT) if isinstance(o, np.timedelta64): with tm.assertRaises(TypeError): o + p else: - with tm.assertRaises(period.IncompatibleFrequency): - o + p + self.assertIs(o + p, tslib.NaT) for freq in ['H', '2H', '3H']: p = Period('NaT', freq=freq) for o in [offsets.Day(2), offsets.Hour(3), np.timedelta64(3, 'h'), np.timedelta64(3600, 's'), timedelta(minutes=120), timedelta(days=4, minutes=180)]: - self.assertEqual((p + o).ordinal, tslib.iNaT) + self.assertIs(p + o, tslib.NaT) if not isinstance(o, np.timedelta64): - self.assertEqual((o + p).ordinal, tslib.iNaT) + self.assertIs(o + p, tslib.NaT) for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(3200, 's'), timedelta(hours=23, minutes=30)]: - with tm.assertRaises(period.IncompatibleFrequency): - p + o + self.assertIs(p + o, tslib.NaT) if isinstance(o, np.timedelta64): with tm.assertRaises(TypeError): o + p else: - with tm.assertRaises(period.IncompatibleFrequency): - o + p + self.assertIs(o + p, tslib.NaT) def test_sub_pdnat(self): # GH 13071 @@ -3749,24 +3750,22 @@ def test_sub_offset_nat(self): for freq in ['A', '2A', '3A']: p = Period('NaT', freq=freq) for o in [offsets.YearEnd(2)]: - self.assertEqual((p - o).ordinal, tslib.iNaT) + self.assertIs(p - o, tslib.NaT) for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(365, 'D'), timedelta(365)]: - with tm.assertRaises(period.IncompatibleFrequency): - p - o + self.assertIs(p - o, tslib.NaT) for freq in ['M', '2M', '3M']: p = Period('NaT', freq=freq) for o in [offsets.MonthEnd(2), offsets.MonthEnd(12)]: - self.assertEqual((p - o).ordinal, tslib.iNaT) + self.assertIs(p - o, tslib.NaT) for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(365, 'D'), timedelta(365)]: - with tm.assertRaises(period.IncompatibleFrequency): - p - o + self.assertIs(p - o, tslib.NaT) # freq is Tick for freq in ['D', '2D', '3D']: @@ -3774,37 +3773,33 @@ def test_sub_offset_nat(self): for o in [offsets.Day(5), offsets.Hour(24), np.timedelta64(2, 'D'), np.timedelta64(3600 * 24, 's'), timedelta(-2), timedelta(hours=48)]: - self.assertEqual((p - o).ordinal, tslib.iNaT) + self.assertIs(p - o, tslib.NaT) for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(4, 'h'), timedelta(hours=23)]: - with tm.assertRaises(period.IncompatibleFrequency): - p - o + self.assertIs(p - o, tslib.NaT) for freq in ['H', '2H', '3H']: p = Period('NaT', freq=freq) for o in [offsets.Day(2), offsets.Hour(3), np.timedelta64(3, 'h'), np.timedelta64(3600, 's'), timedelta(minutes=120), timedelta(days=4, minutes=180)]: - self.assertEqual((p - o).ordinal, tslib.iNaT) + self.assertIs(p - o, tslib.NaT) for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(3200, 's'), timedelta(hours=23, minutes=30)]: - with tm.assertRaises(period.IncompatibleFrequency): - p - o + self.assertIs(p - o, tslib.NaT) def test_nat_ops(self): for freq in ['M', '2M', '3M']: p = Period('NaT', freq=freq) - self.assertEqual((p + 1).ordinal, tslib.iNaT) - self.assertEqual((1 + p).ordinal, tslib.iNaT) - self.assertEqual((p - 1).ordinal, tslib.iNaT) - self.assertEqual((p - Period('2011-01', freq=freq)).ordinal, - tslib.iNaT) - self.assertEqual((Period('2011-01', freq=freq) - p).ordinal, - tslib.iNaT) + self.assertIs(p + 1, tslib.NaT) + self.assertIs(1 + p, tslib.NaT) + self.assertIs(p - 1, tslib.NaT) + self.assertIs(p - Period('2011-01', freq=freq), tslib.NaT) + self.assertIs(Period('2011-01', freq=freq) - p, tslib.NaT) def test_period_ops_offset(self): p = Period('2011-04-01', freq='D') @@ -3830,18 +3825,17 @@ class TestPeriodIndexSeriesMethods(tm.TestCase): def _check(self, values, func, expected): idx = pd.PeriodIndex(values) result = func(idx) - tm.assert_index_equal(result, pd.PeriodIndex(expected)) + if isinstance(expected, pd.Index): + tm.assert_index_equal(result, expected) + else: + # comp op results in bool + tm.assert_numpy_array_equal(result, expected) s = pd.Series(values) result = func(s) - exp = pd.Series(expected) - # Period(NaT) != Period(NaT) - - lmask = result.map(lambda x: x.ordinal != tslib.iNaT) - rmask = exp.map(lambda x: x.ordinal != tslib.iNaT) - tm.assert_series_equal(lmask, rmask) - tm.assert_series_equal(result[lmask], exp[rmask]) + exp = pd.Series(expected, name=values.name) + tm.assert_series_equal(result, exp) def test_pi_ops(self): idx = PeriodIndex(['2011-01', '2011-02', '2011-03', @@ -3962,7 +3956,7 @@ def test_pi_sub_period(self): exp = pd.Index([12, 11, 10, 9], name='idx') tm.assert_index_equal(result, exp) - exp = pd.Index([np.nan, np.nan, np.nan, np.nan], name='idx') + exp = pd.TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name='idx') tm.assert_index_equal(idx - pd.Period('NaT', freq='M'), exp) tm.assert_index_equal(pd.Period('NaT', freq='M') - idx, exp) @@ -3987,10 +3981,82 @@ def test_pi_sub_period_nat(self): exp = pd.Index([12, np.nan, 10, 9], name='idx') tm.assert_index_equal(result, exp) - exp = pd.Index([np.nan, np.nan, np.nan, np.nan], name='idx') + exp = pd.TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name='idx') tm.assert_index_equal(idx - pd.Period('NaT', freq='M'), exp) tm.assert_index_equal(pd.Period('NaT', freq='M') - idx, exp) + def test_pi_comp_period(self): + idx = PeriodIndex(['2011-01', '2011-02', '2011-03', + '2011-04'], freq='M', name='idx') + + f = lambda x: x == pd.Period('2011-03', freq='M') + exp = np.array([False, False, True, False], dtype=np.bool) + self._check(idx, f, exp) + f = lambda x: pd.Period('2011-03', freq='M') == x + self._check(idx, f, exp) + + f = lambda x: x != pd.Period('2011-03', freq='M') + exp = np.array([True, True, False, True], dtype=np.bool) + self._check(idx, f, exp) + f = lambda x: pd.Period('2011-03', freq='M') != x + self._check(idx, f, exp) + + f = lambda x: pd.Period('2011-03', freq='M') >= x + exp = np.array([True, True, True, False], dtype=np.bool) + self._check(idx, f, exp) + + f = lambda x: x > pd.Period('2011-03', freq='M') + exp = np.array([False, False, False, True], dtype=np.bool) + self._check(idx, f, exp) + + f = lambda x: pd.Period('2011-03', freq='M') >= x + exp = np.array([True, True, True, False], dtype=np.bool) + self._check(idx, f, exp) + + def test_pi_comp_period_nat(self): + idx = PeriodIndex(['2011-01', 'NaT', '2011-03', + '2011-04'], freq='M', name='idx') + + f = lambda x: x == pd.Period('2011-03', freq='M') + exp = np.array([False, False, True, False], dtype=np.bool) + self._check(idx, f, exp) + f = lambda x: pd.Period('2011-03', freq='M') == x + self._check(idx, f, exp) + + f = lambda x: x == tslib.NaT + exp = np.array([False, False, False, False], dtype=np.bool) + self._check(idx, f, exp) + f = lambda x: tslib.NaT == x + self._check(idx, f, exp) + + f = lambda x: x != pd.Period('2011-03', freq='M') + exp = np.array([True, True, False, True], dtype=np.bool) + self._check(idx, f, exp) + f = lambda x: pd.Period('2011-03', freq='M') != x + self._check(idx, f, exp) + + f = lambda x: x != tslib.NaT + exp = np.array([True, True, True, True], dtype=np.bool) + self._check(idx, f, exp) + f = lambda x: tslib.NaT != x + self._check(idx, f, exp) + + f = lambda x: pd.Period('2011-03', freq='M') >= x + exp = np.array([True, False, True, False], dtype=np.bool) + self._check(idx, f, exp) + + f = lambda x: x < pd.Period('2011-03', freq='M') + exp = np.array([True, False, False, False], dtype=np.bool) + self._check(idx, f, exp) + + f = lambda x: x > tslib.NaT + exp = np.array([False, False, False, False], dtype=np.bool) + self._check(idx, f, exp) + + f = lambda x: tslib.NaT >= x + exp = np.array([False, False, False, False], dtype=np.bool) + self._check(idx, f, exp) + class TestPeriodRepresentation(tm.TestCase): """ diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py index ce88edcf4249b..7e97632ddfef3 100644 --- a/pandas/tseries/tests/test_tslib.py +++ b/pandas/tseries/tests/test_tslib.py @@ -1197,6 +1197,13 @@ def test_nat_arithmetic(self): self.assertIs(left - right, pd.NaT) self.assertIs(right - left, pd.NaT) + # int addition / subtraction + for (left, right) in [(pd.NaT, 2), (pd.NaT, 0), (pd.NaT, -3)]: + self.assertIs(right + left, pd.NaT) + self.assertIs(left + right, pd.NaT) + self.assertIs(left - right, pd.NaT) + self.assertIs(right - left, pd.NaT) + def test_nat_arithmetic_index(self): # GH 11718 diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 650b4c7979d8d..61955747b6d78 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -1082,7 +1082,10 @@ cdef class _Timestamp(datetime): return Timestamp(self.value + other_int, tz=self.tzinfo, freq=self.freq) elif is_integer_object(other): - if self.freq is None: + if self is NaT: + # to be compat with Period + return NaT + elif self.freq is None: raise ValueError("Cannot add integral value to Timestamp " "without freq.") return Timestamp((self.freq * other).apply(self), freq=self.freq)