diff --git a/doc/source/v0.14.1.txt b/doc/source/v0.14.1.txt index 18ae412c9711d..c6144619da963 100644 --- a/doc/source/v0.14.1.txt +++ b/doc/source/v0.14.1.txt @@ -130,7 +130,7 @@ Enhancements - All offsets ``apply``, ``rollforward`` and ``rollback`` can now handle ``np.datetime64``, previously results in ``ApplyTypeError`` (:issue:`7452`) - +- ``Period`` and ``PeriodIndex`` can contain ``NaT`` in their values (:issue:`7485`) .. _whatsnew_0141.performance: @@ -239,6 +239,9 @@ Bug Fixes - Bug in passing input with ``tzinfo`` to some offsets ``apply``, ``rollforward`` or ``rollback`` resets ``tzinfo`` or raises ``ValueError`` (:issue:`7465`) +- Bug in ``DatetimeIndex.to_period``, ``PeriodIndex.asobject``, ``PeriodIndex.to_timestamp`` doesn't preserve ``name`` (:issue:`7485`) +- Bug in ``DatetimeIndex.to_period`` and ``PeriodIndex.to_timestamp`` handle ``NaT`` incorrectly (:issue:`7228`) + - BUG in ``resample`` raises ``ValueError`` when target contains ``NaT`` (:issue:`7227`) diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 87c1742c54b01..16468f24a0ee1 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -809,7 +809,7 @@ def to_period(self, freq=None): if freq is None: freq = get_period_alias(self.freqstr) - return PeriodIndex(self.values, freq=freq, tz=self.tz) + return PeriodIndex(self.values, name=self.name, freq=freq, tz=self.tz) def order(self, return_indexer=False, ascending=True): """ diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 31785bb7a6753..c44c3c9272f6a 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -102,6 +102,12 @@ def __init__(self, value=None, freq=None, ordinal=None, converted = other.asfreq(freq) self.ordinal = converted.ordinal + elif com._is_null_datelike_scalar(value) or value in tslib._nat_strings: + self.ordinal = tslib.iNaT + if freq is None: + raise ValueError("If value is NaT, freq cannot be None " + "because it cannot be inferred") + elif isinstance(value, 
compat.string_types) or com.is_integer(value): if com.is_integer(value): value = str(value) @@ -136,6 +142,8 @@ def __eq__(self, other): if isinstance(other, Period): if other.freq != self.freq: raise ValueError("Cannot compare non-conforming periods") + if self.ordinal == tslib.iNaT or other.ordinal == tslib.iNaT: + return False return (self.ordinal == other.ordinal and _gfc(self.freq) == _gfc(other.freq)) return NotImplemented @@ -148,26 +156,38 @@ def __hash__(self): def __add__(self, other): if com.is_integer(other): - return Period(ordinal=self.ordinal + other, freq=self.freq) + if self.ordinal == tslib.iNaT: + ordinal = self.ordinal + else: + ordinal = self.ordinal + other + return Period(ordinal=ordinal, freq=self.freq) else: # pragma: no cover - raise TypeError(other) + return NotImplemented def __sub__(self, other): if com.is_integer(other): - return Period(ordinal=self.ordinal - other, freq=self.freq) + if self.ordinal == tslib.iNaT: + ordinal = self.ordinal + else: + ordinal = self.ordinal - other + return Period(ordinal=ordinal, freq=self.freq) if isinstance(other, Period): if other.freq != self.freq: raise ValueError("Cannot do arithmetic with " "non-conforming periods") + if self.ordinal == tslib.iNaT or other.ordinal == tslib.iNaT: + return Period(ordinal=tslib.iNaT, freq=self.freq) return self.ordinal - other.ordinal else: # pragma: no cover - raise TypeError(other) + return NotImplemented def _comp_method(func, name): def f(self, other): if isinstance(other, Period): if other.freq != self.freq: raise ValueError("Cannot compare non-conforming periods") + if self.ordinal == tslib.iNaT or other.ordinal == tslib.iNaT: + return False return func(self.ordinal, other.ordinal) else: raise TypeError(other) @@ -213,7 +233,10 @@ def start_time(self): @property def end_time(self): - ordinal = (self + 1).start_time.value - 1 + if self.ordinal == tslib.iNaT: + ordinal = self.ordinal + else: + ordinal = (self + 1).start_time.value - 1 return Timestamp(ordinal) 
def to_timestamp(self, freq=None, how='start', tz=None): @@ -480,6 +503,11 @@ def _period_index_cmp(opname): Wrap comparison operations to convert datetime-like to datetime64 """ def wrapper(self, other): + if opname == '__ne__': + fill_value = True + else: + fill_value = False + if isinstance(other, Period): func = getattr(self.values, opname) if other.freq != self.freq: @@ -489,12 +517,26 @@ def wrapper(self, other): elif isinstance(other, PeriodIndex): if other.freq != self.freq: raise AssertionError("Frequencies must be equal") - return getattr(self.values, opname)(other.values) + + result = getattr(self.values, opname)(other.values) + + mask = (com.mask_missing(self.values, tslib.iNaT) | + com.mask_missing(other.values, tslib.iNaT)) + if mask.any(): + result[mask] = fill_value + + return result else: other = Period(other, freq=self.freq) func = getattr(self.values, opname) result = func(other.ordinal) + if other.ordinal == tslib.iNaT: + result.fill(fill_value) + mask = self.values == tslib.iNaT + if mask.any(): + result[mask] = fill_value + return result return wrapper @@ -712,7 +754,7 @@ def asof_locs(self, where, mask): @property def asobject(self): - return Index(self._box_values(self.values), dtype=object) + return Index(self._box_values(self.values), name=self.name, dtype=object) def _array_values(self): return self.asobject @@ -768,11 +810,7 @@ def asfreq(self, freq=None, how='E'): end = how == 'E' new_data = tslib.period_asfreq_arr(self.values, base1, base2, end) - - result = new_data.view(PeriodIndex) - result.name = self.name - result.freq = freq - return result + return self._simple_new(new_data, self.name, freq=freq) def to_datetime(self, dayfirst=False): return self.to_timestamp() @@ -868,16 +906,23 @@ def shift(self, n): ------- shifted : PeriodIndex """ - if n == 0: - return self - - return PeriodIndex(data=self.values + n, freq=self.freq) + mask = self.values == tslib.iNaT + values = self.values + n + values[mask] = tslib.iNaT + return 
PeriodIndex(data=values, name=self.name, freq=self.freq) def __add__(self, other): - return PeriodIndex(ordinal=self.values + other, freq=self.freq) + try: + return self.shift(other) + except TypeError: + # self.values + other raises TypeError for invalid input + return NotImplemented def __sub__(self, other): - return PeriodIndex(ordinal=self.values - other, freq=self.freq) + try: + return self.shift(-other) + except TypeError: + return NotImplemented @property def inferred_type(self): @@ -1207,8 +1252,11 @@ def _get_ordinal_range(start, end, periods, freq): is_start_per = isinstance(start, Period) is_end_per = isinstance(end, Period) - if is_start_per and is_end_per and (start.freq != end.freq): + if is_start_per and is_end_per and start.freq != end.freq: raise ValueError('Start and end must have same freq') + if ((is_start_per and start.ordinal == tslib.iNaT) or + (is_end_per and end.ordinal == tslib.iNaT)): + raise ValueError('Start and end must not be NaT') if freq is None: if is_start_per: diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index d58621b320a84..af39bba8e43af 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -24,6 +24,7 @@ from pandas.compat import range, lrange, lmap, zip from pandas import Series, TimeSeries, DataFrame, _np_version_under1p9 +from pandas import tslib from pandas.util.testing import(assert_series_equal, assert_almost_equal, assertRaisesRegexp) import pandas.util.testing as tm @@ -77,6 +78,21 @@ def test_period_cons_weekly(self): expected = Period(daystr, freq='D').asfreq(freq) self.assertEqual(result, expected) + def test_period_cons_nat(self): + p = Period('NaT', freq='M') + self.assertEqual(p.ordinal, tslib.iNaT) + self.assertEqual(p.freq, 'M') + + p = Period('nat', freq='W-SUN') + self.assertEqual(p.ordinal, tslib.iNaT) + self.assertEqual(p.freq, 'W-SUN') + + p = Period(tslib.iNaT, freq='D') + self.assertEqual(p.ordinal, tslib.iNaT) + 
self.assertEqual(p.freq, 'D') + + self.assertRaises(ValueError, Period, 'NaT') + def test_timestamp_tz_arg(self): import pytz p = Period('1/1/2005', freq='M').to_timestamp(tz='Europe/Brussels') @@ -94,6 +110,13 @@ def test_timestamp_tz_arg_dateutil_from_string(self): p = Period('1/1/2005', freq='M').to_timestamp(tz='dateutil/Europe/Brussels') self.assertEqual(p.tz, dateutil.tz.gettz('Europe/Brussels')) + def test_timestamp_nat_tz(self): + t = Period('NaT', freq='M').to_timestamp() + self.assertTrue(t is tslib.NaT) + + t = Period('NaT', freq='M').to_timestamp(tz='Asia/Tokyo') + self.assertTrue(t is tslib.NaT) + def test_period_constructor(self): i1 = Period('1/1/2005', freq='M') i2 = Period('Jan 2005') @@ -219,6 +242,10 @@ def test_repr(self): p = Period('2000-12-15') self.assertIn('2000-12-15', repr(p)) + def test_repr_nat(self): + p = Period('nat', freq='M') + self.assertIn(repr(tslib.NaT), repr(p)) + def test_millisecond_repr(self): p = Period('2000-01-01 12:15:02.123') @@ -296,6 +323,9 @@ def _ex(p): assertRaisesRegexp(ValueError, 'Only mult == 1', p.to_timestamp, '5t') + p = Period('NaT', freq='W') + self.assertTrue(p.to_timestamp() is tslib.NaT) + def test_start_time(self): freq_lst = ['A', 'Q', 'M', 'D', 'H', 'T', 'S'] xp = datetime(2012, 1, 1) @@ -307,6 +337,9 @@ def test_start_time(self): self.assertEqual(Period('2012', freq='W').start_time, datetime(2011, 12, 26)) + p = Period('NaT', freq='W') + self.assertTrue(p.start_time is tslib.NaT) + def test_end_time(self): p = Period('2012', freq='A') @@ -338,6 +371,9 @@ def _ex(*args): xp = _ex(2012, 1, 2) self.assertEqual(Period('2012', freq='W').end_time, xp) + p = Period('NaT', freq='W') + self.assertTrue(p.end_time is tslib.NaT) + def test_anchor_week_end_time(self): def _ex(*args): return Timestamp(Timestamp(datetime(*args)).value - 1) @@ -449,6 +485,18 @@ def test_properties_secondly(self): assert_equal(s_date.minute, 0) assert_equal(s_date.second, 0) + def test_properties_nat(self): + p_nat = Period('NaT', 
freq='M') + t_nat = pd.Timestamp('NaT') + # confirm Period('NaT') work identical with Timestamp('NaT') + for f in ['year', 'month', 'day', 'hour', 'minute', 'second', + 'week', 'dayofyear', 'quarter']: + self.assertEqual(getattr(p_nat, f), -1) + self.assertEqual(getattr(t_nat, f), -1) + + for f in ['weekofyear', 'dayofweek', 'weekday', 'qyear']: + self.assertEqual(getattr(p_nat, f), -1) + def test_pnow(self): dt = datetime.now() @@ -1084,6 +1132,12 @@ def test_conv_secondly(self): assert_equal(ival_S.asfreq('S'), ival_S) + def test_asfreq_nat(self): + p = Period('NaT', freq='A') + result = p.asfreq('M') + self.assertEqual(result.ordinal, tslib.iNaT) + self.assertEqual(result.freq, 'M') + class TestPeriodIndex(tm.TestCase): @@ -1213,6 +1267,12 @@ def test_constructor_simple_new(self): result = idx._simple_new(idx.astype('i8'), 'p', freq=idx.freq) self.assertTrue(result.equals(idx)) + def test_constructor_nat(self): + self.assertRaises( + ValueError, period_range, start='NaT', end='2011-01-01', freq='M') + self.assertRaises( + ValueError, period_range, start='2011-01-01', end='NaT', freq='M') + def test_is_(self): create_index = lambda: PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') @@ -1374,6 +1434,32 @@ def test_to_timestamp_repr_is_code(self): for z in zs: self.assertEqual( eval(repr(z)), z) + def test_to_timestamp_period_nat(self): + # GH 7228 + index = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='M', name='idx') + + result = index.to_timestamp('D') + expected = DatetimeIndex([pd.NaT, datetime(2011, 1, 1), + datetime(2011, 2, 1)], name='idx') + self.assertTrue(result.equals(expected)) + self.assertEqual(result.name, 'idx') + + result2 = result.to_period(freq='M') + self.assertTrue(result2.equals(index)) + self.assertEqual(result2.name, 'idx') + + def test_asobject_period_nat(self): + index = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='M', name='idx') + + result = index.asobject + self.assertTrue(isinstance(result, Index)) + 
self.assertEqual(result.dtype, object) + self.assertTrue(isinstance(result[0], Period)) + self.assertEqual(result[0].ordinal, tslib.iNaT) + self.assertEqual(result[1], Period('2011-01', freq='M')) + self.assertEqual(result[2], Period('2011-02', freq='M')) + self.assertEqual(result.name, 'idx') + def test_as_frame_columns(self): rng = period_range('1/1/2000', periods=5) df = DataFrame(randn(10, 5), columns=rng) @@ -1649,6 +1735,13 @@ def test_shift(self): assert_equal(len(pi1), len(pi2)) assert_equal(pi1.shift(-1).values, pi2.values) + def test_shift_nat(self): + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'], freq='M', name='idx') + result = idx.shift(1) + expected = PeriodIndex(['2011-02', '2011-03', 'NaT', '2011-05'], freq='M', name='idx') + self.assertTrue(result.equals(expected)) + self.assertEqual(result.name, expected.name) + def test_asfreq(self): pi1 = PeriodIndex(freq='A', start='1/1/2001', end='1/1/2001') pi2 = PeriodIndex(freq='Q', start='1/1/2001', end='1/1/2001') @@ -1711,6 +1804,12 @@ def test_asfreq(self): self.assertRaises(ValueError, pi7.asfreq, 'T', 'foo') self.assertRaises(ValueError, pi1.asfreq, '5t') + def test_asfreq_nat(self): + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'], freq='M') + result = idx.asfreq(freq='Q') + expected = PeriodIndex(['2011Q1', '2011Q1', 'NaT', '2011Q2'], freq='Q') + self.assertTrue(result.equals(expected)) + def test_ts_repr(self): index = PeriodIndex(freq='A', start='1/1/2001', end='12/31/2010') ts = Series(np.random.randn(len(index)), index=index) @@ -1987,6 +2086,11 @@ def test_range_slice_outofbounds(self): tm.assert_frame_equal(df['2013-06':'2013-09'], empty) tm.assert_frame_equal(df['2013-11':'2013-12'], empty) + def test_pindex_fieldaccessor_nat(self): + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2012-03', '2012-04'], freq='D') + self.assert_numpy_array_equal(idx.year, np.array([2011, 2011, -1, 2012, 2012])) + self.assert_numpy_array_equal(idx.month, np.array([1, 2, -1, 3, 4])) + 
def test_pindex_qaccess(self): pi = PeriodIndex(['2Q05', '3Q05', '4Q05', '1Q06', '2Q06'], freq='Q') s = Series(np.random.rand(len(pi)), index=pi).cumsum() @@ -2382,8 +2486,33 @@ def test_add(self): dt2 = Period(freq='D', year=2008, month=1, day=2) assert_equal(dt1 + 1, dt2) # - self.assertRaises(TypeError, dt1.__add__, "str") - self.assertRaises(TypeError, dt1.__add__, dt2) + # GH 4731 + msg = "unsupported operand type\(s\)" + with tm.assertRaisesRegexp(TypeError, msg): + dt1 + "str" + + with tm.assertRaisesRegexp(TypeError, msg): + dt1 + dt2 + + def test_nat_ops(self): + p = Period('NaT', freq='M') + self.assertEqual((p + 1).ordinal, tslib.iNaT) + self.assertEqual((p - 1).ordinal, tslib.iNaT) + self.assertEqual((p - Period('2011-01', freq='M')).ordinal, tslib.iNaT) + self.assertEqual((Period('2011-01', freq='M') - p).ordinal, tslib.iNaT) + + def test_pi_ops_nat(self): + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'], freq='M', name='idx') + result = idx + 2 + expected = PeriodIndex(['2011-03', '2011-04', 'NaT', '2011-06'], freq='M', name='idx') + self.assertTrue(result.equals(expected)) + + result2 = result - 2 + self.assertTrue(result2.equals(idx)) + + msg = "unsupported operand type\(s\)" + with tm.assertRaisesRegexp(TypeError, msg): + idx + "str" class TestPeriodRepresentation(tm.TestCase): @@ -2459,7 +2588,8 @@ def test_equal(self): self.assertEqual(self.january1, self.january2) def test_equal_Raises_Value(self): - self.assertRaises(ValueError, self.january1.__eq__, self.day) + with tm.assertRaises(ValueError): + self.january1 == self.day def test_notEqual(self): self.assertNotEqual(self.january1, 1) @@ -2469,41 +2599,87 @@ def test_greater(self): self.assertTrue(self.february > self.january1) def test_greater_Raises_Value(self): - self.assertRaises(ValueError, self.january1.__gt__, self.day) + with tm.assertRaises(ValueError): + self.january1 > self.day def test_greater_Raises_Type(self): - self.assertRaises(TypeError, self.january1.__gt__, 1) + 
with tm.assertRaises(TypeError): + self.january1 > 1 def test_greaterEqual(self): self.assertTrue(self.january1 >= self.january2) def test_greaterEqual_Raises_Value(self): - self.assertRaises(ValueError, self.january1.__ge__, self.day) - self.assertRaises(TypeError, self.january1.__ge__, 1) + with tm.assertRaises(ValueError): + self.january1 >= self.day + with tm.assertRaises(TypeError): + print(self.january1 >= 1) def test_smallerEqual(self): self.assertTrue(self.january1 <= self.january2) def test_smallerEqual_Raises_Value(self): - self.assertRaises(ValueError, self.january1.__le__, self.day) + with tm.assertRaises(ValueError): + self.january1 <= self.day def test_smallerEqual_Raises_Type(self): - self.assertRaises(TypeError, self.january1.__le__, 1) + with tm.assertRaises(TypeError): + self.january1 <= 1 def test_smaller(self): self.assertTrue(self.january1 < self.february) def test_smaller_Raises_Value(self): - self.assertRaises(ValueError, self.january1.__lt__, self.day) + with tm.assertRaises(ValueError): + self.january1 < self.day def test_smaller_Raises_Type(self): - self.assertRaises(TypeError, self.january1.__lt__, 1) + with tm.assertRaises(TypeError): + self.january1 < 1 def test_sort(self): periods = [self.march, self.january1, self.february] correctPeriods = [self.january1, self.february, self.march] self.assertEqual(sorted(periods), correctPeriods) + def test_period_nat_comp(self): + p_nat = Period('NaT', freq='D') + p = Period('2011-01-01', freq='D') + + nat = pd.Timestamp('NaT') + t = pd.Timestamp('2011-01-01') + # confirm Period('NaT') work identical with Timestamp('NaT') + for left, right in [(p_nat, p), (p, p_nat), (p_nat, p_nat), + (nat, t), (t, nat), (nat, nat)]: + self.assertEqual(left < right, False) + self.assertEqual(left > right, False) + self.assertEqual(left == right, False) + self.assertEqual(left != right, True) + self.assertEqual(left <= right, False) + self.assertEqual(left >= right, False) + + def test_pi_nat_comp(self): + idx1 = 
PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-05'], freq='M') + + result = idx1 > Period('2011-02', freq='M') + self.assert_numpy_array_equal(result, np.array([False, False, False, True])) + + result = idx1 == Period('NaT', freq='M') + self.assert_numpy_array_equal(result, np.array([False, False, False, False])) + + result = idx1 != Period('NaT', freq='M') + self.assert_numpy_array_equal(result, np.array([True, True, True, True])) + + idx2 = PeriodIndex(['2011-02', '2011-01', '2011-04', 'NaT'], freq='M') + result = idx1 < idx2 + self.assert_numpy_array_equal(result, np.array([True, False, False, False])) + + result = idx1 == idx1 + self.assert_numpy_array_equal(result, np.array([True, True, False, True])) + + result = idx1 != idx1 + self.assert_numpy_array_equal(result, np.array([False, False, True, False])) + if __name__ == '__main__': import nose diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 2e0e8af3f5119..24b1215b949a3 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -3028,6 +3028,9 @@ def dt64arr_to_periodarr(ndarray[int64_t] dtarr, int freq, tz=None): if tz is None: for i in range(l): + if dtarr[i] == iNaT: + out[i] = iNaT + continue pandas_datetime_to_datetimestruct(dtarr[i], PANDAS_FR_ns, &dts) out[i] = get_period_ordinal(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, dts.ps, freq) @@ -3049,6 +3052,9 @@ def periodarr_to_dt64arr(ndarray[int64_t] periodarr, int freq): out = np.empty(l, dtype='i8') for i in range(l): + if periodarr[i] == iNaT: + out[i] = iNaT + continue out[i] = period_ordinal_to_dt64(periodarr[i], freq) return out @@ -3065,6 +3071,9 @@ cpdef int64_t period_asfreq(int64_t period_ordinal, int freq1, int freq2, cdef: int64_t retval + if period_ordinal == iNaT: + return iNaT + if end: retval = asfreq(period_ordinal, freq1, freq2, END) else: @@ -3100,6 +3109,9 @@ def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int freq2, bint end): relation = START for i in range(n): + if arr[i] == iNaT: + result[i] = 
iNaT + continue val = func(arr[i], relation, &finfo) if val == INT32_MIN: raise ValueError("Unable to convert to desired frequency.") @@ -3120,6 +3132,9 @@ cpdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq): date_info dinfo float subsecond_fraction + if ordinal == iNaT: + return NPY_NAT + get_date_info(ordinal, freq, &dinfo) dts.year = dinfo.year @@ -3138,6 +3153,9 @@ def period_format(int64_t value, int freq, object fmt=None): cdef: int freq_group + if value == iNaT: + return repr(NaT) + if fmt is None: freq_group = (freq // 1000) * 1000 if freq_group == 1000: # FR_ANN @@ -3241,6 +3259,8 @@ def get_period_field(int code, int64_t value, int freq): cdef accessor f = _get_accessor_func(code) if f is NULL: raise ValueError('Unrecognized period code: %d' % code) + if value == iNaT: + return -1 return f(value, freq) def get_period_field_arr(int code, ndarray[int64_t] arr, int freq): @@ -3257,6 +3277,9 @@ def get_period_field_arr(int code, ndarray[int64_t] arr, int freq): out = np.empty(sz, dtype=np.int64) for i in range(sz): + if arr[i] == iNaT: + out[i] = -1 + continue out[i] = f(arr[i], freq) return out