diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx index b29c841896072..bd1256647dda1 100644 --- a/pandas/_libs/tslibs/frequencies.pyx +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -1,5 +1,3 @@ -import re - cimport numpy as cnp cnp.import_array() @@ -7,14 +5,6 @@ from pandas._libs.tslibs.util cimport is_integer_object from pandas._libs.tslibs.ccalendar import MONTH_NUMBERS -# ---------------------------------------------------------------------- -# Constants - -# hack to handle WOM-1MON -opattern = re.compile( - r'([+\-]?\d*|[+\-]?\d*\.\d*)\s*([A-Za-z]+([\-][\dA-Za-z\-]+)?)' -) - INVALID_FREQ_ERR_MSG = "Invalid frequency: {0}" # --------------------------------------------------------------------- @@ -194,20 +184,19 @@ cpdef _base_and_stride(str freqstr): -------- _freq_and_stride('5Min') -> 'Min', 5 """ - groups = opattern.match(freqstr) + base = freqstr.lstrip('+-. 0123456789') + stride = freqstr[:freqstr.index(base)] - if not groups: + if not base: raise ValueError("Could not evaluate {freq}".format(freq=freqstr)) - stride = groups.group(1) - + # Possible for stride to be float at this point. Should it fail or floor? + # Right now it fails. if len(stride): stride = int(stride) else: stride = 1 - base = groups.group(2) - return base, stride diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index ad7c32ca31940..38ae9be10dd18 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -24,7 +24,7 @@ from pandas._libs.tslibs.util cimport ( from pandas._libs.tslibs.c_timestamp cimport _Timestamp -from pandas._libs.tslibs.ccalendar import DAY_SECONDS +from pandas._libs.tslibs.frequencies import _base_and_stride from pandas._libs.tslibs.np_datetime cimport ( cmp_scalar, reverse_ops, td64_to_tdstruct, pandas_timedeltastruct) @@ -235,7 +235,7 @@ def array_to_timedelta64(object[:] values, unit='ns', errors='raise'): else: result[i] = parse_timedelta_string(values[i]) except (TypeError, ValueError): - unit = parse_timedelta_unit(unit) + unit, stride = _base_and_stride(unit) for i in range(n): try: result[i] = convert_to_timedelta64(values[i], unit) @@ -257,36 +257,52 @@ cpdef inline object precision_from_unit(object unit): int64_t m int p - if unit == 'Y': - m = 1000000000L * 31556952 - p = 9 - elif unit == 'M': - m = 1000000000L * 2629746 - p = 9 - elif unit == 'W': - m = 1000000000L * DAY_SECONDS * 7 - p = 9 - elif unit == 'D' or unit == 'd': - m = 1000000000L * DAY_SECONDS + if unit is None: + m = 1L + p = 0 + return m, p + + unit, stride = _base_and_stride(unit) + + if unit in ['Y', 'A', 'M', 'W']: + warnings.warn("Y, A, M, and W units are deprecated and " + "will be removed in a future version.", + FutureWarning) + + # Don't know why previous implementation used the multiplication in the + # in-line comment instead of the value. Current approach saves an + # operation. 
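+    # For example, _base_and_stride('36H') returns ('H', 36), so the branch
+    # below yields m = 36 * 3600000000000L and p = 9.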
+ if unit in ['D', 'd']: + m = stride * 86400000000000L # 1000000000L * DAY_SECONDS p = 9 - elif unit == 'h': - m = 1000000000L * 3600 + elif unit in ['H', 'h']: + m = stride * 3600000000000L # 1000000000L * 3600 p = 9 - elif unit == 'm': - m = 1000000000L * 60 + elif unit in ['T', 'min', 'm']: + m = stride * 60000000000L # 1000000000L * 60 p = 9 - elif unit == 's': - m = 1000000000L + elif unit in ['S', 's']: + m = stride * 1000000000L p = 9 - elif unit == 'ms': - m = 1000000L + elif unit in ['L', 'ms']: + m = stride * 1000000L p = 6 - elif unit == 'us': - m = 1000L + elif unit in ['U', 'us']: + m = stride * 1000L p = 3 - elif unit == 'ns' or unit is None: - m = 1L + elif unit in ['N', 'ns']: + m = stride * 1L p = 0 + # deprecated units at end because rarely evaluated + elif unit in ['Y', 'A']: + m = stride * 1000000000L * 31556952 + p = 9 + elif unit == 'M': + m = stride * 1000000000L * 2629746 + p = 9 + elif unit == 'W': + m = stride * 1000000000L * 86400 * 7 + p = 9 else: raise ValueError("cannot cast unit {unit}".format(unit=unit)) return m, p @@ -305,7 +321,7 @@ cdef inline int64_t cast_from_unit(object ts, object unit) except? -1: if ts is None: return m - # cast the unit, multiply base/frace separately + # cast the unit, multiply base/frac separately # to avoid precision issues from float -> int base = ts frac = ts - base @@ -505,8 +521,9 @@ cdef inline timedelta_from_spec(object number, object frac, object unit): try: unit = ''.join(unit) + unit, stride = _base_and_stride(unit) if unit == 'M': - # To parse ISO 8601 string, 'M' should be treated as minute, + # To parse ISO 8601 string, 'm' should be treated as minute, # not month unit = 'm' unit = parse_timedelta_unit(unit) @@ -1255,6 +1272,9 @@ class Timedelta(_Timedelta): "[weeks, days, hours, minutes, seconds, " "milliseconds, microseconds, nanoseconds]") + if unit is not None: + unit, stride = _base_and_stride(unit) + if unit in {'Y', 'y', 'M'}: warnings.warn("M and Y units are deprecated and " "will be removed in a future version.", diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index b07647cf5b5fb..ce2199ddd17bd 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -7,6 +7,7 @@ from pandas._libs import tslib, tslibs from pandas._libs.tslibs import Timestamp, conversion, parsing +from pandas._libs.tslibs.frequencies import _base_and_stride from pandas._libs.tslibs.parsing import ( # noqa DateParseError, _format_is_iso, @@ -378,7 +379,6 @@ def _convert_listlike_datetimes( arg = ensure_object(arg) require_iso8601 = False - if infer_datetime_format and format is None: format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst) @@ -490,15 +490,18 @@ def _adjust_to_origin(arg, origin, unit): origin : 'julian' or Timestamp origin offset for the arg unit : string - passed unit from to_datetime, must be 'D' + passed unit from to_datetime, must be 'D' if origin is 'julian' Returns ------- ndarray or scalar of adjusted date(s) """ + from pandas import DatetimeIndex + if origin == "julian": original = arg j0 = Timestamp(0).to_julian_date() + unit, stride = _base_and_stride(unit) if unit != "D": raise ValueError("unit must be 'D' for origin='julian'") try: @@ -527,7 +530,8 @@ def _adjust_to_origin(arg, origin, unit): ) ) - # we are going to offset back to unix / epoch time + # test the origin to make sure within valid range and no time + # zone try: offset = Timestamp(origin) except tslibs.OutOfBoundsDatetime: @@ -542,18 +546,28 @@ def _adjust_to_origin(arg, origin, unit): 
if offset.tz is not None: raise ValueError("origin offset {} must be tz-naive".format(offset)) - offset -= Timestamp(0) - # convert the offset to the unit of the arg - # this should be lossless in terms of precision - offset = offset // tslibs.Timedelta(1, unit=unit) + unit, stride = _base_and_stride(unit) + + delta = tslibs.Timedelta(stride, unit=unit) - # scalars & ndarray-like can handle the addition + # scalars & ndarray-like can handle the multiplication and addition if is_list_like(arg) and not isinstance( arg, (ABCSeries, ABCIndexClass, np.ndarray) ): arg = np.asarray(arg) - arg = arg + offset + + if stride == 1 and (offset - offset.floor("D")) == tslibs.Timedelta(0): + arg = arg + (offset.value // delta.value) + else: + # convert any integer type to int64 to prevent overflow + if is_integer_dtype(arg): + arg = arg.astype("int64", copy=False) + try: + arg = DatetimeIndex((arg * delta.value) + offset.value) + except TypeError: + arg = Timestamp((arg * delta.value) + offset.value) + return arg @@ -627,11 +641,27 @@ def to_datetime( - If True, require an exact format match. - If False, allow the format to match anywhere in the target string. - unit : string, default 'ns' - unit of the arg (D,s,ms,us,ns) denote the unit, which is an - integer or float number. This will be based off the origin. - Example, with unit='ms' and origin='unix' (the default), this - would calculate the number of milliseconds to the unix epoch start. + unit : string, default is 'N' + The unit code for the value(s) in `arg`. Used when `arg` is + a numeric value or ordered collection of numeric values. + The unit code is a subset of pandas offset aliases, ISO 8601 + codes, and legacy codes. + + - 'D', for daily + - 'H' or 'h' for hourly + - 'T', 'm', or 'min' for minutely + - 'S' or 's' for seconds + - 'L' or 'ms' for milliseconds + - 'U' or 'us' for microseconds + - 'N' or 'ns' for nanoseconds + + The resulting DatetimeIndex will be based off the `origin`. + For example, with unit='L' and origin='unix' (the default) then + the values in `arg` would represent the number of milliseconds + from the unix epoch start. + + The unit code can be prefixed with a stride. For example, + results would be equivalent between unit='24H' and unit='D'. 
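A minimal usage sketch of the stride-prefixed unit codes documented above, assuming this patch is applied; the equivalences mirror cases added to test_to_datetime_stride and test_to_datetime_stride_epoch further down.

    import pandas as pd

    # A stride prefix scales the epoch values, so '24H' and 'D' are equivalent:
    left = pd.to_datetime([1, 2, 3], unit="24H")
    right = pd.to_datetime([1, 2, 3], unit="D")
    assert (left == right).all()

    # The stride also applies when counting from an explicit origin:
    ts = pd.to_datetime(2, unit="12h", origin="1990-05-03T12:00:00")
    assert ts == pd.Timestamp("1990-05-04T12:00:00")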
infer_datetime_format : boolean, default False If True and no `format` is given, attempt to infer the format of the datetime strings, and if it can be inferred, switch to a faster @@ -885,7 +915,7 @@ def coerce(values): # we allow coercion to if errors allows values = to_numeric(values, errors=errors) - # prevent overflow in case of int8 or int16 + # convert any integer type to int64 to prevent overflow if is_integer_dtype(values): values = values.astype("int64", copy=False) return values diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 9af0f47f6dce9..55b80ba6a0783 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -1358,6 +1358,98 @@ def test_to_datetime_errors_ignore_utc_true(self): expected = DatetimeIndex(["1970-01-01 00:00:01"], tz="UTC") tm.assert_index_equal(result, expected) + @pytest.mark.parametrize( + "unitl,unitr", + [ + ("D", "24h"), + ("2D", "48H"), + ("3D", "72H"), + ("4D", "96H"), + ("7D", "168H"), + ("86400000000000N", "D"), + ("60T", "H"), + ("120m", "2H"), + ("60min", "H"), + ("3600S", "h"), + ("3600000ms", "H"), + ("3600000L", "H"), + ("3600000000U", "H"), + ("60s", "m"), + ("60S", "T"), + ("60S", "min"), + ("1000ms", "S"), + ("1000L", "S"), + ("1000000U", "S"), + ("1000000000N", "S"), + ], + ) + def test_to_datetime_stride(self, unitl, unitr): + result = pd.to_datetime([1, 2, 3, 5], unit=unitl) + expected = pd.to_datetime([1, 2, 3, 5], unit=unitr) + tm.assert_index_equal(result, expected) + + result = pd.to_datetime(2, unit=unitl) + expected = pd.to_datetime(2, unit=unitr) + assert result == expected + + # Can't use 'm' for minutes and 'M' for months in the following test + # since tested against pd.date_range which sees both 'm' and 'M' as + # months. 
+ @pytest.mark.parametrize( + "unit,epoch", + [ + ("D", "1980-01-02"), + ("D", "2018-05-18"), + ("D", "2018-05-18T11"), + ("D", "2018-05-18T11:04"), + ("D", "2018-05-18T11:04:52"), + ("2D", "1970-01-01"), + ("2D", "1970-01-01T21:12:43"), + ("2D", "2019-05-03"), + ("2D", "2019-05-03T12:11"), + ("3D", "1970-01-01"), + ("3D", "2019-05-03T14"), + ("4D", "1970-05-03"), + ("4D", "2019-05-03T11"), + ("5D", "2019-05-03T11"), + ("6D", "2019-05-03T11"), + ("7D", "2019-05-03T11"), + ("14D", "2019-05-03T11"), + ("H", "2018-05-18"), + ("H", "2018-05-18T11"), + ("H", "2018-05-18T11:04"), + ("H", "2018-05-18T11:04:52"), + ("12h", "1990-05-03T12:00:00"), + ("24H", "1980-12-31"), + ("48h", "1980-12-31"), + ("96h", "1980-12-31"), + ("2H", "2019-12-31T11:59"), + ("24h", "2001-08-15"), + ("5T", "2001-08-15"), + ("5min", "2001-08-15"), + ("10T", "2001-08-15"), + ("5S", "1970-01-01T01:10"), + ("60S", "1970-01-01T01:10:12"), + ("5T", "1980-12-31"), + ("1000T", "1980-12-31"), + ("100N", "1980-12-31"), + ("N", "1980-12-31"), + ], + ) + def test_to_datetime_stride_epoch(self, unit, epoch): + result = pd.to_datetime(list(range(100)), unit=unit, origin=epoch) + expected = pd.date_range(start=epoch, freq=unit, periods=100) + tm.assert_index_equal(result, expected) + + result = pd.to_datetime(2, unit=unit, origin=epoch) + expected = pd.date_range(start=epoch, freq=unit, periods=100)[2] + assert result == expected + + @pytest.mark.parametrize("unit", ["Y", "A", "M", "W"]) + def test_to_datetime_unit_code_deprecated(self, unit): + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + _ = pd.to_datetime(list(range(100)), unit=unit, origin="unix") + class TestToDatetimeMisc: def test_to_datetime_barely_out_of_bounds(self): diff --git a/pandas/tests/indexes/timedeltas/test_arithmetic.py b/pandas/tests/indexes/timedeltas/test_arithmetic.py index 4544657f79af7..ff13208e4cfb1 100644 --- a/pandas/tests/indexes/timedeltas/test_arithmetic.py +++ b/pandas/tests/indexes/timedeltas/test_arithmetic.py @@ -61,7 +61,7 @@ def test_tdi_shift_minutes(self): def test_tdi_shift_int(self): # GH#8083 - trange = pd.to_timedelta(range(5), unit="d") + pd.offsets.Hour(1) + trange = pd.to_timedelta(range(5), unit="D") + pd.offsets.Hour(1) result = trange.shift(1) expected = TimedeltaIndex( [ @@ -77,7 +77,7 @@ def test_tdi_shift_int(self): def test_tdi_shift_nonstandard_freq(self): # GH#8083 - trange = pd.to_timedelta(range(5), unit="d") + pd.offsets.Hour(1) + trange = pd.to_timedelta(range(5), unit="D") + pd.offsets.Hour(1) result = trange.shift(3, freq="2D 1s") expected = TimedeltaIndex( [ diff --git a/pandas/tests/indexes/timedeltas/test_construction.py b/pandas/tests/indexes/timedeltas/test_construction.py index 2e00d558958e1..b76bbe374af4d 100644 --- a/pandas/tests/indexes/timedeltas/test_construction.py +++ b/pandas/tests/indexes/timedeltas/test_construction.py @@ -182,7 +182,7 @@ def test_constructor_coverage(self): # NumPy string array strings = np.array(["1 days", "2 days", "3 days"]) result = TimedeltaIndex(strings) - expected = to_timedelta([1, 2, 3], unit="d") + expected = to_timedelta([1, 2, 3], unit="D") tm.assert_index_equal(result, expected) from_ints = TimedeltaIndex(expected.asi8) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index e790a913fcac2..9a7a70d99957d 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -35,7 +35,7 @@ def setup_method(self, method): 
self.setup_indices() def create_index(self): - return pd.to_timedelta(range(5), unit="d") + pd.offsets.Hour(1) + return pd.to_timedelta(range(5), unit="D") + pd.offsets.Hour(1) def test_numeric_compat(self): # Dummy method to override super's version; this test is now done diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index e37561c865c7a..7b9231ce1809a 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -23,9 +23,9 @@ def setup_method(self, datapath): fname = os.path.join(self.dirpath, "test_sas7bdat_{j}.csv".format(j=j)) df = pd.read_csv(fname) epoch = pd.datetime(1960, 1, 1) - t1 = pd.to_timedelta(df["Column4"], unit="d") + t1 = pd.to_timedelta(df["Column4"], unit="D") df["Column4"] = epoch + t1 - t2 = pd.to_timedelta(df["Column12"], unit="d") + t2 = pd.to_timedelta(df["Column12"], unit="D") df["Column12"] = epoch + t2 for k in range(df.shape[1]): col = df.iloc[:, k] diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index 52f32d41a02ff..82982c9d68268 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -68,7 +68,7 @@ def test_td_add_sub_one_day_ten_seconds(self, one_day_ten_secs): @pytest.mark.parametrize("op", [operator.add, ops.radd]) def test_td_add_datetimelike_scalar(self, op): # GH#19738 - td = Timedelta(10, unit="d") + td = Timedelta(10, unit="D") result = op(td, datetime(2016, 1, 1)) if op is operator.add: @@ -90,7 +90,7 @@ def test_td_add_datetimelike_scalar(self, op): @pytest.mark.parametrize("op", [operator.add, ops.radd]) def test_td_add_td(self, op): - td = Timedelta(10, unit="d") + td = Timedelta(10, unit="D") result = op(td, Timedelta(days=10)) assert isinstance(result, Timedelta) @@ -98,35 +98,35 @@ def test_td_add_td(self, op): @pytest.mark.parametrize("op", [operator.add, ops.radd]) def test_td_add_pytimedelta(self, op): - td = Timedelta(10, unit="d") + td = Timedelta(10, unit="D") result = op(td, timedelta(days=9)) assert isinstance(result, Timedelta) assert result == Timedelta(days=19) @pytest.mark.parametrize("op", [operator.add, ops.radd]) def test_td_add_timedelta64(self, op): - td = Timedelta(10, unit="d") + td = Timedelta(10, unit="D") result = op(td, np.timedelta64(-4, "D")) assert isinstance(result, Timedelta) assert result == Timedelta(days=6) @pytest.mark.parametrize("op", [operator.add, ops.radd]) def test_td_add_offset(self, op): - td = Timedelta(10, unit="d") + td = Timedelta(10, unit="D") result = op(td, pd.offsets.Hour(6)) assert isinstance(result, Timedelta) assert result == Timedelta(days=10, hours=6) def test_td_sub_td(self): - td = Timedelta(10, unit="d") + td = Timedelta(10, unit="D") expected = Timedelta(0, unit="ns") result = td - td assert isinstance(result, Timedelta) assert result == expected def test_td_sub_pytimedelta(self): - td = Timedelta(10, unit="d") + td = Timedelta(10, unit="D") expected = Timedelta(0, unit="ns") result = td - td.to_pytimedelta() @@ -138,7 +138,7 @@ def test_td_sub_pytimedelta(self): assert result == expected def test_td_sub_timedelta64(self): - td = Timedelta(10, unit="d") + td = Timedelta(10, unit="D") expected = Timedelta(0, unit="ns") result = td - td.to_timedelta64() @@ -151,12 +151,12 @@ def test_td_sub_timedelta64(self): def test_td_sub_nat(self): # In this context pd.NaT is treated as timedelta-like - td = Timedelta(10, unit="d") + td = Timedelta(10, unit="D") result = td - NaT assert result is NaT 
def test_td_sub_td64_nat(self): - td = Timedelta(10, unit="d") + td = Timedelta(10, unit="D") td_nat = np.timedelta64("NaT") result = td - td_nat @@ -166,13 +166,13 @@ def test_td_sub_td64_nat(self): assert result is NaT def test_td_sub_offset(self): - td = Timedelta(10, unit="d") + td = Timedelta(10, unit="D") result = td - pd.offsets.Hour(1) assert isinstance(result, Timedelta) assert result == Timedelta(239, unit="h") def test_td_add_sub_numeric_raises(self): - td = Timedelta(10, unit="d") + td = Timedelta(10, unit="D") for other in [2, 2.0, np.int64(2), np.float64(2)]: with pytest.raises(TypeError): td + other @@ -184,7 +184,7 @@ def test_td_add_sub_numeric_raises(self): other - td def test_td_rsub_nat(self): - td = Timedelta(10, unit="d") + td = Timedelta(10, unit="D") result = NaT - td assert result is NaT @@ -192,7 +192,7 @@ def test_td_rsub_nat(self): assert result is NaT def test_td_rsub_offset(self): - result = pd.offsets.Hour(1) - Timedelta(10, unit="d") + result = pd.offsets.Hour(1) - Timedelta(10, unit="D") assert isinstance(result, Timedelta) assert result == Timedelta(-239, unit="h") @@ -263,7 +263,7 @@ class TestTimedeltaMultiplicationDivision: @pytest.mark.parametrize("op", [operator.mul, ops.rmul]) def test_td_mul_nat(self, op, td_nat): # GH#19819 - td = Timedelta(10, unit="d") + td = Timedelta(10, unit="D") with pytest.raises(TypeError): op(td, td_nat) @@ -271,7 +271,7 @@ def test_td_mul_nat(self, op, td_nat): @pytest.mark.parametrize("op", [operator.mul, ops.rmul]) def test_td_mul_nan(self, op, nan): # np.float64('NaN') has a 'dtype' attr, avoid treating as array - td = Timedelta(10, unit="d") + td = Timedelta(10, unit="D") result = op(td, nan) assert result is NaT @@ -304,7 +304,7 @@ def test_td_mul_scalar(self, op): def test_td_div_timedeltalike_scalar(self): # GH#19738 - td = Timedelta(10, unit="d") + td = Timedelta(10, unit="D") result = td / pd.offsets.Hour(1) assert result == 240 @@ -316,7 +316,7 @@ def test_td_div_timedeltalike_scalar(self): def test_td_div_numeric_scalar(self): # GH#19738 - td = Timedelta(10, unit="d") + td = Timedelta(10, unit="D") result = td / 2 assert isinstance(result, Timedelta) @@ -329,7 +329,7 @@ def test_td_div_numeric_scalar(self): @pytest.mark.parametrize("nan", [np.nan, np.float64("NaN"), float("nan")]) def test_td_div_nan(self, nan): # np.float64('NaN') has a 'dtype' attr, avoid treating as array - td = Timedelta(10, unit="d") + td = Timedelta(10, unit="D") result = td / nan assert result is NaT @@ -341,7 +341,7 @@ def test_td_div_nan(self, nan): def test_td_rdiv_timedeltalike_scalar(self): # GH#19738 - td = Timedelta(10, unit="d") + td = Timedelta(10, unit="D") result = pd.offsets.Hour(1) / td assert result == 1 / 240.0 diff --git a/pandas/tests/scalar/timedelta/test_construction.py b/pandas/tests/scalar/timedelta/test_construction.py index ae1e84576c092..e373e83f0e93d 100644 --- a/pandas/tests/scalar/timedelta/test_construction.py +++ b/pandas/tests/scalar/timedelta/test_construction.py @@ -8,8 +8,8 @@ def test_construction(): expected = np.timedelta64(10, "D").astype("m8[ns]").view("i8") - assert Timedelta(10, unit="d").value == expected - assert Timedelta(10.0, unit="d").value == expected + assert Timedelta(10, unit="D").value == expected + assert Timedelta(10.0, unit="D").value == expected assert Timedelta("10 days").value == expected assert Timedelta(days=10).value == expected assert Timedelta(days=10.0).value == expected diff --git a/pandas/tests/scalar/timedelta/test_formats.py b/pandas/tests/scalar/timedelta/test_formats.py 
index 753186ee4b738..0130ff977390d 100644 --- a/pandas/tests/scalar/timedelta/test_formats.py +++ b/pandas/tests/scalar/timedelta/test_formats.py @@ -6,7 +6,7 @@ @pytest.mark.parametrize( "td, expected_repr", [ - (Timedelta(10, unit="d"), "Timedelta('10 days 00:00:00')"), + (Timedelta(10, unit="D"), "Timedelta('10 days 00:00:00')"), (Timedelta(10, unit="s"), "Timedelta('0 days 00:00:10')"), (Timedelta(10, unit="ms"), "Timedelta('0 days 00:00:00.010000')"), (Timedelta(-10, unit="ms"), "Timedelta('-1 days +23:59:59.990000')"), diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index e4980be49d35f..e0a0fc0225085 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -62,17 +62,17 @@ class Other: assert td.__floordiv__(other) is NotImplemented def test_unary_ops(self): - td = Timedelta(10, unit="d") + td = Timedelta(10, unit="D") # __neg__, __pos__ - assert -td == Timedelta(-10, unit="d") - assert -td == Timedelta("-10d") - assert +td == Timedelta(10, unit="d") + assert -td == Timedelta(-10, unit="D") + assert -td == Timedelta("-10D") + assert +td == Timedelta(10, unit="D") # __abs__, __abs__(__neg__) assert abs(td) == td assert abs(-td) == td - assert abs(-td) == Timedelta("10d") + assert abs(-td) == Timedelta("10D") class TestTimedeltaComparison: @@ -198,7 +198,7 @@ def test_total_seconds_scalar(self): def test_conversion(self): - for td in [Timedelta(10, unit="d"), Timedelta("1 days, 10:11:12.012345")]: + for td in [Timedelta(10, unit="D"), Timedelta("1 days, 10:11:12.012345")]: pydt = td.to_pytimedelta() assert td == Timedelta(pydt) assert td == pydt @@ -454,7 +454,7 @@ def test_numeric_conversions(self): assert Timedelta(10, unit="us") == np.timedelta64(10, "us") assert Timedelta(10, unit="ms") == np.timedelta64(10, "ms") assert Timedelta(10, unit="s") == np.timedelta64(10, "s") - assert Timedelta(10, unit="d") == np.timedelta64(10, "D") + assert Timedelta(10, unit="D") == np.timedelta64(10, "D") def test_timedelta_conversions(self): assert Timedelta(timedelta(seconds=1)) == np.timedelta64(1, "s").astype( @@ -574,7 +574,7 @@ def test_round(self): def test_contains(self): # Checking for any NaT-like objects # GH 13603 - td = to_timedelta(range(5), unit="d") + pd.offsets.Hour(1) + td = to_timedelta(range(5), unit="D") + pd.offsets.Hour(1) for v in [pd.NaT, None, float("nan"), np.nan]: assert not (v in td) @@ -584,7 +584,7 @@ def test_contains(self): def test_identity(self): - td = Timedelta(10, unit="d") + td = Timedelta(10, unit="D") assert isinstance(td, Timedelta) assert isinstance(td, timedelta) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 1ddaa4692d741..0e96dac3e7edf 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -780,7 +780,7 @@ def test_isin_with_i8(self): assert_series_equal(result, expected) # timedelta64[ns] - s = Series(pd.to_timedelta(range(5), unit="d")) + s = Series(pd.to_timedelta(range(5), unit="D")) result = s.isin(s[0:2]) assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 9be79bf93ece7..f313dab4c8bd5 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -165,7 +165,7 @@ def test_astype_str_cast(self): expected = Series([str("2010-01-04 00:00:00-05:00")]) tm.assert_series_equal(s, expected) - td = Series([Timedelta(1, unit="d")]) + td = 
Series([Timedelta(1, unit="D")]) s = td.astype(str) expected = Series([str("1 days 00:00:00.000000000")]) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index dfe91b514bbe1..92ce4e654c38b 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -144,8 +144,13 @@ def to_offset(freq): else: delta = None stride_sign = None + + # hack to handle WOM-1MON + opattern = re.compile( + r"([+\-]?\d*|[+\-]?\d*\.\d*)\s*([A-Za-z]+([\-][\dA-Za-z\-]+)?)" + ) try: - splitted = re.split(libfreqs.opattern, freq) + splitted = re.split(opattern, freq) if splitted[-1] != "" and not splitted[-1].isspace(): # the last element must be blank raise ValueError("last element must be blank")
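For reference, a minimal pure-Python sketch of the lstrip-based split that replaces the opattern regex in _base_and_stride; the function name base_and_stride here is illustrative only, and the real implementation is the Cython version in pandas/_libs/tslibs/frequencies.pyx above (where a float stride such as '1.5H' still raises, as the in-line comment notes).

    def base_and_stride(freqstr):
        # Strip any leading sign, digits, decimal point, or spaces to get the base code.
        base = freqstr.lstrip('+-. 0123456789')
        if not base:
            raise ValueError("Could not evaluate {freq}".format(freq=freqstr))
        # Whatever preceded the base code is the stride; default to 1 when absent.
        stride = freqstr[:freqstr.index(base)]
        return base, int(stride) if stride else 1

    assert base_and_stride('5Min') == ('Min', 5)
    assert base_and_stride('D') == ('D', 1)
    assert base_and_stride('-2H') == ('H', -2)
    assert base_and_stride('WOM-1MON') == ('WOM-1MON', 1)

Only leading characters are stripped, so anchored aliases such as 'WOM-1MON' (the case the original opattern comment calls out) pass through unchanged with a stride of 1.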