Skip to content

Commit 125c590

Browse files
committed
BUG: Parsing week-freq Periods
1 parent 47a45a8 commit 125c590

File tree

3 files changed

+67
-13
lines changed

3 files changed

+67
-13
lines changed

doc/source/whatsnew/v2.0.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -1032,6 +1032,8 @@ Period
10321032
- Bug in :meth:`Period.strftime` and :meth:`PeriodIndex.strftime`, raising ``UnicodeDecodeError`` when a locale-specific directive was passed (:issue:`46319`)
10331033
- Bug in adding a :class:`Period` object to an array of :class:`DateOffset` objects incorrectly raising ``TypeError`` (:issue:`50162`)
10341034
- Bug in :class:`Period` where passing a string with finer resolution than nanosecond would result in a ``KeyError`` instead of dropping the extra precision (:issue:`50417`)
1035+
- Bug in parsing strings representing Week-periods e.g. "2017-01-23/2017-01-29" as minute-frequency instead of week-frequency (:issue:`50803`)
1036+
-
10351037

10361038
Plotting
10371039
^^^^^^^^

pandas/_libs/tslibs/period.pyx

+53-13
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import re
2+
13
cimport numpy as cnp
24
from cpython.object cimport (
35
Py_EQ,
@@ -2591,20 +2593,30 @@ class Period(_Period):
25912593
value = value.upper()
25922594

25932595
freqstr = freq.rule_code if freq is not None else None
2594-
dt, reso = parse_datetime_string_with_reso(value, freqstr)
2595-
if reso == "nanosecond":
2596-
nanosecond = dt.nanosecond
2597-
if dt is NaT:
2598-
ordinal = NPY_NAT
25992596

2600-
if freq is None and ordinal != NPY_NAT:
2601-
# Skip NaT, since it doesn't have a resolution
2602-
try:
2603-
freq = attrname_to_abbrevs[reso]
2604-
except KeyError:
2605-
raise ValueError(f"Invalid frequency or could not "
2606-
f"infer: {reso}")
2607-
freq = to_offset(freq)
2597+
try:
2598+
dt, reso = parse_datetime_string_with_reso(value, freqstr)
2599+
except ValueError:
2600+
match = re.search(r"^\d{4}-\d{2}-\d{2}/\d{4}-\d{2}-\d{2}", value)
2601+
if not match:
2602+
raise
2603+
# Case that cannot be parsed (correctly) by our datetime
2604+
# parsing logic
2605+
dt, freq = parse_weekly_str(value, freq)
2606+
else:
2607+
if reso == "nanosecond":
2608+
nanosecond = dt.nanosecond
2609+
if dt is NaT:
2610+
ordinal = NPY_NAT
2611+
2612+
if freq is None and ordinal != NPY_NAT:
2613+
# Skip NaT, since it doesn't have a resolution
2614+
try:
2615+
freq = attrname_to_abbrevs[reso]
2616+
except KeyError:
2617+
raise ValueError(f"Invalid frequency or could not "
2618+
f"infer: {reso}")
2619+
freq = to_offset(freq)
26082620

26092621
elif PyDateTime_Check(value):
26102622
dt = value
@@ -2664,3 +2676,31 @@ def validate_end_alias(how: str) -> str: # Literal["E", "S"]
26642676
if how not in {"S", "E"}:
26652677
raise ValueError("How must be one of S or E")
26662678
return how
2679+
2680+
2681+
cdef parse_weekly_str(value, BaseOffset freq):
    """
    Parse the string form of a weekly Period, e.g. "2017-01-23/2017-01-29".

    Such strings cannot be parsed by the general datetime-parsing logic.
    Handling them here ensures that we can round-trip with Period.__str__
    with weekly freq.

    Parameters
    ----------
    value : str
        Candidate string of the form "YYYY-MM-DD/YYYY-MM-DD".
    freq : BaseOffset or None
        Frequency to return.  If None, a weekly frequency anchored on the
        weekday of the end date ("W-<DAY>") is inferred.

    Returns
    -------
    tuple
        ``(end, freq)`` where ``end`` is the Timestamp of the second date
        and ``freq`` is the passed frequency, or the inferred one when
        ``freq`` was None.

    Raises
    ------
    ValueError
        If ``value`` is not exactly two dates separated by "/", or if the
        two dates are not 6 days apart.  Errors raised by Timestamp for
        strings that match the pattern but are not valid calendar dates
        propagate unchanged.
    """
    # fullmatch (rather than a search anchored only at the start) so that
    # trailing characters or extra "/"-separated parts are rejected here
    # with our own message instead of failing later in split()/Timestamp.
    if re.fullmatch(r"\d{4}-\d{2}-\d{2}/\d{4}-\d{2}-\d{2}", value) is None:
        raise ValueError("Could not parse as weekly-freq Period")

    start, end = value.split("/")
    start = Timestamp(start)
    end = Timestamp(end)

    if (end - start).days != 6:
        # We are interested in cases where this is str(period)
        #  of a Week-freq period
        raise ValueError("Could not parse as weekly-freq Period")

    if freq is None:
        # Anchor the inferred weekly frequency on the weekday of the end
        # date, e.g. an end date falling on a Sunday gives "W-SUN".
        day_name = end.day_name()[:3].upper()
        freqstr = f"W-{day_name}"
        freq = to_offset(freqstr)
        # We _should_ have freq.is_on_offset(end)

    return end, freq

pandas/tests/scalar/period/test_period.py

+12
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,18 @@ def test_period_cons_weekly(self, num, day):
399399
assert result == expected
400400
assert isinstance(result, Period)
401401

402+
def test_parse_week_str_roundstrip(self):
    """
    Strings of the form produced by str() on a weekly Period must parse
    back to a weekly Period that renders as the same string.
    """
    value = "2017-01-23/2017-01-29"
    per = Period(value)
    # The end date is a Sunday, so the inferred anchor is W-SUN.
    assert per.freq.freqstr == "W-SUN"
    # Actual round-trip: the parsed Period renders as the input string.
    assert str(per) == value

    value = "2017-01-24/2017-01-30"
    per = Period(value)
    # The end date is a Monday, so the inferred anchor is W-MON.
    assert per.freq.freqstr == "W-MON"
    assert str(per) == value

    msg = "Could not parse as weekly-freq Period"
    with pytest.raises(ValueError, match=msg):
        # not 6 days apart
        Period("2016-01-23/2017-01-29")
413+
402414
def test_period_from_ordinal(self):
403415
p = Period("2011-01", freq="M")
404416
res = Period._from_ordinal(p.ordinal, freq="M")

0 commit comments

Comments
 (0)