Skip to content

Commit ef3ec1f

Browse files
committed
gh-124529: Fix _strptime to make %c/%x accept year with fewer digits
1 parent 6f4d64b commit ef3ec1f

File tree

3 files changed

+282
-8
lines changed

3 files changed

+282
-8
lines changed

Lib/_strptime.py

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -199,8 +199,6 @@ def __init__(self, locale_time=None):
199199
'V': r"(?P<V>5[0-3]|0[1-9]|[1-4]\d|\d)",
200200
# W is set below by using 'U'
201201
'y': r"(?P<y>\d\d)",
202-
#XXX: Does 'Y' need to worry about having less or more than
203-
# 4 digits?
204202
'Y': r"(?P<Y>\d\d\d\d)",
205203
'z': r"(?P<z>[+-]\d\d:?[0-5]\d(:?[0-5]\d(\.\d{1,6})?)?|(?-i:Z))",
206204
'A': self.__seqToRE(self.locale_time.f_weekday, 'A'),
@@ -213,8 +211,10 @@ def __init__(self, locale_time=None):
213211
'Z'),
214212
'%': '%'})
215213
base.__setitem__('W', base.__getitem__('U').replace('U', 'W'))
216-
base.__setitem__('c', self.pattern(self.locale_time.LC_date_time))
217-
base.__setitem__('x', self.pattern(self.locale_time.LC_date))
214+
base.__setitem__(
215+
'c', self.__pattern_with_lax_year(self.locale_time.LC_date_time))
216+
base.__setitem__(
217+
'x', self.__pattern_with_lax_year(self.locale_time.LC_date))
218218
base.__setitem__('X', self.pattern(self.locale_time.LC_time))
219219

220220
def __seqToRE(self, to_convert, directive):
@@ -236,6 +236,26 @@ def __seqToRE(self, to_convert, directive):
236236
regex = '(?P<%s>%s' % (directive, regex)
237237
return '%s)' % regex
238238

239+
def __pattern_with_lax_year(self, format):
    """Return the regex for *format* with relaxed %Y and %y matching.

    Works like pattern(), except that in the resulting regex the year
    groups accept fewer digits than usual: 1 to 4 digits for %Y and
    1 to 2 digits for %y.

    This is needed so that strptime() can parse what strftime()
    produces for %c or %x.  For some locales and platforms (e.g.,
    'C.UTF-8' on Linux) those directives may yield a year that is not
    zero-padded when the number is small enough (e.g., '999' instead
    of '0999', or '9' instead of '09').

    The regex patterns registered for %Y and %y themselves are not
    generated by this helper; they keep matching exactly four and
    exactly two digits, respectively.

    """
    regex = self.pattern(format)
    # Swap each strict year sub-pattern (as stored in this mapping)
    # for its variable-width counterpart; %Y is handled before %y.
    for directive, lax_regex in (
        ('Y', r"(?P<Y>\d{1,4})"),
        ('y', r"(?P<y>\d{1,2})"),
    ):
        regex = regex.replace(self[directive], lax_regex)
    return regex
258+
239259
def pattern(self, format):
240260
"""Return regex pattern for the format string.
241261
@@ -374,6 +394,7 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
374394
# U, W
375395
# worthless without day of the week
376396
if group_key == 'y':
397+
# 1 or 2 digits (1 only for directive c or x; see TimeRE.__init__)
377398
year = int(found_dict['y'])
378399
# Open Group specification for strptime() states that a %y
379400
#value in the range of [00, 68] is in the century 2000, while
@@ -383,6 +404,7 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
383404
else:
384405
year += 1900
385406
elif group_key == 'Y':
407+
# 1-4 digits (1-3 only for directive c or x; see TimeRE.__init__)
386408
year = int(found_dict['Y'])
387409
elif group_key == 'G':
388410
iso_year = int(found_dict['G'])

Lib/test/datetimetester.py

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1185,6 +1185,40 @@ def test_strptime_leap_year(self):
11851185
date.strptime('20-03-14', '%y-%m-%d')
11861186
date.strptime('02-29,2024', '%m-%d,%Y')
11871187

1188+
def test_strftime_strptime_roundtrip(self):
    # date.strptime() must be able to parse back whatever
    # date.strftime() produced for the same format string.
    formats = (
        '%c',
        '%x',
        '%Y%m%d',
        'm:%m d:%d y:%y H:%H M:%M S:%S f:%f and some text',
    )
    # Years used when the format renders the full year; the ones
    # below 1000 are the gh-124529 cases (relevant to %c/%x).
    full_year_cases = (
        1, 9, 10, 99, 100, 999,
        1000, 1410, 1989, 2024, 2095, 9999,
    )
    # Years used when the format renders only the last two digits;
    # 2000, 2001 and 2009 are the gh-124529 cases (relevant to %c/%x).
    short_year_cases = (
        1969, 1999,
        2000, 2001, 2009,
        2068,
    )
    month_day_pairs = ((1, 1), (6, 4), (12, 31))

    for fmt in formats:
        with self.subTest(fmt=fmt):
            # Probe how (and whether) this format renders the year.
            probe = date(1999, 3, 17).strftime(fmt)
            if '1999' in probe:
                year_seq = full_year_cases
            elif '99' in probe:
                year_seq = short_year_cases
            else:
                self.skipTest(f"these subtests need locale for which "
                              f"{fmt!r} includes year in some variant")
            for year in year_seq:
                for month, day in month_day_pairs:
                    instance = date(year, month, day)
                    reason = (f'strftime/strptime roundtrip '
                              f'for {fmt=} and {year=}')
                    with self.subTest(reason=reason, instance=instance):
                        text = instance.strftime(fmt)
                        self.assertEqual(date.strptime(text, fmt),
                                         instance, msg=reason)
1221+
11881222
class SubclassDate(date):
11891223
sub_var = 1
11901224

@@ -2124,6 +2158,35 @@ def test_fromisocalendar_type_errors(self):
21242158
with self.assertRaises(TypeError):
21252159
self.theclass.fromisocalendar(*isocal)
21262160

2161+
def test_strptime_accepting_year_with_fewer_digits(self):  # gh-124529
    # For %c and %x, strptime() has to accept a year written with
    # fewer digits than the usual four (or two).
    prototype_inst = self.theclass.strptime('1999', '%Y')

    def check(fmt, input_string, year):
        # Parse `input_string` and compare with the prototype moved
        # to the given year.
        reason = (f'strptime accepting year with fewer '
                  f'digits for {fmt=} and {input_string=}')
        with self.subTest(reason=reason):
            expected = prototype_inst.replace(year=year)
            parsed = self.theclass.strptime(input_string, fmt)
            self.assertEqual(parsed, expected, msg=reason)

    for fmt in ('%c', '%x'):
        with self.subTest(fmt=fmt):
            sample = prototype_inst.strftime(fmt)
            # `marker` is how the prototype year shows up in `sample`;
            # `century` is what must be subtracted from a year to get
            # the digits written in its place.
            if '1999' in sample:
                marker, century = '1999', 0
                years = (1, 9, 10, 99, 100, 999)
            elif '99' in sample:
                marker, century = '99', 2000
                years = (2000, 2001, 2009)
            else:
                self.skipTest(f"these subtests need locale for which "
                              f"{fmt!r} includes year in some variant")
            for year in years:
                y_digits = str(year - century)
                check(fmt, sample.replace(marker, y_digits), year)
2189+
21272190

21282191
#############################################################################
21292192
# datetime tests
@@ -2955,6 +3018,48 @@ def test_more_strftime(self):
29553018
except UnicodeEncodeError:
29563019
pass
29573020

3021+
def test_strftime_strptime_roundtrip(self):
    # strptime() must be able to parse back whatever strftime()
    # produced for the same format string, with and without tzinfo.
    timezones = (
        None,
        UTC,
        timezone(timedelta(hours=2)),
        timezone(timedelta(hours=-7)),
    )
    base_formats = (
        '%c %f',
        '%x %X %f',
        '%Y%m%d%H%M%S%f',
        'm:%m d:%d y:%y H:%H M:%M S:%S f:%f and some text',
    )
    # Years used when the format renders the full year; the ones
    # below 1000 are the gh-124529 cases (relevant to %c/%x).
    full_year_cases = (
        1, 9, 10, 99, 100, 999,
        1000, 1410, 1989, 2024, 2095, 9999,
    )
    # Years used when the format renders only the last two digits;
    # 2000, 2001 and 2009 are the gh-124529 cases (relevant to %c/%x).
    short_year_cases = (
        1969, 1999,
        2000, 2001, 2009,
        2068,
    )
    # Positional arguments following the year for each tested instance.
    rest_of_args = (
        (1, 1, 0, 0, 0),
        (6, 4, 1, 42, 7, 99),
        (12, 31, 23, 59, 59),
    )

    for tz in timezones:
        # Aware instances additionally round-trip their UTC offset.
        fmt_suffix = ' %z' if tz is not None else ''
        for base_fmt in base_formats:
            fmt = base_fmt + fmt_suffix
            with self.subTest(fmt=fmt):
                # Probe how (and whether) this format renders the year.
                probe = self.theclass(1999, 3, 17, 0, 0).strftime(fmt)
                if '1999' in probe:
                    year_seq = full_year_cases
                elif '99' in probe:
                    year_seq = short_year_cases
                else:
                    self.skipTest(f"these subtests need locale for which "
                                  f"{fmt!r} includes year in some variant")
                for year in year_seq:
                    for rest in rest_of_args:
                        instance = self.theclass(year, *rest, tzinfo=tz)
                        reason = (f'strftime/strptime roundtrip '
                                  f'for {fmt=} and {year=}')
                        with self.subTest(reason=reason,
                                          instance=instance):
                            text = instance.strftime(fmt)
                            self.assertEqual(
                                self.theclass.strptime(text, fmt),
                                instance, msg=reason)
3062+
29583063
def test_extract(self):
29593064
dt = self.theclass(2002, 3, 4, 18, 45, 3, 1234)
29603065
self.assertEqual(dt.date(), date(2002, 3, 4))
@@ -3901,6 +4006,32 @@ def test_strptime_single_digit(self):
39014006
newdate = self.theclass.strptime(string, format)
39024007
self.assertEqual(newdate, target, msg=reason)
39034008

4009+
def test_strftime_strptime_roundtrip(self):
    # strptime() must be able to parse back whatever strftime()
    # produced for the same format string, with and without tzinfo.
    timezones = (
        None,
        UTC,
        timezone(timedelta(hours=2)),
        timezone(timedelta(hours=-7)),
    )
    base_formats = (
        '%c %f',
        '%X %f',
        '%H%M%S%f',
        'm:%m d:%d y:%y H:%H M:%M S:%S f:%f and some text',
    )
    # Positional constructor arguments of each tested instance.
    instance_args = (
        (0, 0, 0),
        (1, 42, 7),
        (23, 59, 59, 654321),
    )

    for tz in timezones:
        # Aware instances additionally round-trip their UTC offset.
        fmt_suffix = ' %z' if tz is not None else ''
        for base_fmt in base_formats:
            fmt = base_fmt + fmt_suffix
            for args in instance_args:
                instance = self.theclass(*args, tzinfo=tz)
                reason = f'strftime/strptime round trip for {fmt=}'
                with self.subTest(reason=reason, instance=instance):
                    text = instance.strftime(fmt)
                    self.assertEqual(self.theclass.strptime(text, fmt),
                                     instance, msg=reason)
4034+
39044035
def test_bool(self):
39054036
# time is always True.
39064037
cls = self.theclass

Lib/test/test_strptime.py

Lines changed: 125 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -161,11 +161,42 @@ def test_compile(self):
161161
for directive in ('a','A','b','B','c','d','G','H','I','j','m','M','p',
162162
'S','u','U','V','w','W','x','X','y','Y','Z','%'):
163163
fmt = "%d %Y" if directive == 'd' else "%" + directive
164+
input_string = time.strftime(fmt)
164165
compiled = self.time_re.compile(fmt)
165-
found = compiled.match(time.strftime(fmt))
166-
self.assertTrue(found, "Matching failed on '%s' using '%s' regex" %
167-
(time.strftime(fmt),
168-
compiled.pattern))
166+
found = compiled.match(input_string)
167+
self.assertTrue(found,
168+
(f"Matching failed on '{input_string}' "
169+
f"using '{compiled.pattern}' regex"))
170+
for directive in ('c', 'x'):
171+
fmt = "%" + directive
172+
with self.subTest(f"{fmt!r} should match input containing "
173+
f"year with fewer digits than usual"):
174+
# gh-124529
175+
params = _input_str_and_expected_year_for_few_digits_year(fmt)
176+
if params is None:
177+
self.skipTest(f"this subtest needs locale for which "
178+
f"{fmt!r} includes year in some variant")
179+
input_string, _ = params
180+
compiled = self.time_re.compile(fmt)
181+
found = compiled.match(input_string)
182+
self.assertTrue(found,
183+
(f"Matching failed on '{input_string}' "
184+
f"using '{compiled.pattern}' regex"))
185+
for directive in ('y', 'Y'):
186+
fmt = "%" + directive
187+
with self.subTest(f"{fmt!r} should not match input containing "
188+
f"year with fewer digits than usual"):
189+
params = _input_str_and_expected_year_for_few_digits_year(fmt)
190+
if params is None:
191+
self.skipTest(f"this subtest needs locale for which "
192+
f"{fmt!r} includes year in some variant")
193+
input_string, _ = params
194+
compiled = self.time_re.compile(fmt)
195+
found = compiled.match(input_string)
196+
self.assertFalse(found,
197+
(f"Matching unexpectedly succeeded "
198+
f"on '{input_string}' using "
199+
f"'{compiled.pattern}' regex"))
169200

170201
def test_blankpattern(self):
171202
# Make sure when tuple or something has no values no regex is generated.
@@ -299,6 +330,25 @@ def helper(self, directive, position):
299330
(directive, strf_output, strp_output[position],
300331
self.time_tuple[position]))
301332

333+
def helper_for_directives_accepting_few_digits_year(self, directive):
    """Check that *directive* parses a year with fewer digits than usual.

    The input string and the expected year come from
    _input_str_and_expected_year_for_few_digits_year().  The test is
    skipped when, for the current locale, the directive's output does
    not include the year at all.
    """
    fmt = "%" + directive
    params = _input_str_and_expected_year_for_few_digits_year(fmt)
    # The helper always returns a 2-tuple and reports "no year in this
    # format" through a None *expected year*, so checking only
    # `params is None` would never skip.  A bare None is still
    # tolerated for safety.
    if params is None or params[1] is None:
        self.skipTest(f"test needs locale for which {fmt!r} "
                      f"includes year in some variant")
    input_string, expected_year = params
    try:
        output_year = _strptime._strptime(input_string, fmt)[0][0]
    except ValueError as exc:
        # See: gh-124529
        self.fail(f"testing of {directive!r} directive failed; "
                  f"{input_string!r} -> exception: {exc!r}")
    else:
        self.assertEqual(output_year, expected_year,
                         (f"testing of {directive!r} directive failed; "
                          f"{input_string!r} -> output including year "
                          f"{output_year!r} != {expected_year!r}"))
351+
302352
def test_year(self):
303353
# Test that the year is handled properly
304354
for directive in ('y', 'Y'):
@@ -312,6 +362,17 @@ def test_year(self):
312362
"'y' test failed; passed in '%s' "
313363
"and returned '%s'" % (bound, strp_output[0]))
314364

365+
def test_bad_year(self):
    # Plain %y and %Y stay strict: a year with the wrong number of
    # digits, or with no digits at all, must be rejected.
    rejected_inputs = {
        'y': ('9', '100', 'ni'),
        'Y': ('7', '42', '999', '10000', 'SPAM'),
    }
    for directive, bad_inputs in rejected_inputs.items():
        fmt = "%" + directive
        for input_val in bad_inputs:
            with self.subTest(directive=directive, input_val=input_val):
                self.assertRaises(ValueError,
                                  _strptime._strptime_time,
                                  input_val, fmt)
375+
315376
def test_month(self):
316377
# Test for month directives
317378
for directive in ('B', 'b', 'm'):
@@ -454,11 +515,21 @@ def test_date_time(self):
454515
for position in range(6):
455516
self.helper('c', position)
456517

518+
def test_date_time_accepting_few_digits_year(self):  # gh-124529
    # %c has to accept input whose year number
    # has fewer digits than usual.
    self.helper_for_directives_accepting_few_digits_year(directive='c')
522+
457523
def test_date(self):
458524
# Test %x directive
459525
for position in range(0,3):
460526
self.helper('x', position)
461527

528+
def test_date_accepting_few_digits_year(self):  # gh-124529
    # %x has to accept input whose year number
    # has fewer digits than usual.
    self.helper_for_directives_accepting_few_digits_year(directive='x')
532+
462533
def test_time(self):
463534
# Test %X directive
464535
for position in range(3,6):
@@ -769,5 +840,55 @@ def test_TimeRE_recreation_timezone(self):
769840
_strptime._strptime_time(oldtzname[1], '%Z')
770841

771842

843+
def _input_str_and_expected_year_for_few_digits_year(fmt):
    # Build test data for parsing a year that is written with fewer
    # digits than the usual four or two.
    #
    # Returns a 2-tuple (<strptime input string>, <expected year>):
    # * <strptime input string> -- the result of `time.strftime(fmt, ...)`
    #   for a fixed sample time, in which the year (if it is rendered
    #   at all) has been replaced with the single digit 7 (the choice
    #   of this particular digit is arbitrary);
    # * <expected year> -- the int year that `strptime(<strptime input
    #   string>, fmt)` is expected to yield: 7 if the format rendered
    #   the full year, 2007 if it rendered only the last two digits,
    #   or None if the year was not rendered at all (for the given
    #   format string and the current locale).

    short_year = '7'

    # Sample time tuple; its fields are chosen so that the year is easy
    # to tell apart from the other numbers in the formatted string.
    sample_year = 1999
    sample_tt = (sample_year, 3, 17, 22, 44, 55, 2, 76, 0)
    full_repr = str(sample_year)
    two_digit_repr = full_repr[-2:]

    input_string = time.strftime(fmt, sample_tt)

    # Look for the full year first, so that its last two digits are
    # not mistaken for a two-digit year; the two-digit search then
    # runs on the already substituted string.
    full_pos = input_string.find(full_repr)
    if full_pos != -1:
        input_string = input_string.replace(full_repr, short_year)
    two_digit_pos = input_string.find(two_digit_repr)
    if two_digit_pos != -1:
        input_string = input_string.replace(two_digit_repr, short_year)

    # Whichever representation was found decides the expected year.
    if full_pos == two_digit_pos:
        assert full_pos == two_digit_pos == -1
        expected_year = None
    elif full_pos > two_digit_pos:
        expected_year = int(short_year)
    else:
        expected_year = 2000 + int(short_year)

    return input_string, expected_year
891+
892+
772893
if __name__ == '__main__':
773894
unittest.main()

0 commit comments

Comments
 (0)