Skip to content

Commit a0eb809

Browse files
Issue #13169: The maximal repetition number in a regular expression has been
increased from 65534 to 2147483647 (on 32-bit platforms) or 4294967294 (on 64-bit platforms).
2 parents 293ab97 + 70ca021 commit a0eb809

File tree

7 files changed: 62 additions and 13 deletions.

Lib/sre_compile.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
import _sre, sys
1414
import sre_parse
1515
from sre_constants import *
16+
from _sre import MAXREPEAT
1617

1718
assert _sre.MAGIC == MAGIC, "SRE module mismatch"
1819

Lib/sre_constants.py

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -15,10 +15,6 @@
1515

1616
MAGIC = 20031017
1717

18-
# max code word in this release
19-
20-
MAXREPEAT = 65535
21-
2218
# SRE standard exception (access as sre.error)
2319
# should this really be here?
2420

Lib/sre_parse.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515
import sys
1616

1717
from sre_constants import *
18+
from _sre import MAXREPEAT
1819

1920
SPECIAL_CHARS = ".\\[{()*+?^$|"
2021
REPEAT_CHARS = "*+?{"
@@ -537,10 +538,14 @@ def _parse(source, state):
537538
continue
538539
if lo:
539540
min = int(lo)
541+
if min >= MAXREPEAT:
542+
raise OverflowError("the repetition number is too large")
540543
if hi:
541544
max = int(hi)
542-
if max < min:
543-
raise error("bad repeat interval")
545+
if max >= MAXREPEAT:
546+
raise OverflowError("the repetition number is too large")
547+
if max < min:
548+
raise error("bad repeat interval")
544549
else:
545550
raise error("not supported")
546551
# figure out which item to repeat

Lib/test/test_re.py

Lines changed: 33 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
1-
from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G
1+
from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \
2+
cpython_only
23
import io
34
import re
45
from re import Scanner
@@ -980,6 +981,37 @@ def test_bug_16688(self):
980981
self.assertEqual(re.findall(r"(?i)(a)\1", "aa \u0100"), ['a'])
981982
self.assertEqual(re.match(r"(?s).{1,3}", "\u0100\u0100").span(), (0, 2))
982983

984+
def test_repeat_minmax_overflow(self):
985+
# Issue #13169
986+
string = "x" * 100000
987+
self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
988+
self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
989+
self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
990+
self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
991+
self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
992+
self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
993+
# 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
994+
self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
995+
self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
996+
self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
997+
self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
998+
999+
@cpython_only
1000+
def test_repeat_minmax_overflow_maxrepeat(self):
1001+
try:
1002+
from _sre import MAXREPEAT
1003+
except ImportError:
1004+
self.skipTest('requires _sre.MAXREPEAT constant')
1005+
string = "x" * 100000
1006+
self.assertIsNone(re.match(r".{%d}" % (MAXREPEAT - 1), string))
1007+
self.assertEqual(re.match(r".{,%d}" % (MAXREPEAT - 1), string).span(),
1008+
(0, 100000))
1009+
self.assertIsNone(re.match(r".{%d,}?" % (MAXREPEAT - 1), string))
1010+
self.assertRaises(OverflowError, re.compile, r".{%d}" % MAXREPEAT)
1011+
self.assertRaises(OverflowError, re.compile, r".{,%d}" % MAXREPEAT)
1012+
self.assertRaises(OverflowError, re.compile, r".{%d,}?" % MAXREPEAT)
1013+
1014+
9831015
def run_re_tests():
9841016
from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
9851017
if verbose:

Misc/NEWS

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -178,6 +178,10 @@ Core and Builtins
178178
Library
179179
-------
180180

181+
- Issue #13169: The maximal repetition number in a regular expression has been
182+
increased from 65534 to 2147483647 (on 32-bit platform) or 4294967294 (on
183+
64-bit).
184+
181185
- Issue #17143: Fix a missing import in the trace module. Initial patch by
182186
Berker Peksag.
183187

Modules/_sre.c

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -492,7 +492,7 @@ SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
492492
Py_ssize_t i;
493493

494494
/* adjust end */
495-
if (maxcount < (end - ptr) / state->charsize && maxcount != 65535)
495+
if (maxcount < (end - ptr) / state->charsize && maxcount != SRE_MAXREPEAT)
496496
end = ptr + maxcount*state->charsize;
497497

498498
switch (pattern[0]) {
@@ -1109,7 +1109,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
11091109
} else {
11101110
/* general case */
11111111
LASTMARK_SAVE();
1112-
while ((Py_ssize_t)ctx->pattern[2] == 65535
1112+
while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT
11131113
|| ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
11141114
state->ptr = ctx->ptr;
11151115
DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
@@ -1195,7 +1195,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
11951195
}
11961196

11971197
if ((ctx->count < ctx->u.rep->pattern[2] ||
1198-
ctx->u.rep->pattern[2] == 65535) &&
1198+
ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
11991199
state->ptr != ctx->u.rep->last_ptr) {
12001200
/* we may have enough matches, but if we can
12011201
match another item, do so */
@@ -1273,7 +1273,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
12731273
LASTMARK_RESTORE();
12741274

12751275
if (ctx->count >= ctx->u.rep->pattern[2]
1276-
&& ctx->u.rep->pattern[2] != 65535)
1276+
&& ctx->u.rep->pattern[2] != SRE_MAXREPEAT)
12771277
RETURN_FAILURE;
12781278

12791279
ctx->u.rep->count = ctx->count;
@@ -3037,7 +3037,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
30373037
GET_ARG; max = arg;
30383038
if (min > max)
30393039
FAIL;
3040-
if (max > 65535)
3040+
if (max > SRE_MAXREPEAT)
30413041
FAIL;
30423042
if (!_validate_inner(code, code+skip-4, groups))
30433043
FAIL;
@@ -3056,7 +3056,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
30563056
GET_ARG; max = arg;
30573057
if (min > max)
30583058
FAIL;
3059-
if (max > 65535)
3059+
if (max > SRE_MAXREPEAT)
30603060
FAIL;
30613061
if (!_validate_inner(code, code+skip-3, groups))
30623062
FAIL;
@@ -3942,6 +3942,12 @@ PyMODINIT_FUNC PyInit__sre(void)
39423942
Py_DECREF(x);
39433943
}
39443944

3945+
x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
3946+
if (x) {
3947+
PyDict_SetItemString(d, "MAXREPEAT", x);
3948+
Py_DECREF(x);
3949+
}
3950+
39453951
x = PyUnicode_FromString(copyright);
39463952
if (x) {
39473953
PyDict_SetItemString(d, "copyright", x);

Modules/sre.h

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,11 @@
1616
/* size of a code word (must be unsigned short or larger, and
1717
large enough to hold a UCS4 character) */
1818
#define SRE_CODE Py_UCS4
19+
#if SIZEOF_SIZE_T > 4
20+
# define SRE_MAXREPEAT (~(SRE_CODE)0)
21+
#else
22+
# define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX + 1u)
23+
#endif
1924

2025
typedef struct {
2126
PyObject_VAR_HEAD

0 commit comments

Comments
 (0)