Skip to content

gh-56166: Deprecate passing confusing positional arguments in re functions #107778

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 13 additions & 11 deletions Doc/library/re.rst
Original file line number Diff line number Diff line change
Expand Up @@ -898,7 +898,7 @@ Functions
['Words', 'words', 'words', '']
>>> re.split(r'(\W+)', 'Words, words, words.')
['Words', ', ', 'words', ', ', 'words', '.', '']
>>> re.split(r'\W+', 'Words, words, words.', 1)
>>> re.split(r'\W+', 'Words, words, words.', maxsplit=1)
['Words', 'words, words.']
>>> re.split('[a-f]+', '0a3B9', flags=re.IGNORECASE)
['0', '3', '9']
Expand Down Expand Up @@ -929,6 +929,11 @@ Functions
.. versionchanged:: 3.7
Added support of splitting on a pattern that could match an empty string.

.. deprecated:: 3.13
Passing *maxsplit* and *flags* as positional arguments is deprecated.
In future Python versions they will be
:ref:`keyword-only parameters <keyword-only_parameter>`.


.. function:: findall(pattern, string, flags=0)

Expand Down Expand Up @@ -1027,8 +1032,6 @@ Functions
.. versionchanged:: 3.7
Unknown escapes in *repl* consisting of ``'\'`` and an ASCII letter
now are errors.

.. versionchanged:: 3.7
Empty matches for the pattern are replaced when adjacent to a previous
non-empty match.

Expand All @@ -1037,18 +1040,17 @@ Functions
In :class:`bytes` replacement strings, group *name* can only contain bytes
in the ASCII range (``b'\x00'``-``b'\x7f'``).

.. deprecated:: 3.13
Passing *count* and *flags* as positional arguments is deprecated.
In future Python versions they will be
:ref:`keyword-only parameters <keyword-only_parameter>`.


.. function:: subn(pattern, repl, string, count=0, flags=0)

Perform the same operation as :func:`sub`, but return a tuple ``(new_string,
number_of_subs_made)``.

.. versionchanged:: 3.1
Added the optional flags argument.

.. versionchanged:: 3.5
Unmatched groups are replaced with an empty string.


.. function:: escape(pattern)

Expand Down Expand Up @@ -1656,7 +1658,7 @@ because the address has spaces, our splitting pattern, in it:
.. doctest::
:options: +NORMALIZE_WHITESPACE

>>> [re.split(":? ", entry, 3) for entry in entries]
>>> [re.split(":? ", entry, maxsplit=3) for entry in entries]
[['Ross', 'McFluff', '834.345.1254', '155 Elm Street'],
['Ronald', 'Heathmore', '892.345.3428', '436 Finley Avenue'],
['Frank', 'Burger', '925.541.7625', '662 South Dogwood Way'],
Expand All @@ -1669,7 +1671,7 @@ house number from the street name:
.. doctest::
:options: +NORMALIZE_WHITESPACE

>>> [re.split(":? ", entry, 4) for entry in entries]
>>> [re.split(":? ", entry, maxsplit=4) for entry in entries]
[['Ross', 'McFluff', '834.345.1254', '155', 'Elm Street'],
['Ronald', 'Heathmore', '892.345.3428', '436', 'Finley Avenue'],
['Frank', 'Burger', '925.541.7625', '662', 'South Dogwood Way'],
Expand Down
7 changes: 7 additions & 0 deletions Doc/whatsnew/3.13.rst
Original file line number Diff line number Diff line change
Expand Up @@ -827,6 +827,13 @@ Porting to Python 3.13
Deprecated
----------

* Passing optional arguments *maxsplit*, *count* and *flags* in module-level
functions :func:`re.split`, :func:`re.sub` and :func:`re.subn` as positional
arguments is now deprecated.
In future Python versions these parameters will be
:ref:`keyword-only <keyword-only_parameter>`.
(Contributed by Serhiy Storchaka in :gh:`56166`.)

* Deprecate the old ``Py_UNICODE`` and ``PY_UNICODE_TYPE`` types: use directly
the :c:type:`wchar_t` type instead. Since Python 3.3, ``Py_UNICODE`` and
``PY_UNICODE_TYPE`` are just aliases to :c:type:`wchar_t`.
Expand Down
67 changes: 64 additions & 3 deletions Lib/re/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,16 +175,39 @@ def search(pattern, string, flags=0):
a Match object, or None if no match was found."""
return _compile(pattern, flags).search(string)

def sub(pattern, repl, string, count=0, flags=0):
class _ZeroSentinel(int):
pass
_zero_sentinel = _ZeroSentinel()

def sub(pattern, repl, string, *args, count=_zero_sentinel, flags=_zero_sentinel):
    """Return string with the leftmost non-overlapping matches of pattern
    replaced by repl.

    repl may be a string or a callable.  If it is a string, backslash
    escapes in it are processed.  If it is a callable, it is passed the
    Match object and must return the replacement string to use.

    count and flags should be passed by keyword.  Supplying them
    positionally still works but emits a DeprecationWarning; supplying
    the same argument both ways, or more than five positional arguments,
    raises TypeError."""
    if not args:
        # Modern calling convention: nothing to unpack, nothing to warn about.
        return _compile(pattern, flags).sub(repl, string, count)

    # Legacy calling convention: the first extra positional is count,
    # the second (if any) is flags.
    if count is not _zero_sentinel:
        raise TypeError("sub() got multiple values for argument 'count'")
    count, extra = args[0], args[1:]
    if extra:
        if flags is not _zero_sentinel:
            raise TypeError("sub() got multiple values for argument 'flags'")
        flags, extra = extra[0], extra[1:]
        if extra:
            # 3 required + count + flags already consumed, hence the 5.
            raise TypeError("sub() takes from 3 to 5 positional arguments "
                            "but %d were given" % (5 + len(extra)))

    # Imported lazily so the module only pays for it on the deprecated path.
    import warnings
    warnings.warn(
        "'count' is passed as positional argument",
        DeprecationWarning, stacklevel=2
    )
    return _compile(pattern, flags).sub(repl, string, count)
sub.__text_signature__ = '(pattern, repl, string, count=0, flags=0)'

def subn(pattern, repl, string, count=0, flags=0):
def subn(pattern, repl, string, *args, count=_zero_sentinel, flags=_zero_sentinel):
"""Return a 2-tuple containing (new_string, number).
new_string is the string obtained by replacing the leftmost
non-overlapping occurrences of the pattern in the source
Expand All @@ -193,17 +216,55 @@ def subn(pattern, repl, string, count=0, flags=0):
callable; if a string, backslash escapes in it are processed.
If it is a callable, it's passed the Match object and must
return a replacement string to be used."""
if args:
if count is not _zero_sentinel:
raise TypeError("subn() got multiple values for argument 'count'")
count, *args = args
if args:
if flags is not _zero_sentinel:
raise TypeError("subn() got multiple values for argument 'flags'")
flags, *args = args
if args:
raise TypeError("subn() takes from 3 to 5 positional arguments "
"but %d were given" % (5 + len(args)))

import warnings
warnings.warn(
"'count' is passed as positional argument",
DeprecationWarning, stacklevel=2
)

return _compile(pattern, flags).subn(repl, string, count)
subn.__text_signature__ = '(pattern, repl, string, count=0, flags=0)'

def split(pattern, string, maxsplit=0, flags=0):
def split(pattern, string, *args, maxsplit=_zero_sentinel, flags=_zero_sentinel):
    """Split string at each occurrence of pattern and return the list of
    resulting substrings.

    If pattern contains capturing parentheses, the text of all groups is
    returned as part of the list as well.  If maxsplit is nonzero, at
    most maxsplit splits occur and the remainder of the string becomes
    the final element of the list.

    maxsplit and flags should be passed by keyword.  Supplying them
    positionally still works but emits a DeprecationWarning; supplying
    the same argument both ways, or more than four positional arguments,
    raises TypeError."""
    if not args:
        # Modern calling convention: nothing to unpack, nothing to warn about.
        return _compile(pattern, flags).split(string, maxsplit)

    # Legacy calling convention: the first extra positional is maxsplit,
    # the second (if any) is flags.
    if maxsplit is not _zero_sentinel:
        raise TypeError("split() got multiple values for argument 'maxsplit'")
    maxsplit, extra = args[0], args[1:]
    if extra:
        if flags is not _zero_sentinel:
            raise TypeError("split() got multiple values for argument 'flags'")
        flags, extra = extra[0], extra[1:]
        if extra:
            # 2 required + maxsplit + flags already consumed, hence the 4.
            raise TypeError("split() takes from 2 to 4 positional arguments "
                            "but %d were given" % (4 + len(extra)))

    # Imported lazily so the module only pays for it on the deprecated path.
    import warnings
    warnings.warn(
        "'maxsplit' is passed as positional argument",
        DeprecationWarning, stacklevel=2
    )
    return _compile(pattern, flags).split(string, maxsplit)
split.__text_signature__ = '(pattern, string, maxsplit=0, flags=0)'

def findall(pattern, string, flags=0):
"""Return a list of all non-overlapping matches in the string.
Expand Down
71 changes: 66 additions & 5 deletions Lib/test/test_re.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,10 @@ def test_basic_re_sub(self):
self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
'9.3 -3 24x100y')
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
'9.3 -3 23x99y')
with self.assertWarns(DeprecationWarning) as w:
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
'9.3 -3 23x99y')
self.assertEqual(w.filename, __file__)
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', count=3),
'9.3 -3 23x99y')

Expand Down Expand Up @@ -235,9 +237,42 @@ def test_sub_template_numeric_escape(self):

def test_qualified_re_sub(self):
self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
with self.assertWarns(DeprecationWarning) as w:
self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
self.assertEqual(w.filename, __file__)
self.assertEqual(re.sub('a', 'b', 'aaaaa', count=1), 'baaaa')

with self.assertRaisesRegex(TypeError,
r"sub\(\) got multiple values for argument 'count'"):
re.sub('a', 'b', 'aaaaa', 1, count=1)
with self.assertRaisesRegex(TypeError,
r"sub\(\) got multiple values for argument 'flags'"):
re.sub('a', 'b', 'aaaaa', 1, 0, flags=0)
with self.assertRaisesRegex(TypeError,
r"sub\(\) takes from 3 to 5 positional arguments but 6 "
r"were given"):
re.sub('a', 'b', 'aaaaa', 1, 0, 0)

def test_misuse_flags(self):
    # A flag passed in the third/fourth positional slot is taken as
    # count/maxsplit.  Each call must behave like the keyword form with
    # int(flag) and emit a DeprecationWarning attributed to this file.
    with self.assertWarns(DeprecationWarning) as caught:
        actual = re.sub('a', 'b', 'aaaaa', re.I)
    expected = re.sub('a', 'b', 'aaaaa', count=int(re.I))
    self.assertEqual(actual, expected)
    self.assertEqual(str(caught.warning),
                     "'count' is passed as positional argument")
    self.assertEqual(caught.filename, __file__)

    with self.assertWarns(DeprecationWarning) as caught:
        actual = re.subn("b*", "x", "xyz", re.I)
    expected = re.subn("b*", "x", "xyz", count=int(re.I))
    self.assertEqual(actual, expected)
    self.assertEqual(str(caught.warning),
                     "'count' is passed as positional argument")
    self.assertEqual(caught.filename, __file__)

    with self.assertWarns(DeprecationWarning) as caught:
        actual = re.split(":", ":a:b::c", re.I)
    expected = re.split(":", ":a:b::c", maxsplit=int(re.I))
    self.assertEqual(actual, expected)
    self.assertEqual(str(caught.warning),
                     "'maxsplit' is passed as positional argument")
    self.assertEqual(caught.filename, __file__)

def test_bug_114660(self):
self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
'hello there')
Expand Down Expand Up @@ -344,9 +379,22 @@ def test_re_subn(self):
self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
with self.assertWarns(DeprecationWarning) as w:
self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
self.assertEqual(w.filename, __file__)
self.assertEqual(re.subn("b*", "x", "xyz", count=2), ('xxxyz', 2))

with self.assertRaisesRegex(TypeError,
r"subn\(\) got multiple values for argument 'count'"):
re.subn('a', 'b', 'aaaaa', 1, count=1)
with self.assertRaisesRegex(TypeError,
r"subn\(\) got multiple values for argument 'flags'"):
re.subn('a', 'b', 'aaaaa', 1, 0, flags=0)
with self.assertRaisesRegex(TypeError,
r"subn\(\) takes from 3 to 5 positional arguments but 6 "
r"were given"):
re.subn('a', 'b', 'aaaaa', 1, 0, 0)

def test_re_split(self):
for string in ":a:b::c", S(":a:b::c"):
self.assertTypedEqual(re.split(":", string),
Expand Down Expand Up @@ -401,7 +449,9 @@ def test_re_split(self):
self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)

def test_qualified_re_split(self):
self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
with self.assertWarns(DeprecationWarning) as w:
self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
self.assertEqual(w.filename, __file__)
self.assertEqual(re.split(":", ":a:b::c", maxsplit=2), ['', 'a', 'b::c'])
self.assertEqual(re.split(':', 'a:b:c:d', maxsplit=2), ['a', 'b', 'c:d'])
self.assertEqual(re.split("(:)", ":a:b::c", maxsplit=2),
Expand All @@ -411,6 +461,17 @@ def test_qualified_re_split(self):
self.assertEqual(re.split("(:*)", ":a:b::c", maxsplit=2),
['', ':', '', '', 'a:b::c'])

with self.assertRaisesRegex(TypeError,
r"split\(\) got multiple values for argument 'maxsplit'"):
re.split(":", ":a:b::c", 2, maxsplit=2)
with self.assertRaisesRegex(TypeError,
r"split\(\) got multiple values for argument 'flags'"):
re.split(":", ":a:b::c", 2, 0, flags=0)
with self.assertRaisesRegex(TypeError,
r"split\(\) takes from 2 to 4 positional arguments but 5 "
r"were given"):
re.split(":", ":a:b::c", 2, 0, 0)

def test_re_findall(self):
self.assertEqual(re.findall(":+", "abc"), [])
for string in "a:b::c:::d", S("a:b::c:::d"):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Deprecate passing optional arguments *maxsplit*, *count* and *flags* in
module-level functions :func:`re.split`, :func:`re.sub` and :func:`re.subn` as positional arguments.
They should only be passed by keyword.