Skip to content

Commit ea21389

Browse files
epicfaace authored and miss-islington committed
[3.7] bpo-37764: Fix infinite loop when parsing unstructured email headers. (GH-15239) (GH-15654)
…aders. (GH-15239)

Fixes a case in which email._header_value_parser.get_unstructured hangs the system for some invalid headers. This covers the cases in which the header contains either:
- an encoded word without trailing whitespace
- an invalid encoded word

https://bugs.python.org/issue37764

This fix should also be backported to 3.7 and 3.8.

https://bugs.python.org/issue37764
(cherry picked from commit c5b242f)

Co-authored-by: Ashwin Ramaswami <[email protected]>

https://bugs.python.org/issue37764
1 parent 4805805 commit ea21389

File tree

5 files changed

+55
-3
lines changed

5 files changed

+55
-3
lines changed

Lib/email/_header_value_parser.py

Lines changed: 16 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -933,6 +933,10 @@ def __str__(self):
933933
return ''
934934

935935

936+
class _InvalidEwError(errors.HeaderParseError):
937+
"""Invalid encoded word found while parsing headers."""
938+
939+
936940
# XXX these need to become classes and used as instances so
937941
# that a program can't change them in a parse tree and screw
938942
# up other parse trees. Maybe should have tests for that, too.
@@ -1037,7 +1041,10 @@ def get_encoded_word(value):
10371041
raise errors.HeaderParseError(
10381042
"expected encoded word but found {}".format(value))
10391043
remstr = ''.join(remainder)
1040-
if len(remstr) > 1 and remstr[0] in hexdigits and remstr[1] in hexdigits:
1044+
if (len(remstr) > 1 and
1045+
remstr[0] in hexdigits and
1046+
remstr[1] in hexdigits and
1047+
tok.count('?') < 2):
10411048
# The ? after the CTE was followed by an encoded word escape (=XX).
10421049
rest, *remainder = remstr.split('?=', 1)
10431050
tok = tok + '?=' + rest
@@ -1049,7 +1056,7 @@ def get_encoded_word(value):
10491056
try:
10501057
text, charset, lang, defects = _ew.decode('=?' + tok + '?=')
10511058
except ValueError:
1052-
raise errors.HeaderParseError(
1059+
raise _InvalidEwError(
10531060
"encoded word format invalid: '{}'".format(ew.cte))
10541061
ew.charset = charset
10551062
ew.lang = lang
@@ -1099,9 +1106,12 @@ def get_unstructured(value):
10991106
token, value = get_fws(value)
11001107
unstructured.append(token)
11011108
continue
1109+
valid_ew = True
11021110
if value.startswith('=?'):
11031111
try:
11041112
token, value = get_encoded_word(value)
1113+
except _InvalidEwError:
1114+
valid_ew = False
11051115
except errors.HeaderParseError:
11061116
# XXX: Need to figure out how to register defects when
11071117
# appropriate here.
@@ -1123,7 +1133,10 @@ def get_unstructured(value):
11231133
# Split in the middle of an atom if there is a rfc2047 encoded word
11241134
# which does not have WSP on both sides. The defect will be registered
11251135
# the next time through the loop.
1126-
if rfc2047_matcher.search(tok):
1136+
# This needs to only be performed when the encoded word is valid;
1137+
# otherwise, performing it on an invalid encoded word can cause
1138+
# the parser to go in an infinite loop.
1139+
if valid_ew and rfc2047_matcher.search(tok):
11271140
tok, *remainder = value.partition('=?')
11281141
vtext = ValueTerminal(tok, 'vtext')
11291142
_validate_xtext(vtext)

Lib/test/test_email/test__header_value_parser.py

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -383,6 +383,22 @@ def test_get_unstructured_ew_without_trailing_whitespace(self):
383383
[errors.InvalidHeaderDefect],
384384
'')
385385

386+
def test_get_unstructured_without_trailing_whitespace_hang_case(self):
387+
self._test_get_x(self._get_unst,
388+
'=?utf-8?q?somevalue?=aa',
389+
'somevalueaa',
390+
'somevalueaa',
391+
[errors.InvalidHeaderDefect],
392+
'')
393+
394+
def test_get_unstructured_invalid_ew(self):
395+
self._test_get_x(self._get_unst,
396+
'=?utf-8?q?=somevalue?=',
397+
'=?utf-8?q?=somevalue?=',
398+
'=?utf-8?q?=somevalue?=',
399+
[],
400+
'')
401+
386402
# get_qp_ctext
387403

388404
def test_get_qp_ctext_only(self):

Lib/test/test_email/test_email.py

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5381,6 +5381,27 @@ def test_rfc2231_unencoded_then_encoded_segments(self):
53815381
eq(language, 'en-us')
53825382
eq(s, 'My Document For You')
53835383

5384+
def test_should_not_hang_on_invalid_ew_messages(self):
5385+
messages = ["""From: [email protected]
5386+
5387+
Bad-Header:
5388+
=?us-ascii?Q?LCSwrV11+IB0rSbSker+M9vWR7wEDSuGqmHD89Gt=ea0nJFSaiz4vX3XMJPT4vrE?=
5389+
=?us-ascii?Q?xGUZeOnp0o22pLBB7CYLH74Js=wOlK6Tfru2U47qR?=
5390+
=?us-ascii?Q?72OfyEY2p2=2FrA9xNFyvH+fBTCmazxwzF8nGkK6D?=
5391+
5392+
Hello!
5393+
""", """From: ����� �������� <xxx@xxx>
5394+
To: "xxx" <xxx@xxx>
5395+
Subject: ��� ���������� ����� ����� � ��������� �� ����
5396+
MIME-Version: 1.0
5397+
Content-Type: text/plain; charset="windows-1251";
5398+
Content-Transfer-Encoding: 8bit
5399+
5400+
�� ����� � ���� ������ ��� ��������
5401+
"""]
5402+
for m in messages:
5403+
with self.subTest(m=m):
5404+
msg = email.message_from_string(m)
53845405

53855406

53865407
# Tests to ensure that signed parts of an email are completely preserved, as

Misc/ACKS

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1307,6 +1307,7 @@ Burton Radons
13071307
Abhilash Raj
13081308
Shorya Raj
13091309
Dhushyanth Ramasamy
1310+
Ashwin Ramaswami
13101311
Jeff Ramnani
13111312
Bayard Randel
13121313
Varpu Rantala
Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
Fixes email._header_value_parser.get_unstructured going into an infinite loop for a specific case in which the email header does not have trailing whitespace, and the case in which it contains an invalid encoded word. Patch by Ashwin Ramaswami.

0 commit comments

Comments
 (0)