From 18ecceba9dd158dd3b899a1378c9488a01cf25f1 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Wed, 8 Jun 2022 14:16:06 +0200 Subject: [PATCH 1/3] gh-93575: Use correct way to calculate PyUnicode struct sizes --- Lib/test/test_unicode.py | 7 ++++--- .../Tests/2022-06-08-14-17-59.gh-issue-93575.Xb2LNB.rst | 4 ++++ 2 files changed, 8 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Tests/2022-06-08-14-17-59.gh-issue-93575.Xb2LNB.rst diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 64abc0c761b3c8..0fa6097008cf6d 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -2370,9 +2370,10 @@ def test_expandtabs_optimization(self): self.assertIs(s.expandtabs(), s) def test_raiseMemError(self): - null_byte = 1 - ascii_struct_size = sys.getsizeof("a") - len("a") - null_byte - compact_struct_size = sys.getsizeof("\xff") - len("\xff") - null_byte + asciifields = "nnb" + compactfields = asciifields + "nP" + ascii_struct_size = support.calcobjsize(asciifields) + compact_struct_size = support.calcobjsize(compactfields) for char in ('a', '\xe9', '\u20ac', '\U0010ffff'): code = ord(char) diff --git a/Misc/NEWS.d/next/Tests/2022-06-08-14-17-59.gh-issue-93575.Xb2LNB.rst b/Misc/NEWS.d/next/Tests/2022-06-08-14-17-59.gh-issue-93575.Xb2LNB.rst new file mode 100644 index 00000000000000..98d15328a087ae --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2022-06-08-14-17-59.gh-issue-93575.Xb2LNB.rst @@ -0,0 +1,4 @@ +Fix issue with test_unicode test_raiseMemError. The test case now uses +``test.support.calcobjsize`` to calculate the size of PyUnicode structs. +:func:`sys.getsizeof` may return a different size when a string has UTF-8 +memory. 
From 5b2d9b0d8b991f098d50d8ac502b0b8d340100e9 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Wed, 8 Jun 2022 18:36:14 +0200 Subject: [PATCH 2/3] Add comment to keep test_sys and test_unicode in sync --- Lib/test/test_sys.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 94a09ff549331a..1dc10d8b0a39ac 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -1539,6 +1539,7 @@ class newstyleclass(object): pass samples = ['1'*100, '\xff'*50, '\u0100'*40, '\uffff'*100, '\U00010000'*30, '\U0010ffff'*100] + # also update field definitions in test_unicode.test_raiseMemError asciifields = "nnb" compactfields = asciifields + "nP" unicodefields = compactfields + "P" From 52ecb606b827227212a7cf01381d4fe224b091de Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Wed, 8 Jun 2022 19:40:45 +0200 Subject: [PATCH 3/3] Fix case code < 256 --- Lib/test/test_unicode.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 0fa6097008cf6d..9765ed97a60a44 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -2377,9 +2377,12 @@ def test_raiseMemError(self): for char in ('a', '\xe9', '\u20ac', '\U0010ffff'): code = ord(char) - if code < 0x100: + if code < 0x80: char_size = 1 # sizeof(Py_UCS1) struct_size = ascii_struct_size + elif code < 0x100: + char_size = 1 # sizeof(Py_UCS1) + struct_size = compact_struct_size elif code < 0x10000: char_size = 2 # sizeof(Py_UCS2) struct_size = compact_struct_size @@ -2391,7 +2394,16 @@ def test_raiseMemError(self): # be allocatable, given enough memory. 
maxlen = ((sys.maxsize - struct_size) // char_size) alloc = lambda: char * maxlen - with self.subTest(char=char): + with self.subTest( + char=char, + struct_size=struct_size, + char_size=char_size + ): + # self-check + self.assertEqual( + sys.getsizeof(char * 42), + struct_size + (char_size * (42 + 1)) + ) self.assertRaises(MemoryError, alloc) self.assertRaises(MemoryError, alloc)