From 79eaa092d99b89b7bd20718336b5fa86e02d21d5 Mon Sep 17 00:00:00 2001 From: Gihwan Kim Date: Mon, 20 Feb 2023 18:40:32 +0900 Subject: [PATCH 1/9] gh-101961 Do not pass encoding if binary mode --- Lib/fileinput.py | 2 ++ Lib/test/test_fileinput.py | 40 ++++++++++++++++++++++++++------------ 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/Lib/fileinput.py b/Lib/fileinput.py index e234dc9ea65f15..a88047bab24423 100644 --- a/Lib/fileinput.py +++ b/Lib/fileinput.py @@ -409,6 +409,8 @@ def hook_compressed(filename, mode, *, encoding=None, errors=None): import bz2 stream = bz2.BZ2File(filename, mode) else: + if "b" in mode: + return open(filename, mode, errors=errors) return open(filename, mode, encoding=encoding, errors=errors) # gzip and bz2 are binary mode by default. diff --git a/Lib/test/test_fileinput.py b/Lib/test/test_fileinput.py index ac20c74baa09e2..c6e6771731c273 100644 --- a/Lib/test/test_fileinput.py +++ b/Lib/test/test_fileinput.py @@ -855,29 +855,29 @@ def setUp(self): self.fake_open = InvocationRecorder() def test_empty_string(self): - self.do_test_use_builtin_open("", 1) + self.do_test_use_builtin_open_text("", "r") def test_no_ext(self): - self.do_test_use_builtin_open("abcd", 2) + self.do_test_use_builtin_open_text("abcd", "r") @unittest.skipUnless(gzip, "Requires gzip and zlib") def test_gz_ext_fake(self): original_open = gzip.open gzip.open = self.fake_open try: - result = fileinput.hook_compressed("test.gz", "3") + result = fileinput.hook_compressed("test.gz", "r") finally: gzip.open = original_open self.assertEqual(self.fake_open.invocation_count, 1) - self.assertEqual(self.fake_open.last_invocation, (("test.gz", "3"), {})) + self.assertEqual(self.fake_open.last_invocation, (("test.gz", "r"), {})) @unittest.skipUnless(gzip, "Requires gzip and zlib") def test_gz_with_encoding_fake(self): original_open = gzip.open gzip.open = lambda filename, mode: io.BytesIO(b'Ex-binary string') try: - result = fileinput.hook_compressed("test.gz", 
"3", encoding="utf-8") + result = fileinput.hook_compressed("test.gz", "r", encoding="utf-8") finally: gzip.open = original_open self.assertEqual(list(result), ['Ex-binary string']) @@ -887,23 +887,40 @@ def test_bz2_ext_fake(self): original_open = bz2.BZ2File bz2.BZ2File = self.fake_open try: - result = fileinput.hook_compressed("test.bz2", "4") + result = fileinput.hook_compressed("test.bz2", "r") finally: bz2.BZ2File = original_open self.assertEqual(self.fake_open.invocation_count, 1) - self.assertEqual(self.fake_open.last_invocation, (("test.bz2", "4"), {})) + self.assertEqual(self.fake_open.last_invocation, (("test.bz2", "r"), {})) def test_blah_ext(self): - self.do_test_use_builtin_open("abcd.blah", "5") + self.do_test_use_builtin_open_binary("abcd.blah", "rb") def test_gz_ext_builtin(self): - self.do_test_use_builtin_open("abcd.Gz", "6") + self.do_test_use_builtin_open_binary("abcd.Gz", "rb") def test_bz2_ext_builtin(self): - self.do_test_use_builtin_open("abcd.Bz2", "7") + self.do_test_use_builtin_open_binary("abcd.Bz2", "rb") - def do_test_use_builtin_open(self, filename, mode): + def test_binary_mode_encoding(self): + self.do_test_use_builtin_open_binary("abcd", "rb") + + def test_text_mode_encoding(self): + self.do_test_use_builtin_open_text("abcd", "r") + + def do_test_use_builtin_open_binary(self, filename, mode): + original_open = self.replace_builtin_open(self.fake_open) + try: + result = fileinput.hook_compressed(filename, mode) + finally: + self.replace_builtin_open(original_open) + + self.assertEqual(self.fake_open.invocation_count, 1) + self.assertEqual(self.fake_open.last_invocation, + ((filename, mode), {'errors': None})) + + def do_test_use_builtin_open_text(self, filename, mode): original_open = self.replace_builtin_open(self.fake_open) try: result = fileinput.hook_compressed(filename, mode) @@ -980,7 +997,6 @@ def check(mode, expected_lines): with self.assertRaises(ValueError): check('rb', ['A\n', 'B\r\n', 'C\r', 'D\u20ac']) - class 
MiscTest(unittest.TestCase): def test_all(self): From a1121ea9a94fd04e76e5c26b43acb595b3c5e31e Mon Sep 17 00:00:00 2001 From: Gihwan Kim Date: Mon, 20 Feb 2023 18:46:45 +0900 Subject: [PATCH 2/9] gh-101961 Update `Misc/ACKS` --- Misc/ACKS | 1 + 1 file changed, 1 insertion(+) diff --git a/Misc/ACKS b/Misc/ACKS index ca92608868f23f..8e3089681635d7 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1001,6 +1001,7 @@ Ilia Kurenkov Vladimir Kushnir Erno Kuusela Kabir Kwatra +Gihwan Kim Ross Lagerwall Cameron Laird Loïc Lajeanne From 0b63c66e7e01fabd8d917845ba88de83c0f71269 Mon Sep 17 00:00:00 2001 From: Gihwan Kim Date: Tue, 21 Feb 2023 09:27:47 +0900 Subject: [PATCH 3/9] Add --- .../next/Library/2023-02-21-09-27-08.gh-issue-101961.OHSaJh.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2023-02-21-09-27-08.gh-issue-101961.OHSaJh.rst diff --git a/Misc/NEWS.d/next/Library/2023-02-21-09-27-08.gh-issue-101961.OHSaJh.rst b/Misc/NEWS.d/next/Library/2023-02-21-09-27-08.gh-issue-101961.OHSaJh.rst new file mode 100644 index 00000000000000..8634ddba81cb32 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-02-21-09-27-08.gh-issue-101961.OHSaJh.rst @@ -0,0 +1,2 @@ +`fileinput.hook_compressed` doesn't pass `encoding` if the given `mode` +contains `b`. From c81a2d1c9d74f11941a4cb277049acd203f67e92 Mon Sep 17 00:00:00 2001 From: Gihwan Kim Date: Tue, 21 Feb 2023 09:34:10 +0900 Subject: [PATCH 4/9] Address comments by @corona10 --- Lib/fileinput.py | 4 +--- Lib/test/test_fileinput.py | 2 +- Misc/ACKS | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/Lib/fileinput.py b/Lib/fileinput.py index a88047bab24423..1b25f28f3d3432 100644 --- a/Lib/fileinput.py +++ b/Lib/fileinput.py @@ -399,7 +399,7 @@ def isstdin(self): def hook_compressed(filename, mode, *, encoding=None, errors=None): - if encoding is None: # EncodingWarning is emitted in FileInput() already. 
+ if encoding is None and "b" not in mode: # EncodingWarning is emitted in FileInput() already. encoding = "locale" ext = os.path.splitext(filename)[1] if ext == '.gz': @@ -409,8 +409,6 @@ def hook_compressed(filename, mode, *, encoding=None, errors=None): import bz2 stream = bz2.BZ2File(filename, mode) else: - if "b" in mode: - return open(filename, mode, errors=errors) return open(filename, mode, encoding=encoding, errors=errors) # gzip and bz2 are binary mode by default. diff --git a/Lib/test/test_fileinput.py b/Lib/test/test_fileinput.py index c6e6771731c273..62441bf3399174 100644 --- a/Lib/test/test_fileinput.py +++ b/Lib/test/test_fileinput.py @@ -918,7 +918,7 @@ def do_test_use_builtin_open_binary(self, filename, mode): self.assertEqual(self.fake_open.invocation_count, 1) self.assertEqual(self.fake_open.last_invocation, - ((filename, mode), {'errors': None})) + ((filename, mode), {'encoding': None, 'errors': None})) def do_test_use_builtin_open_text(self, filename, mode): original_open = self.replace_builtin_open(self.fake_open) diff --git a/Misc/ACKS b/Misc/ACKS index 8e3089681635d7..a9d4e453c7b59c 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -927,6 +927,7 @@ Tyler Kieft Mads Kiilerich Jason Killen Derek D. 
Kim +Gihwan Kim Jan Kim Taek Joo Kim Sam Kimbrel @@ -1001,7 +1002,6 @@ Ilia Kurenkov Vladimir Kushnir Erno Kuusela Kabir Kwatra -Gihwan Kim Ross Lagerwall Cameron Laird Loïc Lajeanne From b9e4275c4acbbc32ac36ae3e633938fab02691f3 Mon Sep 17 00:00:00 2001 From: Gihwan Kim Date: Tue, 21 Feb 2023 09:38:23 +0900 Subject: [PATCH 5/9] Update `Misc/NEWS` --- .../next/Library/2023-02-21-09-27-08.gh-issue-101961.OHSaJh.rst | 2 -- .../next/Library/2023-02-21-09-38-13.gh-issue-101961.E4WnAC.rst | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) delete mode 100644 Misc/NEWS.d/next/Library/2023-02-21-09-27-08.gh-issue-101961.OHSaJh.rst create mode 100644 Misc/NEWS.d/next/Library/2023-02-21-09-38-13.gh-issue-101961.E4WnAC.rst diff --git a/Misc/NEWS.d/next/Library/2023-02-21-09-27-08.gh-issue-101961.OHSaJh.rst b/Misc/NEWS.d/next/Library/2023-02-21-09-27-08.gh-issue-101961.OHSaJh.rst deleted file mode 100644 index 8634ddba81cb32..00000000000000 --- a/Misc/NEWS.d/next/Library/2023-02-21-09-27-08.gh-issue-101961.OHSaJh.rst +++ /dev/null @@ -1,2 +0,0 @@ -`fileinput.hook_compressed` doesn't pass `encoding` if the given `mode` -contains `b`. diff --git a/Misc/NEWS.d/next/Library/2023-02-21-09-38-13.gh-issue-101961.E4WnAC.rst b/Misc/NEWS.d/next/Library/2023-02-21-09-38-13.gh-issue-101961.E4WnAC.rst new file mode 100644 index 00000000000000..7c2d0863bf6817 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-02-21-09-38-13.gh-issue-101961.E4WnAC.rst @@ -0,0 +1,2 @@ +`fileinput.hook_compressed` doesn't set the given `None` `encoding` to +`locale` if the given `mode` contains `b`. 
From 6123b80d79b7f1605a664617dfa108eabe40ac33 Mon Sep 17 00:00:00 2001 From: Gihwan Kim Date: Tue, 21 Feb 2023 10:06:16 +0900 Subject: [PATCH 6/9] Fix backtick --- .../next/Library/2023-02-21-09-38-13.gh-issue-101961.E4WnAC.rst | 2 -- .../next/Library/2023-02-21-10-05-33.gh-issue-101961.7e56jh.rst | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) delete mode 100644 Misc/NEWS.d/next/Library/2023-02-21-09-38-13.gh-issue-101961.E4WnAC.rst create mode 100644 Misc/NEWS.d/next/Library/2023-02-21-10-05-33.gh-issue-101961.7e56jh.rst diff --git a/Misc/NEWS.d/next/Library/2023-02-21-09-38-13.gh-issue-101961.E4WnAC.rst b/Misc/NEWS.d/next/Library/2023-02-21-09-38-13.gh-issue-101961.E4WnAC.rst deleted file mode 100644 index 7c2d0863bf6817..00000000000000 --- a/Misc/NEWS.d/next/Library/2023-02-21-09-38-13.gh-issue-101961.E4WnAC.rst +++ /dev/null @@ -1,2 +0,0 @@ -`fileinput.hook_compressed` doesn't set the given `None` `encoding` to -`locale` if the given `mode` contains `b`. diff --git a/Misc/NEWS.d/next/Library/2023-02-21-10-05-33.gh-issue-101961.7e56jh.rst b/Misc/NEWS.d/next/Library/2023-02-21-10-05-33.gh-issue-101961.7e56jh.rst new file mode 100644 index 00000000000000..e178fd82c7e66f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-02-21-10-05-33.gh-issue-101961.7e56jh.rst @@ -0,0 +1,2 @@ +``fileinput.hook_compressed`` doesn't set the given ``None`` ``encoding`` to +``locale`` if the given ``mode`` contains ``b``. 
From e91e0ec3137968dbf19e7e17188385fac22e095f Mon Sep 17 00:00:00 2001 From: Gihwan Kim Date: Tue, 21 Feb 2023 10:55:29 +0900 Subject: [PATCH 7/9] Fix wrong change --- Lib/test/test_fileinput.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/test/test_fileinput.py b/Lib/test/test_fileinput.py index 62441bf3399174..786d9186634305 100644 --- a/Lib/test/test_fileinput.py +++ b/Lib/test/test_fileinput.py @@ -997,6 +997,7 @@ def check(mode, expected_lines): with self.assertRaises(ValueError): check('rb', ['A\n', 'B\r\n', 'C\r', 'D\u20ac']) + class MiscTest(unittest.TestCase): def test_all(self): From 511bac3297b49a4ea761a5c8dd400e2db17cd810 Mon Sep 17 00:00:00 2001 From: Gihwan Kim Date: Tue, 21 Feb 2023 11:19:47 +0900 Subject: [PATCH 8/9] Update Misc/NEWS.d/next/Library/2023-02-21-10-05-33.gh-issue-101961.7e56jh.rst Co-authored-by: Dong-hee Na --- .../Library/2023-02-21-10-05-33.gh-issue-101961.7e56jh.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2023-02-21-10-05-33.gh-issue-101961.7e56jh.rst b/Misc/NEWS.d/next/Library/2023-02-21-10-05-33.gh-issue-101961.7e56jh.rst index e178fd82c7e66f..5496ea5d7af104 100644 --- a/Misc/NEWS.d/next/Library/2023-02-21-10-05-33.gh-issue-101961.7e56jh.rst +++ b/Misc/NEWS.d/next/Library/2023-02-21-10-05-33.gh-issue-101961.7e56jh.rst @@ -1,2 +1,2 @@ -``fileinput.hook_compressed`` doesn't set the given ``None`` ``encoding`` to -``locale`` if the given ``mode`` contains ``b``. +For the binary mode, :func:`file input.hookcompressed` doesn't set the ``encoding`` value +even if the value is ``None``. Patch by Gihwan Kim. 
From b49c1670baed3e7746fb7a67a3b2c16c51ed2fbc Mon Sep 17 00:00:00 2001 From: Dong-hee Na Date: Tue, 21 Feb 2023 11:45:19 +0900 Subject: [PATCH 9/9] Update Misc/NEWS.d/next/Library/2023-02-21-10-05-33.gh-issue-101961.7e56jh.rst --- .../next/Library/2023-02-21-10-05-33.gh-issue-101961.7e56jh.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2023-02-21-10-05-33.gh-issue-101961.7e56jh.rst b/Misc/NEWS.d/next/Library/2023-02-21-10-05-33.gh-issue-101961.7e56jh.rst index 5496ea5d7af104..a3d4119e7cbdce 100644 --- a/Misc/NEWS.d/next/Library/2023-02-21-10-05-33.gh-issue-101961.7e56jh.rst +++ b/Misc/NEWS.d/next/Library/2023-02-21-10-05-33.gh-issue-101961.7e56jh.rst @@ -1,2 +1,2 @@ -For the binary mode, :func:`file input.hookcompressed` doesn't set the ``encoding`` value +For the binary mode, :func:`fileinput.hook_compressed` doesn't set the ``encoding`` value even if the value is ``None``. Patch by Gihwan Kim.