From 85362450a39771c9379c3e35f9a97461a2a559b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Thu, 28 Jan 2021 14:50:03 -0500 Subject: [PATCH] Backport PR #39440: REGR: prefer user-provided mode --- doc/source/whatsnew/v1.2.2.rst | 1 + pandas/io/common.py | 9 ++++++--- pandas/tests/io/test_common.py | 32 +++++++++++++++++++++++++++++++- 3 files changed, 38 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.2.2.rst b/doc/source/whatsnew/v1.2.2.rst index 656e779055486..baa0cc2ac9e18 100644 --- a/doc/source/whatsnew/v1.2.2.rst +++ b/doc/source/whatsnew/v1.2.2.rst @@ -16,6 +16,7 @@ Fixed regressions ~~~~~~~~~~~~~~~~~ - Fixed regression in :meth:`~DataFrame.to_pickle` failing to create bz2/xz compressed pickle files with ``protocol=5`` (:issue:`39002`) - Fixed regression in :func:`pandas.testing.assert_series_equal` and :func:`pandas.testing.assert_frame_equal` always raising ``AssertionError`` when comparing extension dtypes (:issue:`39410`) +- Fixed regression in :meth:`~DataFrame.to_csv` opening ``codecs.StreamWriter`` in binary mode instead of in text mode and ignoring user-provided ``mode`` (:issue:`39247`) - .. --------------------------------------------------------------------------- diff --git a/pandas/io/common.py b/pandas/io/common.py index 90622ef0c0f2c..be353fefdd1ef 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -845,12 +845,15 @@ def file_exists(filepath_or_buffer: FilePathOrBuffer) -> bool: def _is_binary_mode(handle: FilePathOrBuffer, mode: str) -> bool: """Whether the handle is opened in binary mode""" + # specified by user + if "t" in mode or "b" in mode: + return "b" in mode + # classes that expect string but have 'b' in mode - text_classes = (codecs.StreamReaderWriter,) - if isinstance(handle, text_classes): + text_classes = (codecs.StreamWriter, codecs.StreamReader, codecs.StreamReaderWriter) + if issubclass(type(handle), text_classes): return False # classes that expect bytes binary_classes = (BufferedIOBase, RawIOBase) - return isinstance(handle, binary_classes) or "b" in getattr(handle, "mode", mode) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 80e2b36764ba0..540f12841de1b 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -2,7 +2,7 @@ Tests for the pandas.io.common functionalities """ import codecs -from io import StringIO +from io import BytesIO, StringIO import mmap import os from pathlib import Path @@ -443,3 +443,33 @@ def test_codecs_encoding(encoding, format): else: df = pd.read_json(handle) tm.assert_frame_equal(expected, df) + + +def test_codecs_get_writer_reader(): + # GH39247 + expected = tm.makeDataFrame() + with tm.ensure_clean() as path: + with open(path, "wb") as handle: + with codecs.getwriter("utf-8")(handle) as encoded: + expected.to_csv(encoded) + with open(path, "rb") as handle: + with codecs.getreader("utf-8")(handle) as encoded: + df = pd.read_csv(encoded, index_col=0) + tm.assert_frame_equal(expected, df) + + +@pytest.mark.parametrize( + "io_class,mode,msg", + [ + (BytesIO, "t", "a bytes-like object is required, not 'str'"), + (StringIO, "b", "string argument expected, got 'bytes'"), + ], +) +def test_explicit_encoding(io_class, mode, msg): + # GH39247; this test makes sure that if a user provides mode="*t" or "*b", + # it is used. In the case of this test it leads to an error as intentionally the + # wrong mode is requested + expected = tm.makeDataFrame() + with io_class() as buffer: + with pytest.raises(TypeError, match=msg): + expected.to_csv(buffer, mode=f"w{mode}")