From 43c6c790d342e9ce4fd5c0b0aa1c1dee3963ba5e Mon Sep 17 00:00:00 2001 From: minggli Date: Tue, 29 May 2018 17:22:07 +0100 Subject: [PATCH 1/6] accept filehandle for to_csv compression --- pandas/io/formats/csvs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 29b8d29af0808..0be2a180fbfa2 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -154,9 +154,9 @@ def save(self): # GH 17778 handles compression for byte strings. if not close and self.compression: f.close() - with open(self.path_or_buf, 'r') as f: + with open(f.name, 'r') as f: data = f.read() - f, handles = _get_handle(self.path_or_buf, self.mode, + f, handles = _get_handle(f.name, self.mode, encoding=encoding, compression=self.compression) f.write(data) From 00712b5b763e854986a164c293feea6036d89963 Mon Sep 17 00:00:00 2001 From: minggli Date: Tue, 29 May 2018 18:26:35 +0100 Subject: [PATCH 2/6] test filehandle compression for to_csv --- pandas/tests/test_common.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index bb7ee1b911fee..6df51dc6cad24 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -241,3 +241,24 @@ def test_compression_size(obj, method, compression): getattr(obj, method)(filename, compression=None) uncompressed = os.path.getsize(filename) assert uncompressed > compressed + + +@pytest.mark.parametrize('obj', [ + DataFrame(100 * [[0.123456, 0.234567, 0.567567], + [12.32112, 123123.2, 321321.2]], + columns=['X', 'Y', 'Z']), + Series(100 * [0.123456, 0.234567, 0.567567], name='X')]) +@pytest.mark.parametrize('method', ['to_csv']) +def test_compression_size_fh(obj, method, compression): + if not compression: + pytest.skip("only test compression case.") + + with tm.ensure_clean() as filename: + with open(filename, 'w') as fh: + getattr(obj, method)(fh, compression=compression) + 
compressed = os.path.getsize(filename) + with tm.ensure_clean() as filename: + with open(filename, 'w') as fh: + getattr(obj, method)(fh, compression=None) + uncompressed = os.path.getsize(filename) + assert uncompressed > compressed From 799596f77dfcb1d8e62851f05e3bbab90c3a3e0c Mon Sep 17 00:00:00 2001 From: Ming Li Date: Tue, 29 May 2018 19:46:12 +0100 Subject: [PATCH 3/6] get filesize outside context manager --- pandas/tests/test_common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 6df51dc6cad24..1470781d26556 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -256,9 +256,9 @@ def test_compression_size_fh(obj, method, compression): with tm.ensure_clean() as filename: with open(filename, 'w') as fh: getattr(obj, method)(fh, compression=compression) - compressed = os.path.getsize(filename) + compressed = os.path.getsize(filename) with tm.ensure_clean() as filename: with open(filename, 'w') as fh: getattr(obj, method)(fh, compression=None) - uncompressed = os.path.getsize(filename) + uncompressed = os.path.getsize(filename) assert uncompressed > compressed From 6a90205cac46d05910f9892fc49f90b246ba7d23 Mon Sep 17 00:00:00 2001 From: Ming Li Date: Tue, 29 May 2018 20:31:35 +0100 Subject: [PATCH 4/6] update docs --- pandas/core/frame.py | 3 +-- pandas/core/series.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 22677b19192e1..0899e9cd87aba 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1689,8 +1689,7 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, defaults to 'ascii' on Python 2 and 'utf-8' on Python 3. compression : string, optional A string representing the compression to use in the output file. - Allowed values are 'gzip', 'bz2', 'zip', 'xz'. This input is only - used when the first argument is a filename. 
+ Allowed values are 'gzip', 'bz2', 'zip', 'xz'. line_terminator : string, default ``'\n'`` The newline character or character sequence to use in the output file diff --git a/pandas/core/series.py b/pandas/core/series.py index c9329e8b9e572..f25f73513df30 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3761,8 +3761,7 @@ def to_csv(self, path=None, index=True, sep=",", na_rep='', non-ascii, for python versions prior to 3 compression : string, optional A string representing the compression to use in the output file. - Allowed values are 'gzip', 'bz2', 'zip', 'xz'. This input is only - used when the first argument is a filename. + Allowed values are 'gzip', 'bz2', 'zip', 'xz'. date_format: string, default None Format string for datetime objects. decimal: string, default '.' From ebf9d2819b70ba8d37048de8fa2ad40e02066e73 Mon Sep 17 00:00:00 2001 From: Ming Li Date: Wed, 30 May 2018 22:24:01 +0100 Subject: [PATCH 5/6] refactor test --- pandas/conftest.py | 10 ++++++++++ pandas/tests/test_common.py | 15 +++++++-------- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index b09cb872a12fb..a463f573c82e0 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -105,6 +105,16 @@ def compression(request): return request.param +@pytest.fixture(params=['gzip', 'bz2', 'zip', + pytest.param('xz', marks=td.skip_if_no_lzma)]) +def compression_only(request): + """ + Fixture for trying common compression types in compression tests excluding + uncompressed case + """ + return request.param + + @pytest.fixture(scope='module') def datetime_tz_utc(): from datetime import timezone diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 1470781d26556..88e469731060d 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -231,12 +231,10 @@ def test_standardize_mapping(): columns=['X', 'Y', 'Z']), Series(100 * [0.123456, 0.234567, 0.567567], name='X')]) 
@pytest.mark.parametrize('method', ['to_pickle', 'to_json', 'to_csv']) -def test_compression_size(obj, method, compression): - if not compression: - pytest.skip("only test compression case.") +def test_compression_size(obj, method, compression_only): with tm.ensure_clean() as filename: - getattr(obj, method)(filename, compression=compression) + getattr(obj, method)(filename, compression=compression_only) compressed = os.path.getsize(filename) getattr(obj, method)(filename, compression=None) uncompressed = os.path.getsize(filename) @@ -249,16 +247,17 @@ def test_compression_size(obj, method, compression): columns=['X', 'Y', 'Z']), Series(100 * [0.123456, 0.234567, 0.567567], name='X')]) @pytest.mark.parametrize('method', ['to_csv']) -def test_compression_size_fh(obj, method, compression): - if not compression: - pytest.skip("only test compression case.") +def test_compression_size_fh(obj, method, compression_only): with tm.ensure_clean() as filename: with open(filename, 'w') as fh: - getattr(obj, method)(fh, compression=compression) + getattr(obj, method)(fh, compression=compression_only) + # GH 17778 + assert fh.closed compressed = os.path.getsize(filename) with tm.ensure_clean() as filename: with open(filename, 'w') as fh: getattr(obj, method)(fh, compression=None) + assert not fh.closed uncompressed = os.path.getsize(filename) assert uncompressed > compressed From 8d0c45b91632f12a65f858db4765bac85be98bca Mon Sep 17 00:00:00 2001 From: Ming Li Date: Wed, 30 May 2018 22:34:08 +0100 Subject: [PATCH 6/6] add whatsnew --- doc/source/whatsnew/v0.24.0.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index e931450cb5c01..55e76512b2440 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -13,7 +13,7 @@ New features Other Enhancements ^^^^^^^^^^^^^^^^^^ - :func:`to_datetime` now supports the ``%Z`` and ``%z`` directive when passed into ``format`` 
(:issue:`13486`) -- +- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support the ``compression`` keyword when a file handle is passed. (:issue:`21227`) - .. _whatsnew_0240.api_breaking: @@ -184,4 +184,3 @@ Other - - - -