diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index e931450cb5c01..55e76512b2440 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -13,7 +13,7 @@ New features Other Enhancements ^^^^^^^^^^^^^^^^^^ - :func:`to_datetime` now supports the ``%Z`` and ``%z`` directive when passed into ``format`` (:issue:`13486`) -- +- :func:`to_csv` now supports the ``compression`` keyword when a file handle is passed (:issue:`21227`) - .. _whatsnew_0240.api_breaking: @@ -184,4 +184,3 @@ Other - - - - diff --git a/pandas/conftest.py b/pandas/conftest.py index b09cb872a12fb..a463f573c82e0 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -105,6 +105,16 @@ def compression(request): return request.param +@pytest.fixture(params=['gzip', 'bz2', 'zip', + pytest.param('xz', marks=td.skip_if_no_lzma)]) +def compression_only(request): + """ + Fixture for trying common compression types in compression tests excluding + uncompressed case + """ + return request.param + + @pytest.fixture(scope='module') def datetime_tz_utc(): from datetime import timezone diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 22677b19192e1..0899e9cd87aba 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1689,8 +1689,7 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, defaults to 'ascii' on Python 2 and 'utf-8' on Python 3. compression : string, optional A string representing the compression to use in the output file. - Allowed values are 'gzip', 'bz2', 'zip', 'xz'. This input is only - used when the first argument is a filename. + Allowed values are 'gzip', 'bz2', 'zip', 'xz'. 
line_terminator : string, default ``'\n'`` The newline character or character sequence to use in the output file diff --git a/pandas/core/series.py b/pandas/core/series.py index c9329e8b9e572..f25f73513df30 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3761,8 +3761,7 @@ def to_csv(self, path=None, index=True, sep=",", na_rep='', non-ascii, for python versions prior to 3 compression : string, optional A string representing the compression to use in the output file. - Allowed values are 'gzip', 'bz2', 'zip', 'xz'. This input is only - used when the first argument is a filename. + Allowed values are 'gzip', 'bz2', 'zip', 'xz'. date_format: string, default None Format string for datetime objects. decimal: string, default '.' diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 29b8d29af0808..0be2a180fbfa2 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -154,9 +154,9 @@ def save(self): # GH 17778 handles compression for byte strings. 
if not close and self.compression: f.close() - with open(self.path_or_buf, 'r') as f: + with open(f.name, 'r') as f: data = f.read() - f, handles = _get_handle(self.path_or_buf, self.mode, + f, handles = _get_handle(f.name, self.mode, encoding=encoding, compression=self.compression) f.write(data) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index bb7ee1b911fee..88e469731060d 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -231,13 +231,33 @@ def test_standardize_mapping(): columns=['X', 'Y', 'Z']), Series(100 * [0.123456, 0.234567, 0.567567], name='X')]) @pytest.mark.parametrize('method', ['to_pickle', 'to_json', 'to_csv']) -def test_compression_size(obj, method, compression): - if not compression: - pytest.skip("only test compression case.") +def test_compression_size(obj, method, compression_only): with tm.ensure_clean() as filename: - getattr(obj, method)(filename, compression=compression) + getattr(obj, method)(filename, compression=compression_only) compressed = os.path.getsize(filename) getattr(obj, method)(filename, compression=None) uncompressed = os.path.getsize(filename) assert uncompressed > compressed + + +@pytest.mark.parametrize('obj', [ + DataFrame(100 * [[0.123456, 0.234567, 0.567567], + [12.32112, 123123.2, 321321.2]], + columns=['X', 'Y', 'Z']), + Series(100 * [0.123456, 0.234567, 0.567567], name='X')]) +@pytest.mark.parametrize('method', ['to_csv']) +def test_compression_size_fh(obj, method, compression_only): + + with tm.ensure_clean() as filename: + with open(filename, 'w') as fh: + getattr(obj, method)(fh, compression=compression_only) + # GH 17778 + assert fh.closed + compressed = os.path.getsize(filename) + with tm.ensure_clean() as filename: + with open(filename, 'w') as fh: + getattr(obj, method)(fh, compression=None) + assert not fh.closed + uncompressed = os.path.getsize(filename) + assert uncompressed > compressed