-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
BUG/REG: file-handle object handled incorrectly in to_csv #21478
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 16 commits
805739e
1185281
5e66ee2
d11dcae
d3dac82
9567900
6accf04
d5d976e
05bbd81
6f5bcf1
c754329
23317af
ab37993
5392bcc
f34ab4e
08f8972
689f011
c472a5b
cf3afac
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,11 +5,13 @@ | |
|
||
from __future__ import print_function | ||
|
||
import warnings | ||
|
||
import csv as csvlib | ||
from zipfile import ZipFile | ||
import numpy as np | ||
|
||
from pandas.core.dtypes.missing import notna | ||
from pandas.core.dtypes.inference import is_file_like | ||
from pandas.core.index import Index, MultiIndex | ||
from pandas import compat | ||
from pandas.compat import (StringIO, range, zip) | ||
|
@@ -128,19 +130,31 @@ def save(self): | |
else: | ||
encoding = self.encoding | ||
|
||
# PR 21300 uses string buffer to receive csv writing and dump into | ||
# file-like output with compression as option. GH 21241, 21118 | ||
f = StringIO() | ||
if not is_file_like(self.path_or_buf): | ||
# path_or_buf is path | ||
path_or_buf = self.path_or_buf | ||
elif hasattr(self.path_or_buf, 'name'): | ||
# path_or_buf is file handle | ||
path_or_buf = self.path_or_buf.name | ||
else: | ||
# path_or_buf is file-like IO objects. | ||
# GH 21227 internal compression is not used when file-like passed. | ||
if self.compression and hasattr(self.path_or_buf, 'write'): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can uou add a comment here (sure the warning says it all, but helpful nonetheless), also an issue reference |
||
msg = ("compression has no effect when passing file-like " | ||
"object as input.") | ||
warnings.warn(msg, RuntimeWarning, stacklevel=2) | ||
|
||
# when zip compression is called. | ||
is_zip = isinstance(self.path_or_buf, ZipFile) or ( | ||
not hasattr(self.path_or_buf, 'write') | ||
and self.compression == 'zip') | ||
|
||
if is_zip: | ||
# zipfile doesn't support writing string to archive. uses string | ||
# buffer to receive csv writing and dump into zip compression | ||
# file handle. GH 21241, 21118 | ||
f = StringIO() | ||
close = False | ||
elif hasattr(self.path_or_buf, 'write'): | ||
f = self.path_or_buf | ||
path_or_buf = None | ||
close = False | ||
else: | ||
f, handles = _get_handle(self.path_or_buf, self.mode, | ||
encoding=encoding, | ||
compression=self.compression) | ||
close = True | ||
|
||
try: | ||
writer_kwargs = dict(lineterminator=self.line_terminator, | ||
|
@@ -157,13 +171,19 @@ def save(self): | |
self._save() | ||
|
||
finally: | ||
# GH 17778 handles zip compression for byte strings separately. | ||
buf = f.getvalue() | ||
if path_or_buf: | ||
f, handles = _get_handle(path_or_buf, self.mode, | ||
encoding=encoding, | ||
compression=self.compression) | ||
f.write(buf) | ||
if is_zip: | ||
# GH 17778 handles zip compression separately. | ||
buf = f.getvalue() | ||
try: | ||
self.path_or_buf.write(buf) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. They say that it's sometimes easier to ask for forgiveness than it is for permission, hence the Where you ask for permission instead of forgiveness. In addition, the logic seems somewhat similar. Can we potentially deduplicate it? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. see your point. changed to if else to make it consistent. |
||
except AttributeError: | ||
# when path_or_buf is not file-like, create handle. | ||
f, handles = _get_handle(self.path_or_buf, self.mode, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add a comment on when the except happens? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done. |
||
encoding=encoding, | ||
compression=self.compression) | ||
f.write(buf) | ||
close = True | ||
if close: | ||
f.close() | ||
for _fh in handles: | ||
_fh.close() | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,6 +11,7 @@ | |
from pandas.compat import range, lmap | ||
import pandas.core.common as com | ||
from pandas.core import ops | ||
from pandas.io.common import _get_handle | ||
import pandas.util.testing as tm | ||
|
||
|
||
|
@@ -246,19 +247,32 @@ def test_compression_size(obj, method, compression_only): | |
[12.32112, 123123.2, 321321.2]], | ||
columns=['X', 'Y', 'Z']), | ||
Series(100 * [0.123456, 0.234567, 0.567567], name='X')]) | ||
@pytest.mark.parametrize('method', ['to_csv']) | ||
@pytest.mark.parametrize('method', ['to_csv', 'to_json']) | ||
def test_compression_size_fh(obj, method, compression_only): | ||
|
||
with tm.ensure_clean() as filename: | ||
with open(filename, 'w') as fh: | ||
getattr(obj, method)(fh, compression=compression_only) | ||
assert not fh.closed | ||
assert fh.closed | ||
f, _handles = _get_handle(filename, 'w', compression=compression_only) | ||
with f: | ||
getattr(obj, method)(f) | ||
assert not f.closed | ||
compressed = os.path.getsize(filename) | ||
with tm.ensure_clean() as filename: | ||
with open(filename, 'w') as fh: | ||
getattr(obj, method)(fh, compression=None) | ||
assert not fh.closed | ||
assert fh.closed | ||
f, _handles = _get_handle(filename, 'w', compression=None) | ||
with f: | ||
getattr(obj, method)(f) | ||
assert not f.closed | ||
uncompressed = os.path.getsize(filename) | ||
assert uncompressed > compressed | ||
|
||
|
||
# GH 21227 | ||
def test_compression_warning(compression_only): | ||
df = DataFrame(100 * [[0.123456, 0.234567, 0.567567], | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add the issue number There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. added. |
||
[12.32112, 123123.2, 321321.2]], | ||
columns=['X', 'Y', 'Z']) | ||
with tm.ensure_clean() as filename: | ||
f, _handles = _get_handle(filename, 'w', compression=compression_only) | ||
with tm.assert_produces_warning(RuntimeWarning, | ||
check_stacklevel=False): | ||
with f: | ||
df.to_csv(f, compression=compression_only) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: "Regression" --> "regression"
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done.