diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 19e8acdaa7384..f17cd354d40a3 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -407,6 +407,7 @@ I/O - Bug in :meth:`read_csv` was causing a segfault when there were blank lines between the header and data rows (:issue:`28071`) - Bug in :meth:`read_csv` was raising a misleading exception on a permissions issue (:issue:`23784`) - Bug in :meth:`read_csv` was raising an ``IndexError`` when header=None and 2 extra data columns +- Bug in :meth:`read_sas` was raising an ``AttributeError`` when reading files from Google Cloud Storage (:issue:`33069`) - Bug in :meth:`DataFrame.to_sql` where an ``AttributeError`` was raised when saving an out of bounds date (:issue:`26761`) Plotting diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py index e67d68f7e0975..85b7fd497cedd 100644 --- a/pandas/io/sas/sas_xport.py +++ b/pandas/io/sas/sas_xport.py @@ -9,7 +9,6 @@ """ from collections import abc from datetime import datetime -from io import BytesIO import struct import warnings @@ -263,13 +262,9 @@ def __init__( if isinstance(filepath_or_buffer, (str, bytes)): self.filepath_or_buffer = open(filepath_or_buffer, "rb") else: - # Copy to BytesIO, and ensure no encoding - contents = filepath_or_buffer.read() - try: - contents = contents.encode(self._encoding) - except UnicodeEncodeError: - pass - self.filepath_or_buffer = BytesIO(contents) + # Since xport files include non-text byte sequences, xport files + # should already be opened in binary mode in Python 3. 
+ self.filepath_or_buffer = filepath_or_buffer self._read_header() diff --git a/pandas/tests/io/sas/test_xport.py b/pandas/tests/io/sas/test_xport.py index ee97f08ef9400..2682bafedb8f1 100644 --- a/pandas/tests/io/sas/test_xport.py +++ b/pandas/tests/io/sas/test_xport.py @@ -26,6 +26,7 @@ def setup_method(self, datapath): self.dirpath = datapath("io", "sas", "data") self.file01 = os.path.join(self.dirpath, "DEMO_G.xpt") self.file02 = os.path.join(self.dirpath, "SSHSV1_A.xpt") + self.file02b = open(os.path.join(self.dirpath, "SSHSV1_A.xpt"), "rb") self.file03 = os.path.join(self.dirpath, "DRXFCD_G.xpt") self.file04 = os.path.join(self.dirpath, "paxraw_d_short.xpt") @@ -119,6 +120,16 @@ def test2(self): data = read_sas(self.file02) tm.assert_frame_equal(data, data_csv) + def test2_binary(self): + # Test with SSHSV1_A.xpt, read as a binary file + + # Compare to this + data_csv = pd.read_csv(self.file02.replace(".xpt", ".csv")) + numeric_as_float(data_csv) + + data = read_sas(self.file02b, format="xport") + tm.assert_frame_equal(data, data_csv) + def test_multiple_types(self): # Test with DRXFCD_G.xpt (contains text and numeric variables)