diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 49e992b14293e..3392b64890cb7 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2028,9 +2028,9 @@ def _repr_data_resource_(self):
     def to_excel(
         self,
         excel_writer,
-        sheet_name="Sheet1",
-        na_rep="",
-        float_format=None,
+        sheet_name: str = "Sheet1",
+        na_rep: str = "",
+        float_format: Optional[str] = None,
         columns=None,
         header=True,
         index=True,
@@ -2043,6 +2043,7 @@ def to_excel(
         inf_rep="inf",
         verbose=True,
         freeze_panes=None,
+        storage_options: StorageOptions = None,
     ) -> None:
         """
         Write {klass} to an Excel sheet.
@@ -2059,7 +2060,7 @@ def to_excel(
 
         Parameters
         ----------
-        excel_writer : str or ExcelWriter object
+        excel_writer : path-like, file-like, or ExcelWriter object
             File path or existing ExcelWriter.
         sheet_name : str, default 'Sheet1'
             Name of sheet which will contain DataFrame.
@@ -2100,6 +2101,12 @@ def to_excel(
         freeze_panes : tuple of int (length 2), optional
             Specifies the one-based bottommost row and rightmost column that
             is to be frozen.
+        storage_options : dict, optional
+            Extra options that make sense for a particular storage connection, e.g.
+            host, port, username, password, etc., if using a URL that will
+            be parsed by ``fsspec``, e.g., starting "s3://", "gcs://".
+
+            .. versionadded:: 1.2.0
 
         See Also
         --------
@@ -2174,6 +2181,7 @@ def to_excel(
             startcol=startcol,
             freeze_panes=freeze_panes,
             engine=engine,
+            storage_options=storage_options,
         )
 
     @final
diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index c2e9828e3ea42..425b1da33dbb9 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -3,12 +3,12 @@
 from io import BufferedIOBase, BytesIO, RawIOBase
 import os
 from textwrap import fill
-from typing import Any, Mapping, Union
+from typing import Any, Dict, Mapping, Union, cast
 
 from pandas._config import config
 
 from pandas._libs.parsers import STR_NA_VALUES
-from pandas._typing import StorageOptions
+from pandas._typing import Buffer, FilePathOrBuffer, StorageOptions
 from pandas.errors import EmptyDataError
 from pandas.util._decorators import Appender, deprecate_nonkeyword_arguments
 
@@ -567,6 +567,12 @@ class ExcelWriter(metaclass=abc.ABCMeta):
         File mode to use (write or append). Append does not work with fsspec URLs.
 
         .. versionadded:: 0.24.0
+    storage_options : dict, optional
+        Extra options that make sense for a particular storage connection, e.g.
+        host, port, username, password, etc., if using a URL that will
+        be parsed by ``fsspec``, e.g., starting "s3://", "gcs://".
+
+        .. versionadded:: 1.2.0
 
     Attributes
     ----------
@@ -710,11 +716,12 @@ def save(self):
 
     def __init__(
         self,
-        path,
+        path: Union[FilePathOrBuffer, "ExcelWriter"],
         engine=None,
         date_format=None,
         datetime_format=None,
-        mode="w",
+        mode: str = "w",
+        storage_options: StorageOptions = None,
         **engine_kwargs,
     ):
         # validate that this engine can handle the extension
@@ -729,10 +736,13 @@ def __init__(
         # the excel backend first read the existing file and then write any data to it
         mode = mode.replace("a", "r+")
 
-        self.handles = IOHandles(path, compression={"copression": None})
+        # cast ExcelWriter to avoid adding 'if self.handles is not None'
+        self.handles = IOHandles(cast(Buffer, path), compression={"compression": None})
         if not isinstance(path, ExcelWriter):
-            self.handles = get_handle(path, mode, is_text=False)
-        self.sheets = {}
+            self.handles = get_handle(
+                path, mode, storage_options=storage_options, is_text=False
+            )
+        self.sheets: Dict[str, Any] = {}
         self.cur_sheet = None
 
         if date_format is None:
diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py
index c19d51540d2dd..f9a08bf862644 100644
--- a/pandas/io/excel/_odswriter.py
+++ b/pandas/io/excel/_odswriter.py
@@ -3,6 +3,7 @@
 from typing import Any, DefaultDict, Dict, List, Optional, Tuple, Union
 
 import pandas._libs.json as json
+from pandas._typing import StorageOptions
 
 from pandas.io.excel._base import ExcelWriter
 from pandas.io.excel._util import validate_freeze_panes
@@ -14,7 +15,12 @@ class ODSWriter(ExcelWriter):
     supported_extensions = (".ods",)
 
     def __init__(
-        self, path: str, engine: Optional[str] = None, mode: str = "w", **engine_kwargs
+        self,
+        path: str,
+        engine: Optional[str] = None,
+        mode: str = "w",
+        storage_options: StorageOptions = None,
+        **engine_kwargs,
     ):
         from odf.opendocument import OpenDocumentSpreadsheet
 
@@ -23,7 +29,9 @@ def __init__(
         if mode == "a":
             raise ValueError("Append mode is not supported with odf!")
 
-        super().__init__(path, mode=mode, **engine_kwargs)
+        super().__init__(
+            path, mode=mode, storage_options=storage_options, **engine_kwargs
+        )
 
         self.book = OpenDocumentSpreadsheet()
         self._style_dict: Dict[str, str] = {}
diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
index f643037dc216a..7de958df206d5 100644
--- a/pandas/io/excel/_openpyxl.py
+++ b/pandas/io/excel/_openpyxl.py
@@ -16,11 +16,20 @@ class OpenpyxlWriter(ExcelWriter):
     engine = "openpyxl"
     supported_extensions = (".xlsx", ".xlsm")
 
-    def __init__(self, path, engine=None, mode="w", **engine_kwargs):
+    def __init__(
+        self,
+        path,
+        engine=None,
+        mode: str = "w",
+        storage_options: StorageOptions = None,
+        **engine_kwargs,
+    ):
         # Use the openpyxl module as the Excel writer.
         from openpyxl.workbook import Workbook
 
-        super().__init__(path, mode=mode, **engine_kwargs)
+        super().__init__(
+            path, mode=mode, storage_options=storage_options, **engine_kwargs
+        )
 
         # ExcelWriter replaced "a" by "r+" to allow us to first read the excel file from
         # the file and later write to it
diff --git a/pandas/io/excel/_xlsxwriter.py b/pandas/io/excel/_xlsxwriter.py
index 77b631a41371e..d7bbec578d89d 100644
--- a/pandas/io/excel/_xlsxwriter.py
+++ b/pandas/io/excel/_xlsxwriter.py
@@ -1,6 +1,7 @@
 from typing import Dict, List, Tuple
 
 import pandas._libs.json as json
+from pandas._typing import StorageOptions
 
 from pandas.io.excel._base import ExcelWriter
 from pandas.io.excel._util import validate_freeze_panes
@@ -168,7 +169,8 @@ def __init__(
         engine=None,
         date_format=None,
         datetime_format=None,
-        mode="w",
+        mode: str = "w",
+        storage_options: StorageOptions = None,
         **engine_kwargs,
     ):
         # Use the xlsxwriter module as the Excel writer.
@@ -183,6 +185,7 @@ def __init__(
             date_format=date_format,
             datetime_format=datetime_format,
             mode=mode,
+            storage_options=storage_options,
             **engine_kwargs,
         )
 
diff --git a/pandas/io/excel/_xlwt.py b/pandas/io/excel/_xlwt.py
index 7f0ce3844c099..9ede7cd0c2b95 100644
--- a/pandas/io/excel/_xlwt.py
+++ b/pandas/io/excel/_xlwt.py
@@ -1,6 +1,7 @@
 from typing import TYPE_CHECKING, Dict
 
 import pandas._libs.json as json
+from pandas._typing import StorageOptions
 
 from pandas.io.excel._base import ExcelWriter
 from pandas.io.excel._util import validate_freeze_panes
@@ -13,7 +14,15 @@ class XlwtWriter(ExcelWriter):
     engine = "xlwt"
     supported_extensions = (".xls",)
 
-    def __init__(self, path, engine=None, encoding=None, mode="w", **engine_kwargs):
+    def __init__(
+        self,
+        path,
+        engine=None,
+        encoding=None,
+        mode: str = "w",
+        storage_options: StorageOptions = None,
+        **engine_kwargs,
+    ):
         # Use the xlwt module as the Excel writer.
         import xlwt
 
@@ -22,7 +31,9 @@ def __init__(self, path, engine=None, encoding=None, mode="w", **engine_kwargs):
         if mode == "a":
             raise ValueError("Append mode is not supported with xlwt!")
 
-        super().__init__(path, mode=mode, **engine_kwargs)
+        super().__init__(
+            path, mode=mode, storage_options=storage_options, **engine_kwargs
+        )
 
         if encoding is None:
             encoding = "ascii"
diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py
index 0916494d8ab60..c6179f5c034c7 100644
--- a/pandas/io/formats/excel.py
+++ b/pandas/io/formats/excel.py
@@ -10,7 +10,7 @@
 
 import numpy as np
 
-from pandas._typing import Label
+from pandas._typing import Label, StorageOptions
 
 from pandas.core.dtypes import missing
 from pandas.core.dtypes.common import is_float, is_scalar
@@ -19,7 +19,6 @@
 from pandas import DataFrame, Index, MultiIndex, PeriodIndex
 import pandas.core.common as com
 
-from pandas.io.common import stringify_path
 from pandas.io.formats.css import CSSResolver, CSSWarning
 from pandas.io.formats.format import get_level_lengths
 from pandas.io.formats.printing import pprint_thing
@@ -785,9 +784,10 @@ def write(
         startcol=0,
         freeze_panes=None,
         engine=None,
+        storage_options: StorageOptions = None,
     ):
         """
-        writer : string or ExcelWriter object
+        writer : path-like, file-like, or ExcelWriter object
             File path or existing ExcelWriter
         sheet_name : string, default 'Sheet1'
             Name of sheet which will contain DataFrame
@@ -802,6 +802,12 @@ def write(
             write engine to use if writer is a path - you can also set this
             via the options ``io.excel.xlsx.writer``, ``io.excel.xls.writer``,
             and ``io.excel.xlsm.writer``.
+        storage_options : dict, optional
+            Extra options that make sense for a particular storage connection, e.g.
+            host, port, username, password, etc., if using a URL that will
+            be parsed by ``fsspec``, e.g., starting "s3://", "gcs://".
+
+            .. versionadded:: 1.2.0
         """
         from pandas.io.excel import ExcelWriter
 
@@ -819,7 +825,7 @@ def write(
             # abstract class 'ExcelWriter' with abstract attributes 'engine',
             # 'save', 'supported_extensions' and 'write_cells'  [abstract]
             writer = ExcelWriter(  # type: ignore[abstract]
-                stringify_path(writer), engine=engine
+                writer, engine=engine, storage_options=storage_options
             )
             need_save = True
 
diff --git a/pandas/tests/io/test_fsspec.py b/pandas/tests/io/test_fsspec.py
index 312ea5abdfe39..c5767c5080ddd 100644
--- a/pandas/tests/io/test_fsspec.py
+++ b/pandas/tests/io/test_fsspec.py
@@ -124,6 +124,18 @@ def test_csv_options(fsspectest):
     assert fsspectest.test[0] == "csv_read"
 
 
+@pytest.mark.parametrize("extension", ["xlsx", "xls"])
+def test_excel_options(fsspectest, extension):
+    df = DataFrame({"a": [0]})
+
+    path = f"testmem://test/test.{extension}"
+
+    df.to_excel(path, storage_options={"test": "write"}, index=False)
+    assert fsspectest.test[0] == "write"
+    read_excel(path, storage_options={"test": "read"})
+    assert fsspectest.test[0] == "read"
+
+
 @td.skip_if_no("fastparquet")
 def test_to_parquet_new_file(monkeypatch, cleared_fs):
     """Regression test for writing to a not-yet-existent GCS Parquet file."""