Skip to content

ENH: storage_options for to_excel #37818

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 14, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2028,9 +2028,9 @@ def _repr_data_resource_(self):
def to_excel(
self,
excel_writer,
sheet_name="Sheet1",
na_rep="",
float_format=None,
sheet_name: str = "Sheet1",
na_rep: str = "",
float_format: Optional[str] = None,
columns=None,
header=True,
index=True,
Expand All @@ -2043,6 +2043,7 @@ def to_excel(
inf_rep="inf",
verbose=True,
freeze_panes=None,
storage_options: StorageOptions = None,
) -> None:
"""
Write {klass} to an Excel sheet.
Expand All @@ -2059,7 +2060,7 @@ def to_excel(

Parameters
----------
excel_writer : str or ExcelWriter object
excel_writer : path-like, file-like, or ExcelWriter object
File path, file-like object, or existing ExcelWriter.
sheet_name : str, default 'Sheet1'
Name of sheet which will contain DataFrame.
Expand Down Expand Up @@ -2100,6 +2101,12 @@ def to_excel(
freeze_panes : tuple of int (length 2), optional
Specifies the one-based bottommost row and rightmost column that
is to be frozen.
storage_options : dict, optional
Extra options that make sense for a particular storage connection, e.g.
host, port, username, password, etc., if using a URL that will
be parsed by ``fsspec``, e.g., starting "s3://", "gcs://".

.. versionadded:: 1.2.0

See Also
--------
Expand Down Expand Up @@ -2174,6 +2181,7 @@ def to_excel(
startcol=startcol,
freeze_panes=freeze_panes,
engine=engine,
storage_options=storage_options,
)

@final
Expand Down
24 changes: 17 additions & 7 deletions pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
from io import BufferedIOBase, BytesIO, RawIOBase
import os
from textwrap import fill
from typing import Any, Mapping, Union
from typing import Any, Dict, Mapping, Union, cast

from pandas._config import config

from pandas._libs.parsers import STR_NA_VALUES
from pandas._typing import StorageOptions
from pandas._typing import Buffer, FilePathOrBuffer, StorageOptions
from pandas.errors import EmptyDataError
from pandas.util._decorators import Appender, deprecate_nonkeyword_arguments

Expand Down Expand Up @@ -567,6 +567,12 @@ class ExcelWriter(metaclass=abc.ABCMeta):
File mode to use (write or append). Append does not work with fsspec URLs.

.. versionadded:: 0.24.0
storage_options : dict, optional
Extra options that make sense for a particular storage connection, e.g.
host, port, username, password, etc., if using a URL that will
be parsed by ``fsspec``, e.g., starting "s3://", "gcs://".

.. versionadded:: 1.2.0

Attributes
----------
Expand Down Expand Up @@ -710,11 +716,12 @@ def save(self):

def __init__(
self,
path,
path: Union[FilePathOrBuffer, "ExcelWriter"],
engine=None,
date_format=None,
datetime_format=None,
mode="w",
mode: str = "w",
storage_options: StorageOptions = None,
**engine_kwargs,
):
# validate that this engine can handle the extension
Expand All @@ -729,10 +736,13 @@ def __init__(
# the excel backend first read the existing file and then write any data to it
mode = mode.replace("a", "r+")

self.handles = IOHandles(path, compression={"copression": None})
# cast ExcelWriter to avoid adding 'if self.handles is not None'
self.handles = IOHandles(cast(Buffer, path), compression={"copression": None})
if not isinstance(path, ExcelWriter):
self.handles = get_handle(path, mode, is_text=False)
self.sheets = {}
self.handles = get_handle(
path, mode, storage_options=storage_options, is_text=False
)
self.sheets: Dict[str, Any] = {}
self.cur_sheet = None

if date_format is None:
Expand Down
12 changes: 10 additions & 2 deletions pandas/io/excel/_odswriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import Any, DefaultDict, Dict, List, Optional, Tuple, Union

import pandas._libs.json as json
from pandas._typing import StorageOptions

from pandas.io.excel._base import ExcelWriter
from pandas.io.excel._util import validate_freeze_panes
Expand All @@ -14,7 +15,12 @@ class ODSWriter(ExcelWriter):
supported_extensions = (".ods",)

def __init__(
self, path: str, engine: Optional[str] = None, mode: str = "w", **engine_kwargs
self,
path: str,
engine: Optional[str] = None,
mode: str = "w",
storage_options: StorageOptions = None,
**engine_kwargs,
):
from odf.opendocument import OpenDocumentSpreadsheet

Expand All @@ -23,7 +29,9 @@ def __init__(
if mode == "a":
raise ValueError("Append mode is not supported with odf!")

super().__init__(path, mode=mode, **engine_kwargs)
super().__init__(
path, mode=mode, storage_options=storage_options, **engine_kwargs
)

self.book = OpenDocumentSpreadsheet()
self._style_dict: Dict[str, str] = {}
Expand Down
13 changes: 11 additions & 2 deletions pandas/io/excel/_openpyxl.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,20 @@ class OpenpyxlWriter(ExcelWriter):
engine = "openpyxl"
supported_extensions = (".xlsx", ".xlsm")

def __init__(self, path, engine=None, mode="w", **engine_kwargs):
def __init__(
self,
path,
engine=None,
mode: str = "w",
storage_options: StorageOptions = None,
**engine_kwargs,
):
# Use the openpyxl module as the Excel writer.
from openpyxl.workbook import Workbook

super().__init__(path, mode=mode, **engine_kwargs)
super().__init__(
path, mode=mode, storage_options=storage_options, **engine_kwargs
)

# ExcelWriter replaced "a" by "r+" to allow us to first read the excel file from
# the file and later write to it
Expand Down
5 changes: 4 additions & 1 deletion pandas/io/excel/_xlsxwriter.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Dict, List, Tuple

import pandas._libs.json as json
from pandas._typing import StorageOptions

from pandas.io.excel._base import ExcelWriter
from pandas.io.excel._util import validate_freeze_panes
Expand Down Expand Up @@ -168,7 +169,8 @@ def __init__(
engine=None,
date_format=None,
datetime_format=None,
mode="w",
mode: str = "w",
storage_options: StorageOptions = None,
**engine_kwargs,
):
# Use the xlsxwriter module as the Excel writer.
Expand All @@ -183,6 +185,7 @@ def __init__(
date_format=date_format,
datetime_format=datetime_format,
mode=mode,
storage_options=storage_options,
**engine_kwargs,
)

Expand Down
15 changes: 13 additions & 2 deletions pandas/io/excel/_xlwt.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import TYPE_CHECKING, Dict

import pandas._libs.json as json
from pandas._typing import StorageOptions

from pandas.io.excel._base import ExcelWriter
from pandas.io.excel._util import validate_freeze_panes
Expand All @@ -13,7 +14,15 @@ class XlwtWriter(ExcelWriter):
engine = "xlwt"
supported_extensions = (".xls",)

def __init__(self, path, engine=None, encoding=None, mode="w", **engine_kwargs):
def __init__(
self,
path,
engine=None,
encoding=None,
mode: str = "w",
storage_options: StorageOptions = None,
**engine_kwargs,
):
# Use the xlwt module as the Excel writer.
import xlwt

Expand All @@ -22,7 +31,9 @@ def __init__(self, path, engine=None, encoding=None, mode="w", **engine_kwargs):
if mode == "a":
raise ValueError("Append mode is not supported with xlwt!")

super().__init__(path, mode=mode, **engine_kwargs)
super().__init__(
path, mode=mode, storage_options=storage_options, **engine_kwargs
)

if encoding is None:
encoding = "ascii"
Expand Down
14 changes: 10 additions & 4 deletions pandas/io/formats/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

import numpy as np

from pandas._typing import Label
from pandas._typing import Label, StorageOptions

from pandas.core.dtypes import missing
from pandas.core.dtypes.common import is_float, is_scalar
Expand All @@ -19,7 +19,6 @@
from pandas import DataFrame, Index, MultiIndex, PeriodIndex
import pandas.core.common as com

from pandas.io.common import stringify_path
from pandas.io.formats.css import CSSResolver, CSSWarning
from pandas.io.formats.format import get_level_lengths
from pandas.io.formats.printing import pprint_thing
Expand Down Expand Up @@ -785,9 +784,10 @@ def write(
startcol=0,
freeze_panes=None,
engine=None,
storage_options: StorageOptions = None,
):
"""
writer : string or ExcelWriter object
writer : path-like, file-like, or ExcelWriter object
File path, file-like object, or existing ExcelWriter
sheet_name : string, default 'Sheet1'
Name of sheet which will contain DataFrame
Expand All @@ -802,6 +802,12 @@ def write(
write engine to use if writer is a path - you can also set this
via the options ``io.excel.xlsx.writer``, ``io.excel.xls.writer``,
and ``io.excel.xlsm.writer``.
storage_options : dict, optional
Extra options that make sense for a particular storage connection, e.g.
host, port, username, password, etc., if using a URL that will
be parsed by ``fsspec``, e.g., starting "s3://", "gcs://".

.. versionadded:: 1.2.0
"""
from pandas.io.excel import ExcelWriter

Expand All @@ -819,7 +825,7 @@ def write(
# abstract class 'ExcelWriter' with abstract attributes 'engine',
# 'save', 'supported_extensions' and 'write_cells' [abstract]
writer = ExcelWriter( # type: ignore[abstract]
stringify_path(writer), engine=engine
writer, engine=engine, storage_options=storage_options
)
need_save = True

Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/io/test_fsspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,18 @@ def test_csv_options(fsspectest):
assert fsspectest.test[0] == "csv_read"


@pytest.mark.parametrize("extension", ["xlsx", "xls"])
def test_excel_options(fsspectest, extension):
    """Check that ``storage_options`` is honoured on Excel write and read.

    A one-cell frame is written to a ``testmem://`` URL and read back. After
    each call, ``fsspectest.test[0]`` must equal the value passed under the
    ``"test"`` storage option.
    """
    # NOTE(review): presumably the ``fsspectest`` fixture registers the
    # ``testmem`` protocol and records the "test" option -- confirm in conftest.
    target = f"testmem://test/test.{extension}"
    frame = DataFrame({"a": [0]})

    write_options = {"test": "write"}
    frame.to_excel(target, storage_options=write_options, index=False)
    assert fsspectest.test[0] == "write"

    read_options = {"test": "read"}
    read_excel(target, storage_options=read_options)
    assert fsspectest.test[0] == "read"


@td.skip_if_no("fastparquet")
def test_to_parquet_new_file(monkeypatch, cleared_fs):
"""Regression test for writing to a not-yet-existent GCS Parquet file."""
Expand Down