diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4f79a37eb4b..4059b5a1ae3 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -28,6 +28,9 @@ New Features By `Justus Magin `_. - Added ``**kwargs`` argument to :py:meth:`open_rasterio` to access overviews (:issue:`3269`). By `Pushkar Kopparla `_. +- Added ``storage_options`` argument to :py:meth:`to_zarr` (:issue:`5601`). + By `Ray Bell `_, `Zachary Blackwood `_ and + `Nathan Lis `_. Breaking changes diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 9b4fa8fce5a..2c9b25f860f 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1319,6 +1319,7 @@ def to_zarr( append_dim: Hashable = None, region: Mapping[str, slice] = None, safe_chunks: bool = True, + storage_options: Dict[str, str] = None, ): """This function creates an appropriate datastore for writing a dataset to a zarr ztore @@ -1330,6 +1331,22 @@ def to_zarr( store = _normalize_path(store) chunk_store = _normalize_path(chunk_store) + if storage_options is None: + mapper = store + chunk_mapper = chunk_store + else: + from fsspec import get_mapper + + if not isinstance(store, str): + raise ValueError( + f"store must be a string to use storage_options. Got {type(store)}" + ) + mapper = get_mapper(store, **storage_options) + if chunk_store is not None: + chunk_mapper = get_mapper(chunk_store, **storage_options) + else: + chunk_mapper = chunk_store + if encoding is None: encoding = {} @@ -1372,13 +1389,13 @@ def to_zarr( already_consolidated = False consolidate_on_close = consolidated or consolidated is None zstore = backends.ZarrStore.open_group( - store=store, + store=mapper, mode=mode, synchronizer=synchronizer, group=group, consolidated=already_consolidated, consolidate_on_close=consolidate_on_close, - chunk_store=chunk_store, + chunk_store=chunk_mapper, append_dim=append_dim, write_region=region, safe_chunks=safe_chunks, diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index aec12d2b154..12499103fb9 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -713,6 +713,9 @@ def open_zarr( falling back to read non-consolidated metadata if that fails. chunk_store : MutableMapping, optional A separate Zarr store only for chunk data. + storage_options : dict, optional + Any additional parameters for the storage backend (ignored for local + paths). decode_timedelta : bool, optional If True, decode variables and coordinates with time units in {'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'} diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 90c395ed39b..a5eaa82cfdd 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1922,6 +1922,7 @@ def to_zarr( append_dim: Hashable = None, region: Mapping[str, slice] = None, safe_chunks: bool = True, + storage_options: Dict[str, str] = None, ) -> "ZarrStore": """Write dataset contents to a zarr group. @@ -1941,10 +1942,10 @@ def to_zarr( Parameters ---------- store : MutableMapping, str or Path, optional - Store or path to directory in file system. + Store or path to directory in local or remote file system. chunk_store : MutableMapping, str or Path, optional - Store or path to directory in file system only for Zarr array chunks. - Requires zarr-python v2.4.0 or later. + Store or path to directory in local or remote file system only for Zarr + array chunks. Requires zarr-python v2.4.0 or later. mode : {"w", "w-", "a", "r+", None}, optional Persistence mode: "w" means create (overwrite if exists); "w-" means create (fail if exists); @@ -1999,6 +2000,9 @@ def to_zarr( if Zarr arrays are written in parallel. This option may be useful in combination with ``compute=False`` to initialize a Zarr from an existing Dataset with aribtrary chunk structure. + storage_options : dict, optional + Any additional parameters for the storage backend (ignored for local + paths). References ---------- @@ -2031,6 +2035,7 @@ def to_zarr( self, store=store, chunk_store=chunk_store, + storage_options=storage_options, mode=mode, synchronizer=synchronizer, group=group, diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index d757fb451cc..f610941914b 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -77,6 +77,7 @@ def LooseVersion(vstring): has_nc_time_axis, requires_nc_time_axis = _importorskip("nc_time_axis") has_rasterio, requires_rasterio = _importorskip("rasterio") has_zarr, requires_zarr = _importorskip("zarr") +has_zarr_2_5_0, requires_zarr_2_5_0 = _importorskip("zarr", minversion="2.5.0") has_fsspec, requires_fsspec = _importorskip("fsspec") has_iris, requires_iris = _importorskip("iris") has_cfgrib, requires_cfgrib = _importorskip("cfgrib") diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 3bbc2c93b31..3ca20cade56 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -71,6 +71,7 @@ requires_scipy, requires_scipy_or_netCDF4, requires_zarr, + requires_zarr_2_5_0, ) from .test_coding_times import ( _ALL_CALENDARS, @@ -2388,6 +2389,17 @@ def create_zarr_target(self): yield tmp +@requires_fsspec +@requires_zarr_2_5_0 +def test_zarr_storage_options(): + pytest.importorskip("aiobotocore") + ds = create_test_data() + store_target = "memory://test.zarr" + ds.to_zarr(store_target, storage_options={"test": "zarr_write"}) + ds_a = xr.open_zarr(store_target, storage_options={"test": "zarr_read"}) + assert_identical(ds, ds_a) + + @requires_scipy class TestScipyInMemoryData(CFEncodedBase, NetCDF3Only): engine = "scipy"