Skip to content

Commit 76d4a67

Browse files
ulijhdcherian
authored and committed
Allow invalid_netcdf=True in to_netcdf() (pydata#3221)
* to_netcdf: invalid_netcdf kwarg seems working OK * backends: Added test for invalid_netcdf kwarg * dataset: add docstring for invalid_netcdf kwarg * Formatting: Applying Black. * h5netcdf: More explicit kwarg and exception * to_netcdf: Better test for kwarg invalid_netcdf * test_complex: More clear arg names * test invalid_netcdf=True raises with wrong engines * Doc and What's new: invalid_netcdf kwarg * Making Black happy. * docs: Add h5netcdf to environment.
1 parent 52a16a6 commit 76d4a67

File tree

7 files changed

+72
-7
lines changed

7 files changed

+72
-7
lines changed

doc/environment.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ dependencies:
1111
- dask=1.1.0
1212
- ipython=7.2.0
1313
- netCDF4=1.4.2
14+
- h5netcdf=0.7.4
1415
- cartopy=0.17.0
1516
- rasterio=1.0.24
1617
- zarr=2.2.0

doc/io.rst

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,6 +366,31 @@ supported by netCDF4-python: 'standard', 'gregorian', 'proleptic_gregorian' 'nol
366366
By default, xarray uses the 'proleptic_gregorian' calendar and units of the smallest time
367367
difference between values, with a reference time of the first time value.
368368

369+
Invalid netCDF files
370+
~~~~~~~~~~~~~~~~~~~~
371+
372+
The library ``h5netcdf`` allows writing some dtypes (booleans, complex, ...) that aren't
373+
allowed in netCDF4 (see
374+
`h5netcdf documentation <https://github.com/shoyer/h5netcdf#invalid-netcdf-files>`_).
375+
This feature is available through :py:func:`DataArray.to_netcdf` and
376+
:py:func:`Dataset.to_netcdf` when used with ``engine="h5netcdf"``
377+
and currently raises a warning unless ``invalid_netcdf=True`` is set:
378+
379+
.. ipython:: python
380+
381+
# Writing complex valued data
382+
da = xr.DataArray([1.+1.j, 2.+2.j, 3.+3.j])
383+
da.to_netcdf("complex.nc", engine="h5netcdf", invalid_netcdf=True)
384+
385+
# Reading it back
386+
xr.open_dataarray("complex.nc", engine="h5netcdf")
387+
388+
389+
.. warning::
390+
391+
Note that this produces a file that is likely not to be readable by other netCDF
392+
libraries!
393+
369394
.. _io.iris:
370395

371396
Iris

doc/whats-new.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ Enhancements
6464
- In :py:meth:`~xarray.Dataset.to_zarr`, passing ``mode`` is not mandatory if
6565
``append_dim`` is set, as it will automatically be set to ``'a'`` internally.
6666
By `David Brochart <https://github.com/davidbrochart>`_.
67+
- :py:func:`~xarray.Dataset.to_netcdf()` now supports the ``invalid_netcdf`` kwarg when used
68+
with ``engine="h5netcdf"``. It is passed to :py:func:`h5netcdf.File`.
69+
By `Ulrich Herter <https://github.com/ulijh>`_.
6770

6871
- :py:meth:`~xarray.Dataset.drop` now supports keyword arguments; dropping index
6972
labels by specifying both ``dim`` and ``labels`` is deprecated (:issue:`2910`).

xarray/backends/api.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -982,6 +982,7 @@ def to_netcdf(
982982
unlimited_dims: Iterable[Hashable] = None,
983983
compute: bool = True,
984984
multifile: bool = False,
985+
invalid_netcdf: bool = False,
985986
) -> Union[Tuple[ArrayWriter, AbstractDataStore], bytes, "Delayed", None]:
986987
"""This function creates an appropriate datastore for writing a dataset to
987988
disk as a netCDF file
@@ -1043,6 +1044,13 @@ def to_netcdf(
10431044

10441045
target = path_or_file if path_or_file is not None else BytesIO()
10451046
kwargs = dict(autoclose=True) if autoclose else {}
1047+
if invalid_netcdf:
1048+
if engine == "h5netcdf":
1049+
kwargs["invalid_netcdf"] = invalid_netcdf
1050+
else:
1051+
raise ValueError(
1052+
"unrecognized option 'invalid_netcdf' for engine %s" % engine
1053+
)
10461054
store = store_open(target, mode, format, group, **kwargs)
10471055

10481056
if unlimited_dims is None:

xarray/backends/h5netcdf_.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,14 +71,25 @@ class H5NetCDFStore(WritableCFDataStore):
7171
"""
7272

7373
def __init__(
74-
self, filename, mode="r", format=None, group=None, lock=None, autoclose=False
74+
self,
75+
filename,
76+
mode="r",
77+
format=None,
78+
group=None,
79+
lock=None,
80+
autoclose=False,
81+
invalid_netcdf=None,
7582
):
7683
import h5netcdf
7784

7885
if format not in [None, "NETCDF4"]:
7986
raise ValueError("invalid format for h5netcdf backend")
8087

81-
self._manager = CachingFileManager(h5netcdf.File, filename, mode=mode)
88+
kwargs = {"invalid_netcdf": invalid_netcdf}
89+
90+
self._manager = CachingFileManager(
91+
h5netcdf.File, filename, mode=mode, kwargs=kwargs
92+
)
8293

8394
if lock is None:
8495
if mode == "r":

xarray/core/dataset.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1443,6 +1443,7 @@ def to_netcdf(
14431443
encoding: Mapping = None,
14441444
unlimited_dims: Iterable[Hashable] = None,
14451445
compute: bool = True,
1446+
invalid_netcdf: bool = False,
14461447
) -> Union[bytes, "Delayed", None]:
14471448
"""Write dataset contents to a netCDF file.
14481449
@@ -1506,6 +1507,10 @@ def to_netcdf(
15061507
compute: boolean
15071508
If true compute immediately, otherwise return a
15081509
``dask.delayed.Delayed`` object that can be computed later.
1510+
invalid_netcdf: boolean
1511+
Only valid along with engine='h5netcdf'. If True, allow writing
1512+
hdf5 files which are invalid netcdf as described in
1513+
https://github.com/shoyer/h5netcdf. Default: False.
15091514
"""
15101515
if encoding is None:
15111516
encoding = {}
@@ -1521,6 +1526,7 @@ def to_netcdf(
15211526
encoding=encoding,
15221527
unlimited_dims=unlimited_dims,
15231528
compute=compute,
1529+
invalid_netcdf=invalid_netcdf,
15241530
)
15251531

15261532
def to_zarr(

xarray/tests/test_backends.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2172,13 +2172,17 @@ def create_store(self):
21722172
yield backends.H5NetCDFStore(tmp_file, "w")
21732173

21742174
@pytest.mark.filterwarnings("ignore:complex dtypes are supported by h5py")
2175-
def test_complex(self):
2175+
@pytest.mark.parametrize(
2176+
"invalid_netcdf, warns, num_warns",
2177+
[(None, FutureWarning, 1), (False, FutureWarning, 1), (True, None, 0)],
2178+
)
2179+
def test_complex(self, invalid_netcdf, warns, num_warns):
21762180
expected = Dataset({"x": ("y", np.ones(5) + 1j * np.ones(5))})
2177-
with pytest.warns(FutureWarning):
2178-
# TODO: make it possible to write invalid netCDF files from xarray
2179-
# without a warning
2180-
with self.roundtrip(expected) as actual:
2181+
save_kwargs = {"invalid_netcdf": invalid_netcdf}
2182+
with pytest.warns(warns) as record:
2183+
with self.roundtrip(expected, save_kwargs=save_kwargs) as actual:
21812184
assert_equal(expected, actual)
2185+
assert len(record) == num_warns
21822186

21832187
def test_cross_engine_read_write_netcdf4(self):
21842188
# Drop dim3, because its labels include strings. These appear to be
@@ -4398,3 +4402,10 @@ def test_use_cftime_false_nonstandard_calendar(calendar, units_year):
43984402
original.to_netcdf(tmp_file)
43994403
with pytest.raises((OutOfBoundsDatetime, ValueError)):
44004404
open_dataset(tmp_file, use_cftime=False)
4405+
4406+
4407+
@pytest.mark.parametrize("engine", ["netcdf4", "scipy"])
4408+
def test_invalid_netcdf_raises(engine):
4409+
data = create_test_data()
4410+
with raises_regex(ValueError, "unrecognized option 'invalid_netcdf'"):
4411+
data.to_netcdf("foo.nc", engine=engine, invalid_netcdf=True)

0 commit comments

Comments
 (0)