Skip to content

Add support in the "zarr" backend for reading NCZarr data #6420

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 34 commits into from
Apr 14, 2022
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
dbf76a9
add support for NCZarr
malmans2 Mar 25, 2022
07a334f
restore original format
malmans2 Mar 25, 2022
d1e9120
add test_nczarr
malmans2 Mar 25, 2022
ee69a5c
better comment
malmans2 Mar 28, 2022
fd60da8
test reading with zarr
malmans2 Mar 28, 2022
e35d793
decode zarray
malmans2 Mar 28, 2022
eac9b3b
use public store and test nczarr only
malmans2 Mar 28, 2022
8af176c
restore tests
malmans2 Mar 28, 2022
ac32ac8
install netcdf-c fixing bug
malmans2 Mar 30, 2022
44ef220
add env
malmans2 Mar 30, 2022
b72e1d4
fix ci
malmans2 Mar 30, 2022
fd84283
try build netcdf-c on windows
malmans2 Mar 30, 2022
3355eb7
fix typo
malmans2 Mar 30, 2022
9af4401
install netcdf-c first
malmans2 Mar 30, 2022
12ef991
install netcdf-c dep with conda
malmans2 Mar 30, 2022
71eca46
fix ci
malmans2 Mar 30, 2022
d3e9182
try win env again
malmans2 Mar 30, 2022
316153b
fix Nan in tests
malmans2 Mar 30, 2022
978f753
edit zarray
malmans2 Mar 30, 2022
5be903b
loop over all variables
malmans2 Mar 30, 2022
f520e7f
edit Nan in zattrs and zarray
malmans2 Mar 30, 2022
b5609a1
check path exists
malmans2 Mar 30, 2022
bded882
Merge branch 'main' into nczarr
malmans2 Mar 30, 2022
3a22ac8
must use netcdf-c>=4.8.1
malmans2 Mar 30, 2022
7f19413
skip 4.8.1 and Windows
malmans2 Mar 30, 2022
b5704bd
revisions
malmans2 Apr 7, 2022
8ee5d19
Merge branch 'main' into nczarr
malmans2 Apr 7, 2022
2c12935
better testing
malmans2 Apr 7, 2022
286d72c
revisions
malmans2 Apr 9, 2022
c5bde72
Merge branch 'main' into nczarr
malmans2 Apr 9, 2022
b823675
add what's new
malmans2 Apr 9, 2022
b6cfad3
update docs
malmans2 Apr 10, 2022
eb92cde
[skip ci] Mention netCDF and GDAL in user-guide
malmans2 Apr 11, 2022
470210a
[skip ci] reword
malmans2 Apr 11, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 27 additions & 6 deletions xarray/backends/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,19 +178,40 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks):
raise AssertionError("We should never get here. Function logic must be wrong.")


def _get_nczarr_dims(zarr_obj):
    """Return the dimension names NCZarr recorded for a zarr array.

    NCZarr defines dimensions through metadata in the array's ``.zarray``
    document: the ``dimrefs`` entry of its ``_NCZARR_ARRAY`` member lists one
    Fully Qualified Name per dimension.

    Parameters
    ----------
    zarr_obj
        Object exposing ``path`` (its key prefix inside the store) and
        ``store`` (a mapping from keys to raw JSON documents).

    Returns
    -------
    list of str
        The last component of each Fully Qualified Name, in ``dimrefs`` order.

    Raises
    ------
    KeyError
        If the parsed ``.zarray`` document has no ``_NCZARR_ARRAY`` or
        ``dimrefs`` entry (and, presumably, if the store has no ``.zarray``
        key for this object -- that depends on the store implementation).
    """
    # Store keys are "/"-separated whatever the host OS is, so build the key
    # explicitly: os.path.join would insert "\\" on Windows and the lookup
    # would miss in any store that does not map keys onto the local filesystem.
    prefix = zarr_obj.path
    zarray_key = f"{prefix}/.zarray" if prefix else ".zarray"
    zarray = zarr.util.json_loads(zarr_obj.store[zarray_key])
    # NCZarr uses Fully Qualified Names ("/group/dim"); keep only the last part.
    return [fqn.rsplit("/", 1)[-1] for fqn in zarray["_NCZARR_ARRAY"]["dimrefs"]]


def _hide_nczarr_attrs(attrs):
    """Wrap ``attrs`` in a ``HiddenKeyDict`` built with every ``_NC``-prefixed key.

    The keys are collected once, at call time, by iterating ``attrs``; they
    are passed as the second argument of ``HiddenKeyDict`` (presumably the
    keys it hides from the user -- confirm against its definition).
    """
    nczarr_keys = []
    for key in attrs:
        if key.startswith("_NC"):
            nczarr_keys.append(key)
    return HiddenKeyDict(attrs, nczarr_keys)


def _get_zarr_dims_and_attrs(zarr_obj, dimension_key):
# Zarr arrays do not have dimensions. To get around this problem, we add
# an attribute that specifies the dimension. We have to hide this attribute
# when we send the attributes to the user.
# zarr_obj can be either a zarr group or zarr array
attributes = _hide_nczarr_attrs(zarr_obj.attrs)
try:
# Xarray-Zarr
dimensions = zarr_obj.attrs[dimension_key]
except KeyError:
raise KeyError(
f"Zarr object is missing the attribute `{dimension_key}`, which is "
"required for xarray to determine variable dimensions."
)
attributes = HiddenKeyDict(zarr_obj.attrs, [dimension_key])
try:
# NCZarr
dimensions = _get_nczarr_dims(zarr_obj)
attributes = dict(attributes)
attributes[dimension_key] = dimensions
except KeyError:
raise KeyError(
f"Zarr object is missing the attribute `{dimension_key}`, which is "
"required for xarray to determine variable dimensions."
)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same as above

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!

attributes = HiddenKeyDict(attributes, [dimension_key])
return dimensions, attributes


Expand Down Expand Up @@ -430,7 +451,7 @@ def get_variables(self):
)

def get_attrs(self):
return dict(self.zarr_group.attrs.asdict())
return dict(_hide_nczarr_attrs(self.zarr_group.attrs.asdict()))

def get_dimensions(self):
dimensions = {}
Expand Down
24 changes: 24 additions & 0 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import math
import os.path
import pickle
import platform
import re
import shutil
import sys
Expand Down Expand Up @@ -5427,3 +5428,26 @@ def test_write_file_from_np_str(str_type, tmpdir) -> None:
txr = tdf.to_xarray()

txr.to_netcdf(tmpdir.join("test.nc"))


@requires_zarr
@requires_netCDF4
def test_nczarr():
    """Write a dataset as NCZarr through netCDF4 and read it back with zarr.

    The test is skipped for netcdf-c older than 4.8.1, and for exactly 4.8.1
    on Windows.
    """
    first_supported = Version("4.8.1")
    libversion = Version(nc4.getlibversion().split()[0])
    running_on_windows = platform.system() == "Windows"

    if libversion < first_supported:
        pytest.skip("requires netcdf-c>=4.8.1")
    if running_on_windows and libversion == first_supported:
        # netcdf-c==4.8.1 writes "Nan" instead of "NaN"
        # https://github.com/Unidata/netcdf-c/issues/2265
        pytest.skip("netcdf-c==4.8.1 has issues on Windows")

    # dim3 is dropped because netcdf-c does not support dtype='<U1'
    # https://github.com/Unidata/netcdf-c/issues/2259
    expected = create_test_data().drop_vars("dim3")

    with create_tmp_file(suffix=".zarr") as tmp:
        # Releases after 4.8.1 add _ARRAY_DIMENSIONS by default; "noxarray"
        # turns that off so the pure-NCZarr code path is what gets read back.
        if libversion == first_supported:
            mode = "nczarr"
        else:
            mode = "nczarr,noxarray"
        target_url = f"file://{tmp}#mode={mode}"
        expected.to_netcdf(target_url)
        actual = xr.open_zarr(tmp, consolidated=False)
        assert_identical(expected, actual)