mllam · observingClouds · Jan 21, 2025 · Jan 14, 2025 · Jan 14, 2025 · Jan 16, 2025
diff --git a/.github/workflows/python-package-pip.yml b/.github/workflows/python-package-pip.yml
@@ -10,18 +10,20 @@ on:
 
 jobs:
   test:
-    name: Test pip install python ${{ matrix.python-version }} on ${{ matrix.os }}
+    name: Test pip install python ${{ matrix.python-version }} on ${{ matrix.os }} with zarr ${{ matrix.zarr-version }}
 
     runs-on: ${{ matrix.os }}
     strategy:
+      fail-fast: false
       matrix:
         os: [ubuntu-latest]
         python-version: ["3.8", "3.9", "3.10"]
+        zarr-version: [">=2,<3", ">=3,<4"]  # one leg per supported zarr major series; no silent fallback to 2.x
     steps:
       - uses: actions/checkout@v2
       - name: Install package with pip
         run: |
-          python -m pip install .
+          python -m pip install . "zarr${{ matrix.zarr-version }}"
           python -m pip install pytest
 
       - name: Run tests

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added
 
 - add github PR template to guide development process on github [\#44](https://github.com/mllam/mllam-data-prep/pull/44), @leifdenby
+- add support for zarr 3.0.0 and above [\#51](https://github.com/mllam/mllam-data-prep/pull/51), @kashif
 
 ## [v0.5.0](https://github.com/mllam/mllam-data-prep/releases/tag/v0.5.0)
 

diff --git a/mllam_data_prep/__main__.py b/mllam_data_prep/__main__.py
@@ -64,7 +64,7 @@
         )
 
         logger.info(
-            f"Setting up dask.distributed.LocalCluster with {n_local_cores} cores and {memory_per_worker/1024/1024:0.0f} MB of memory per worker"
+            f"Setting up dask.distributed.LocalCluster with {n_local_cores} cores and {memory_per_worker / 1024 / 1024:0.0f} MB of memory per worker"
         )
 
         cluster = LocalCluster(

diff --git a/mllam_data_prep/create_dataset.py b/mllam_data_prep/create_dataset.py
@@ -5,8 +5,9 @@
 
 import numpy as np
 import xarray as xr
+import zarr
 from loguru import logger
-from numcodecs import Blosc
+from packaging.version import Version
 
 from . import __version__
 from .config import Config, InvalidConfigException
@@ -15,6 +16,11 @@
 from .ops.selection import select_by_kwargs
 from .ops.statistics import calc_stats
 
+if Version(zarr.__version__) >= Version("3"):
+    from zarr.codecs import BloscCodec, BloscShuffle
+else:
+    from numcodecs import Blosc
+
 # the `extra` field in the config that was added between v0.2.0 and v0.5.0 is
 # optional, so we can support both v0.2.0 and v0.5.0
 SUPPORTED_CONFIG_VERSIONS = ["v0.2.0", "v0.5.0"]
@@ -271,8 +277,14 @@ def create_dataset_zarr(fp_config, fp_zarr: str = None):
 
     # use zstd compression since it has a good balance of speed and compression ratio
     # https://engineering.fb.com/2016/08/31/core-infra/smaller-and-faster-data-compression-with-zstandard/
-    compressor = Blosc(cname="zstd", clevel=1, shuffle=Blosc.BITSHUFFLE)
-    encoding = {v: {"compressor": compressor} for v in ds.data_vars}
+    # zarr>=3 uses its own codec classes and the plural `compressors` encoding key;
+    # the codec settings are identical in both branches so output does not depend on the zarr version
+    if Version(zarr.__version__) >= Version("3"):
+        compressor = BloscCodec(cname="zstd", clevel=1, shuffle=BloscShuffle.bitshuffle)
+        encoding = {v: {"compressors": compressor} for v in ds.data_vars}
+    else:
+        compressor = Blosc(cname="zstd", clevel=1, shuffle=Blosc.BITSHUFFLE)
+        encoding = {v: {"compressor": compressor} for v in ds.data_vars}
 
     ds.to_zarr(fp_zarr, consolidated=True, mode="w", encoding=encoding)
     logger.info(f"Wrote training-ready dataset to {fp_zarr}")

diff --git a/pyproject.toml b/pyproject.toml
@@ -8,6 +8,7 @@ authors = [
     {name = "Emy Alerskans", email = "ea@dmi.dk"},
     {name = "Eleni Briola", email = "elb@dmi.dk"},
     {name = "Joel Oskarsson", email = "joel.oskarsson@liu.se"},
+    {name = "Kashif Rasul", email = "kashif.rasul@gmail.com"},
 ]
 dependencies = [
     "xarray>=2024.2.0",
@@ -22,6 +23,7 @@ dependencies = [
     "rich>=13.7.1",
     "dask>=2024.2.1",
     "psutil>=5.7.2",
+    "packaging>=23.1",
 ]
 requires-python = ">=3.9"
 readme = "README.md"