From 3213876f32a47f1ca3fc2a5b6fd058d4e282ce69 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Tue, 6 Dec 2022 12:47:46 +0100 Subject: [PATCH 1/3] setitem: use ensure_bytes --- zarr/storage.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index 4acf637330..2437334886 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1378,13 +1378,16 @@ def __getitem__(self, key): def setitems(self, values): if self.mode == 'r': raise ReadOnlyError() - values = {self._normalize_key(key): val for key, val in values.items()} + + # Normalize keys and make sure the values are bytes + values = {self._normalize_key(key): ensure_bytes(val) for key, val in values.items()} self.map.setitems(values) def __setitem__(self, key, value): if self.mode == 'r': raise ReadOnlyError() key = self._normalize_key(key) + value = ensure_bytes(value) path = self.dir_path(key) try: if self.fs.isdir(path): From 48111819d3e9e6f9a1495db17674414dfbc0e0cb Mon Sep 17 00:00:00 2001 From: "Mads R. B. 
Kristensen" Date: Tue, 6 Dec 2022 13:57:05 +0100 Subject: [PATCH 2/3] test --- zarr/tests/test_core.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index e32026e662..eb00fddb4d 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -17,6 +17,7 @@ from numpy.testing import assert_array_almost_equal, assert_array_equal from pkg_resources import parse_version +import zarr from zarr._storage.store import ( v3_api_available, ) @@ -3377,3 +3378,27 @@ def test_array_mismatched_store_versions(): Array(store_v3, path='dataset', read_only=False, chunk_store=chunk_store_v2) with pytest.raises(ValueError): Array(store_v2, path='dataset', read_only=False, chunk_store=chunk_store_v3) + + +@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") +def test_issue_1279(tmpdir): + """See <https://github.com/zarr-developers/zarr-python/issues/1279>""" + + data = np.arange(25).reshape((5, 5)) + ds = zarr.create( + shape=data.shape, + chunks=(5, 5), + dtype=data.dtype, + compressor=(None), + store=FSStore(url=str(tmpdir), mode="a"), + order="F", + ) + + ds[:] = data + + ds_reopened = zarr.open_array( + store=FSStore(url=str(tmpdir), mode="r") + ) + + written_data = ds_reopened[:] + assert_array_equal(data, written_data) From 5caca83426579ce9726c324bd908bd5dc4121d86 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Mon, 19 Dec 2022 13:42:01 +0100 Subject: [PATCH 3/3] impl. 
and use ensure_contiguous_ndarray_or_bytes() --- zarr/storage.py | 11 +++++++---- zarr/util.py | 36 +++++++++++++++++++++++++++++++++--- 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 2437334886..2f54e5e4a8 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -55,8 +55,8 @@ from zarr.util import (buffer_size, json_loads, nolock, normalize_chunks, normalize_dimension_separator, normalize_dtype, normalize_fill_value, normalize_order, - normalize_shape, normalize_storage_path, retry_call - ) + normalize_shape, normalize_storage_path, retry_call, + ensure_contiguous_ndarray_or_bytes) from zarr._storage.absstore import ABSStore # noqa: F401 from zarr._storage.store import (_get_hierarchy_metadata, # noqa: F401 @@ -1380,14 +1380,17 @@ def setitems(self, values): raise ReadOnlyError() # Normalize keys and make sure the values are bytes - values = {self._normalize_key(key): ensure_bytes(val) for key, val in values.items()} + values = { + self._normalize_key(key): ensure_contiguous_ndarray_or_bytes(val) + for key, val in values.items() + } self.map.setitems(values) def __setitem__(self, key, value): if self.mode == 'r': raise ReadOnlyError() key = self._normalize_key(key) - value = ensure_bytes(value) + value = ensure_contiguous_ndarray_or_bytes(value) path = self.dir_path(key) try: if self.fs.isdir(path): diff --git a/zarr/util.py b/zarr/util.py index 9fcdac9df7..dfbb551651 100644 --- a/zarr/util.py +++ b/zarr/util.py @@ -5,17 +5,22 @@ from textwrap import TextWrapper import mmap import time +from typing import Any, Callable, Dict, Optional, Tuple, Union import numpy as np from asciitree import BoxStyle, LeftAligned from asciitree.traversal import Traversal from collections.abc import Iterable -from numcodecs.compat import ensure_text, ensure_ndarray_like +from numcodecs.compat import ( + ensure_text, + ensure_ndarray_like, + ensure_bytes, + ensure_contiguous_ndarray_like +) +from numcodecs.ndarray_like import 
NDArrayLike from numcodecs.registry import codec_registry from numcodecs.blosc import cbuffer_sizes, cbuffer_metainfo -from typing import Any, Callable, Dict, Optional, Tuple, Union - def flatten(arg: Iterable) -> Iterable: for element in arg: @@ -696,3 +701,28 @@ def all_equal(value: Any, array: Any): # using == raises warnings from numpy deprecated pattern, but # using np.equal() raises type errors for structured dtypes... return np.all(value == array) + + +def ensure_contiguous_ndarray_or_bytes(buf) -> Union[NDArrayLike, bytes]: + """Convenience function to coerce `buf` to ndarray-like array or bytes. + + First check if `buf` can be zero-copy converted to a contiguous array. + If not, `buf` will be copied to a newly allocated `bytes` object. + + Parameters + ---------- + buf : ndarray-like, array-like, or bytes-like + A numpy array like object such as numpy.ndarray, cupy.ndarray, or + any object exporting a buffer interface. + + Returns + ------- + arr : NDArrayLike or bytes + A ndarray-like or bytes object + """ + + try: + return ensure_contiguous_ndarray_like(buf) + except TypeError: + # An error is raised if `buf` couldn't be zero-copy converted + return ensure_bytes(buf)