-
-
Notifications
You must be signed in to change notification settings - Fork 349
Optimize setitem with chunk equal to fill_value, round 2 #738
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 18 commits
8153810
c84839e
750d696
6ac2349
eb36713
053ad4c
3375bf0
30c3a30
d2fc396
e4e4012
bd27b9a
814d009
769f5a6
cd56b35
bcbaac4
9096f2c
044a9b8
74e0852
b2ec5ad
62a55ab
dbc32fd
cd28aff
160c2dc
b7fe1fe
af715fe
e17993a
7c9a041
59328f0
72488a8
7489ae9
10199b3
88b4811
3c69719
8aa93fa
0dae9da
40c3f14
7dde846
7a45fd2
99f59ef
a6ba3c7
6f8b6c4
054399e
7025d19
bbabe5c
3abcbc3
c3b4455
ea3356c
b81c14a
05716a3
23bfc1e
b921b34
48c38c7
7e4fbad
b063f52
020475b
a81ac83
f8d8415
1c29fe8
710b875
8a06884
7f859c3
0a7a3cc
1a0f41c
94d5d0a
a918f1d
3dd1afd
2165164
4a4adb1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -33,6 +33,7 @@ | |||||
from zarr.util import ( | ||||||
InfoReporter, | ||||||
check_array_shape, | ||||||
flatten, | ||||||
human_readable_size, | ||||||
is_total_slice, | ||||||
nolock, | ||||||
|
@@ -74,6 +75,12 @@ class Array: | |||||
If True and while the chunk_store is a FSStore and the compression used | ||||||
is Blosc, when getting data from the array chunks will be partially | ||||||
read and decompressed when possible. | ||||||
write_empty_chunks : bool, optional | ||||||
Determines chunk writing behavior for chunks filled with `fill_value` ("empty" chunks). | ||||||
If True (default), all chunks will be written regardless of their contents. | ||||||
If False, empty chunks will not be written, and the `store` entry for | ||||||
the chunk key of an empty chunk will be deleted. Note that setting this option to False | ||||||
will incur additional overhead per chunk write. | ||||||
|
||||||
.. versionadded:: 2.7 | ||||||
|
||||||
|
@@ -138,6 +145,7 @@ def __init__( | |||||
cache_metadata=True, | ||||||
cache_attrs=True, | ||||||
partial_decompress=False, | ||||||
write_empty_chunks=True, | ||||||
): | ||||||
# N.B., expect at this point store is fully initialized with all | ||||||
# configuration metadata fully specified and normalized | ||||||
|
@@ -154,6 +162,7 @@ def __init__( | |||||
self._cache_metadata = cache_metadata | ||||||
self._is_view = False | ||||||
self._partial_decompress = partial_decompress | ||||||
self._write_empty_chunks = write_empty_chunks | ||||||
|
||||||
# initialize metadata | ||||||
self._load_metadata() | ||||||
|
@@ -1586,6 +1595,17 @@ def _set_basic_selection_zd(self, selection, value, fields=None): | |||||
else: | ||||||
chunk[selection] = value | ||||||
|
||||||
# clear chunk if it only contains the fill value | ||||||
if self._chunk_isempty(chunk): | ||||||
try: | ||||||
del self.chunk_store[ckey] | ||||||
return | ||||||
except KeyError: | ||||||
return | ||||||
except Exception: | ||||||
# deleting failed, fallback to overwriting | ||||||
pass | ||||||
|
||||||
# encode and store | ||||||
cdata = self._encode_chunk(chunk) | ||||||
self.chunk_store[ckey] = cdata | ||||||
|
@@ -1859,9 +1879,38 @@ def _chunk_setitems(self, lchunk_coords, lchunk_selection, values, fields=None): | |||||
ckeys = [self._chunk_key(co) for co in lchunk_coords] | ||||||
cdatas = [self._process_for_setitem(key, sel, val, fields=fields) | ||||||
for key, sel, val in zip(ckeys, lchunk_selection, values)] | ||||||
values = {k: v for k, v in zip(ckeys, cdatas)} | ||||||
values = {} | ||||||
if not self._write_empty_chunks: | ||||||
for ckey, cdata in zip(ckeys, cdatas): | ||||||
if self._chunk_isempty(cdata) and not self._chunk_delitem(ckey): | ||||||
values[ckey] = self._encode_chunk(cdata) | ||||||
else: | ||||||
values = dict(zip(ckeys, map(self._encode_chunk, cdatas))) | ||||||
self.chunk_store.setitems(values) | ||||||
|
||||||
def _chunk_isempty(self, chunk): | ||||||
if self.dtype == 'object': | ||||||
# we have to flatten the result of np.equal to handle outputs like | ||||||
# [np.array([True,True]), True, True] | ||||||
is_empty = all(flatten(np.equal(chunk, self.fill_value, dtype='object'))) | ||||||
else: | ||||||
is_empty = np.all(chunk == self._fill_value) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. does this work properly if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nope! Do you have any advice for making that work? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe something like this?
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. " |
||||||
return is_empty | ||||||
|
||||||
def _chunk_delitem(self, ckey): | ||||||
""" | ||||||
Attempt to delete the value associated with ckey. | ||||||
Returns True if deletion succeeds or KeyError is raised. | ||||||
Returns False if any other exception is raised. | ||||||
""" | ||||||
try: | ||||||
del self.chunk_store[ckey] | ||||||
return True | ||||||
except KeyError: | ||||||
return True | ||||||
except Exception: | ||||||
return False | ||||||
|
||||||
def _chunk_setitem(self, chunk_coords, chunk_selection, value, fields=None): | ||||||
"""Replace part or whole of a chunk. | ||||||
|
||||||
|
@@ -1889,10 +1938,17 @@ def _chunk_setitem(self, chunk_coords, chunk_selection, value, fields=None): | |||||
fields=fields) | ||||||
|
||||||
def _chunk_setitem_nosync(self, chunk_coords, chunk_selection, value, fields=None): | ||||||
do_store = True | ||||||
ckey = self._chunk_key(chunk_coords) | ||||||
cdata = self._process_for_setitem(ckey, chunk_selection, value, fields=fields) | ||||||
|
||||||
# clear chunk if it only contains the fill value | ||||||
if (not self._write_empty_chunks) and self._chunk_isempty(cdata): | ||||||
do_store = not self._chunk_delitem(ckey) | ||||||
|
||||||
# store | ||||||
self.chunk_store[ckey] = cdata | ||||||
if do_store: | ||||||
self.chunk_store[ckey] = self._encode_chunk(cdata) | ||||||
|
||||||
def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): | ||||||
if is_total_slice(chunk_selection, self._chunks) and not fields: | ||||||
|
@@ -1948,8 +2004,7 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): | |||||
else: | ||||||
chunk[chunk_selection] = value | ||||||
|
||||||
# encode chunk | ||||||
return self._encode_chunk(chunk) | ||||||
return chunk | ||||||
|
||||||
def _chunk_key(self, chunk_coords): | ||||||
return self._key_prefix + '.'.join(map(str, chunk_coords)) | ||||||
|
@@ -2169,7 +2224,8 @@ def hexdigest(self, hashname="sha1"): | |||||
|
||||||
def __getstate__(self): | ||||||
return (self._store, self._path, self._read_only, self._chunk_store, | ||||||
self._synchronizer, self._cache_metadata, self._attrs.cache) | ||||||
self._synchronizer, self._cache_metadata, self._attrs.cache, | ||||||
self._partial_decompress, self._write_empty_chunks) | ||||||
|
||||||
def __setstate__(self, state): | ||||||
self.__init__(*state) | ||||||
|
Uh oh!
There was an error while loading. Please reload this page.