From 3beeee3e70ec0c830059b8f8921aaa084a787862 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Wed, 22 Feb 2017 14:29:01 -0500 Subject: [PATCH 1/3] Add visitor pattern methods to Group. Provides `visit` and `visititems` that behave identically to their h5py counterparts. This should make it easier to traverse the full hierarchy of a Zarr group. Also should make it easier for users with h5py code to try out using Zarr with fewer changes. --- docs/api/hierarchy.rst | 2 + zarr/hierarchy.py | 94 ++++++++++++++++++++++++++++++++++++ zarr/tests/test_hierarchy.py | 69 ++++++++++++++++++++++++++ 3 files changed, 165 insertions(+) diff --git a/docs/api/hierarchy.rst b/docs/api/hierarchy.rst index 73db5bbc34..245a8f3d8e 100644 --- a/docs/api/hierarchy.rst +++ b/docs/api/hierarchy.rst @@ -15,6 +15,8 @@ Groups (``zarr.hierarchy``) .. automethod:: groups .. automethod:: array_keys .. automethod:: arrays + .. automethod:: visit + .. automethod:: visititems .. automethod:: create_group .. automethod:: require_group .. automethod:: create_groups diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 9c92f4b122..f07dfdaab2 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, print_function, division from collections import MutableMapping +from itertools import islice import numpy as np @@ -55,6 +56,8 @@ class Group(MutableMapping): groups array_keys arrays + visit + visititems create_group require_group create_groups @@ -414,6 +417,97 @@ def arrays(self): chunk_store=self._chunk_store, synchronizer=self._synchronizer) + def visit(self, func): + """Run ``func`` on each object's path. + + Note: If ``func`` returns ``None`` (or doesn't return), + iteration continues. However, if ``func`` returns + anything else, it ceases and returns that value. 
+ + Examples + -------- + >>> import zarr + >>> g1 = zarr.group() + >>> g2 = g1.create_group('foo') + >>> g3 = g1.create_group('bar') + >>> g4 = g3.create_group('baz') + >>> g5 = g3.create_group('quux') + >>> def print_visitor(name): + ... print(name) + >>> g1.visit(print_visitor) + bar + bar/baz + bar/quux + foo + >>> g3.visit(print_visitor) + baz + quux + + """ + + def _visit(obj): + yield obj + + keys = sorted(getattr(obj, "keys", lambda : [])()) + for each_key in keys: + for each_obj in _visit(obj[each_key]): + yield each_obj + + base_len = len(self.name) + for each_obj in islice(_visit(self), 1, None): + value = func(each_obj.name[base_len:].lstrip("/")) + if value is not None: + return value + + def visititems(self, func): + """Run ``func`` on each object's path and the object itself. + + Note: If ``func`` returns ``None`` (or doesn't return), + iteration continues. However, if ``func`` returns + anything else, it ceases and returns that value. + + Examples + -------- + >>> import zarr + >>> g1 = zarr.group() + >>> g2 = g1.create_group('foo') + >>> g3 = g1.create_group('bar') + >>> g4 = g3.create_group('baz') + >>> g5 = g3.create_group('quux') + >>> def print_visitor(name, obj): + ... 
print((name, obj)) + >>> g1.visititems(print_visitor) + ('bar', Group(/bar, 2) + groups: 2; baz, quux + store: DictStore) + ('bar/baz', Group(/bar/baz, 0) + store: DictStore) + ('bar/quux', Group(/bar/quux, 0) + store: DictStore) + ('foo', Group(/foo, 0) + store: DictStore) + >>> g3.visititems(print_visitor) + ('baz', Group(/bar/baz, 0) + store: DictStore) + ('quux', Group(/bar/quux, 0) + store: DictStore) + + """ + + def _visit(obj): + yield obj + + keys = sorted(getattr(obj, "keys", lambda : [])()) + for each_key in keys: + for each_obj in _visit(obj[each_key]): + yield each_obj + + base_len = len(self.name) + for each_obj in islice(_visit(self), 1, None): + value = func(each_obj.name[base_len:].lstrip("/"), each_obj) + if value is not None: + return value + def _write_op(self, f, *args, **kwargs): # guard condition diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index a13fb29e05..5d90b67f80 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -473,6 +473,75 @@ def test_getitem_contains_iterators(self): eq('baz', arrays[0][0]) eq(g1['foo']['baz'], arrays[0][1]) + # visitor collection tests + items = [] + + def visitor2(name, obj=None): + items.append(name) + + def visitor3(name, obj): + items.append((name, obj)) + + del items[:] + g1.visit(visitor2) + eq([ + "a", + "a/b", + "a/b/c", + "foo", + "foo/bar", + "foo/baz", + ], items) + + del items[:] + g1["foo"].visit(visitor2) + eq([ + "bar", + "baz", + ], items) + + del items[:] + g1.visititems(visitor2) + eq([ + "a", + "a/b", + "a/b/c", + "foo", + "foo/bar", + "foo/baz", + ], items) + + del items[:] + g1["foo"].visititems(visitor2) + eq([ + "bar", + "baz", + ], items) + + del items[:] + g1.visititems(visitor3) + for n, o in items: + eq(g1[n], o) + + del items[:] + g1["foo"].visititems(visitor3) + for n, o in items: + eq(g1["foo"][n], o) + + # visitor filter tests + def visitor0(name, obj=None): + if name == "a/b/c/d": + return True # pragma: no cover + + def 
visitor1(name, obj=None): + if name == "a/b/c": + return True + + eq(None, g1.visit(visitor0)) + eq(None, g1.visititems(visitor0)) + eq(True, g1.visit(visitor1)) + eq(True, g1.visititems(visitor1)) + def test_empty_getitem_contains_iterators(self): # setup g = self.create_group() From ab37a6dad066a66f64e63da8e4deaa623018d445 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Wed, 22 Feb 2017 22:19:47 -0500 Subject: [PATCH 2/3] Refactor out `visitvalues`. This method seems to be at the core of `visit` and `visititems`. So it makes sense to refactor it out. Though it also provides usable functionality of its own. So it makes sense to expose it as part of the API too. --- docs/api/hierarchy.rst | 1 + zarr/hierarchy.py | 75 ++++++++++++++++++++++++------------ zarr/tests/test_hierarchy.py | 49 +++++++++++++++++------ 3 files changed, 90 insertions(+), 35 deletions(-) diff --git a/docs/api/hierarchy.rst b/docs/api/hierarchy.rst index 245a8f3d8e..12a78d6d10 100644 --- a/docs/api/hierarchy.rst +++ b/docs/api/hierarchy.rst @@ -16,6 +16,7 @@ Groups (``zarr.hierarchy``) .. automethod:: array_keys .. automethod:: arrays .. automethod:: visit + .. automethod:: visitvalues .. automethod:: visititems .. automethod:: create_group .. automethod:: require_group diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index f07dfdaab2..acd92a3302 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -57,6 +57,7 @@ class Group(MutableMapping): array_keys arrays visit + visitvalues visititems create_group require_group @@ -417,6 +418,54 @@ def arrays(self): chunk_store=self._chunk_store, synchronizer=self._synchronizer) + def visitvalues(self, func): + """Run ``func`` on each object. + + Note: If ``func`` returns ``None`` (or doesn't return), + iteration continues. However, if ``func`` returns + anything else, it ceases and returns that value. 
+ + Examples + -------- + >>> import zarr + >>> g1 = zarr.group() + >>> g2 = g1.create_group('foo') + >>> g3 = g1.create_group('bar') + >>> g4 = g3.create_group('baz') + >>> g5 = g3.create_group('quux') + >>> def print_visitor(obj): + ... print(obj) + >>> g1.visitvalues(print_visitor) + Group(/bar, 2) + groups: 2; baz, quux + store: DictStore + Group(/bar/baz, 0) + store: DictStore + Group(/bar/quux, 0) + store: DictStore + Group(/foo, 0) + store: DictStore + >>> g3.visitvalues(print_visitor) + Group(/bar/baz, 0) + store: DictStore + Group(/bar/quux, 0) + store: DictStore + + """ + + def _visit(obj): + yield obj + + keys = sorted(getattr(obj, "keys", lambda : [])()) + for each_key in keys: + for each_obj in _visit(obj[each_key]): + yield each_obj + + for each_obj in islice(_visit(self), 1, None): + value = func(each_obj) + if value is not None: + return value + def visit(self, func): """Run ``func`` on each object's path. @@ -445,19 +494,8 @@ def visit(self, func): """ - def _visit(obj): - yield obj - - keys = sorted(getattr(obj, "keys", lambda : [])()) - for each_key in keys: - for each_obj in _visit(obj[each_key]): - yield each_obj - base_len = len(self.name) - for each_obj in islice(_visit(self), 1, None): - value = func(each_obj.name[base_len:].lstrip("/")) - if value is not None: - return value + return self.visitvalues(lambda o: func(o.name[base_len:].lstrip("/"))) def visititems(self, func): """Run ``func`` on each object's path and the object itself. 
@@ -494,19 +532,8 @@ def visititems(self, func): """ - def _visit(obj): - yield obj - - keys = sorted(getattr(obj, "keys", lambda : [])()) - for each_key in keys: - for each_obj in _visit(obj[each_key]): - yield each_obj - base_len = len(self.name) - for each_obj in islice(_visit(self), 1, None): - value = func(each_obj.name[base_len:].lstrip("/"), each_obj) - if value is not None: - return value + return self.visitvalues(lambda o: func(o.name[base_len:].lstrip("/"), o)) def _write_op(self, f, *args, **kwargs): diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index 5d90b67f80..2dbde57958 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -476,14 +476,35 @@ def test_getitem_contains_iterators(self): # visitor collection tests items = [] - def visitor2(name, obj=None): + def visitor2(obj): + items.append(obj.path) + + def visitor3(name, obj=None): items.append(name) - def visitor3(name, obj): + def visitor4(name, obj): items.append((name, obj)) del items[:] - g1.visit(visitor2) + g1.visitvalues(visitor2) + eq([ + "a", + "a/b", + "a/b/c", + "foo", + "foo/bar", + "foo/baz", + ], items) + + del items[:] + g1["foo"].visitvalues(visitor2) + eq([ + "foo/bar", + "foo/baz", + ], items) + + del items[:] + g1.visit(visitor3) eq([ "a", "a/b", @@ -494,14 +515,14 @@ def visitor3(name, obj): ], items) del items[:] - g1["foo"].visit(visitor2) + g1["foo"].visit(visitor3) eq([ "bar", "baz", ], items) del items[:] - g1.visititems(visitor2) + g1.visititems(visitor3) eq([ "a", "a/b", @@ -512,34 +533,40 @@ def visitor3(name, obj): ], items) del items[:] - g1["foo"].visititems(visitor2) + g1["foo"].visititems(visitor3) eq([ "bar", "baz", ], items) del items[:] - g1.visititems(visitor3) + g1.visititems(visitor4) for n, o in items: eq(g1[n], o) del items[:] - g1["foo"].visititems(visitor3) + g1["foo"].visititems(visitor4) for n, o in items: eq(g1["foo"][n], o) # visitor filter tests - def visitor0(name, obj=None): + def visitor0(val, 
*args): + name = getattr(val, "path", val) + if name == "a/b/c/d": return True # pragma: no cover - def visitor1(name, obj=None): + def visitor1(val, *args): + name = getattr(val, "path", val) + if name == "a/b/c": - return True + return True # pragma: no cover eq(None, g1.visit(visitor0)) + eq(None, g1.visitvalues(visitor0)) eq(None, g1.visititems(visitor0)) eq(True, g1.visit(visitor1)) + eq(True, g1.visitvalues(visitor1)) eq(True, g1.visititems(visitor1)) def test_empty_getitem_contains_iterators(self): From 143954af50d17d318f504e6b40a361c34d2857e3 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Thu, 23 Feb 2017 12:15:09 -0500 Subject: [PATCH 3/3] Add `visitkeys` as an alias to `visit`. --- docs/api/hierarchy.rst | 1 + zarr/hierarchy.py | 7 +++++++ zarr/tests/test_hierarchy.py | 20 ++++++++++++++++++++ 3 files changed, 28 insertions(+) diff --git a/docs/api/hierarchy.rst b/docs/api/hierarchy.rst index 12a78d6d10..799657c8d0 100644 --- a/docs/api/hierarchy.rst +++ b/docs/api/hierarchy.rst @@ -16,6 +16,7 @@ Groups (``zarr.hierarchy``) .. automethod:: array_keys .. automethod:: arrays .. automethod:: visit + .. automethod:: visitkeys .. automethod:: visitvalues .. automethod:: visititems .. automethod:: create_group diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index acd92a3302..fbf853bcf8 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -57,6 +57,7 @@ class Group(MutableMapping): array_keys arrays visit + visitkeys visitvalues visititems create_group @@ -497,6 +498,12 @@ def visit(self, func): base_len = len(self.name) return self.visitvalues(lambda o: func(o.name[base_len:].lstrip("/"))) + def visitkeys(self, func): + """An alias for :py:meth:`~Group.visit`. + """ + + return self.visit(func) + def visititems(self, func): """Run ``func`` on each object's path and the object itself. 
diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index 2dbde57958..ee3a7e2eae 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -521,6 +521,24 @@ def visitor4(name, obj): "baz", ], items) + del items[:] + g1.visitkeys(visitor3) + eq([ + "a", + "a/b", + "a/b/c", + "foo", + "foo/bar", + "foo/baz", + ], items) + + del items[:] + g1["foo"].visitkeys(visitor3) + eq([ + "bar", + "baz", + ], items) + del items[:] g1.visititems(visitor3) eq([ @@ -563,9 +581,11 @@ def visitor1(val, *args): return True # pragma: no cover eq(None, g1.visit(visitor0)) + eq(None, g1.visitkeys(visitor0)) eq(None, g1.visitvalues(visitor0)) eq(None, g1.visititems(visitor0)) eq(True, g1.visit(visitor1)) + eq(True, g1.visitkeys(visitor1)) eq(True, g1.visitvalues(visitor1)) eq(True, g1.visititems(visitor1))