From b99e1dbc936d869620045d9190aab132b2da939f Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Mon, 10 Dec 2018 14:15:44 -0500 Subject: [PATCH 1/5] Fix `getsize` to be recursive Make sure the `DirectoryStore`'s `getsize` method checks each directory recursively when computing the storage size of a directory. --- zarr/storage.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index e7d70ea7bc..129dca297c 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -843,11 +843,10 @@ def getsize(self, path=None): if os.path.isfile(fs_path): return os.path.getsize(fs_path) elif os.path.isdir(fs_path): - children = os.listdir(fs_path) size = 0 - for child in children: - child_fs_path = os.path.join(fs_path, child) - if os.path.isfile(child_fs_path): + for dirname, dirs, children in os.walk(fs_path): + for child in children: + child_fs_path = os.path.join(dirname, child) size += os.path.getsize(child_fs_path) return size else: From 4bad8a4e14b2b3fc1e4050f104205257345e92de Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Mon, 10 Dec 2018 14:15:46 -0500 Subject: [PATCH 2/5] Drop copy-pasted comment in `test_nbytes_stored` --- zarr/tests/test_core.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index cbad222edb..9d9dfd7a74 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -1278,8 +1278,6 @@ def create_array(read_only=False, **kwargs): cache_attrs=cache_attrs) def test_nbytes_stored(self): - - # dict as store z = self.create_array(shape=1000, chunks=100) expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values()) assert expect_nbytes_stored == z.nbytes_stored From 8e3547987be2f0cc398427f90111702d5c31b8f3 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Mon, 10 Dec 2018 14:15:47 -0500 Subject: [PATCH 3/5] Test a 2-D `DirectoryStore` case --- zarr/tests/test_core.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 9d9dfd7a74..7fc7bbbc39 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -1285,6 +1285,13 @@ def test_nbytes_stored(self): expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values()) assert expect_nbytes_stored == z.nbytes_stored + z2 = self.create_array(shape=(1000, 1100), chunks=100) + expect_nbytes_stored = sum(buffer_size(v) for v in z2.store.values()) + assert expect_nbytes_stored == z2.nbytes_stored + z2[:] = 42 + expect_nbytes_stored = sum(buffer_size(v) for v in z2.store.values()) + assert expect_nbytes_stored == z2.nbytes_stored + class TestArrayWithNestedDirectoryStore(TestArrayWithDirectoryStore): From 8d5403832dcae8f5aac02d29ffe3fe4d1d2d50cc Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Mon, 10 Dec 2018 14:17:49 -0500 Subject: [PATCH 4/5] Note `getsize` fix for `DirectoryStore`, etc --- docs/release.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/release.rst b/docs/release.rst index e07a82cef7..12a5f1177e 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -38,6 +38,9 @@ Bug fixes * Ensure ``DictStore`` contains only ``bytes`` to facilitate comparisons and protect against writes. By :user:`John Kirkham `, :issue:`350` +* Fix size computation of ``DirectoryStore`` and subclasses to be recursive. + By :user:`John Kirkham `, :issue:`253`, :issue:`362` + Maintenance ~~~~~~~~~~~ From 0aec9c5361f645899274a67734d142ec773ad796 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Mon, 10 Dec 2018 15:01:57 -0500 Subject: [PATCH 5/5] Correct `getsize` test values to be recursive As users expect `getsize` to be recursive, update our tests to ensure `getsize`'s results are checked against what the recursive result would be. --- zarr/tests/test_storage.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 0416470dd7..e72bbfb912 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -211,10 +211,10 @@ def test_hierarchy(self): # test getsize (optional) if hasattr(store, 'getsize'): - assert 6 == store.getsize() + assert 15 == store.getsize() assert 3 == store.getsize('a') assert 3 == store.getsize('b') - assert 3 == store.getsize('c') + assert 9 == store.getsize('c') assert 3 == store.getsize('c/d') assert 6 == store.getsize('c/e') assert 3 == store.getsize('c/e/f')