diff --git a/docs/release.rst b/docs/release.rst index 5b1ede2e41..7f013d2aff 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -55,6 +55,13 @@ Upcoming Release * Add documentation build to CI. By :user:`James Bourbeau `; :issue:`516`. +Bug fixes +~~~~~~~~~ + +* Fix '/' prepend bug in ``ABSStore``. + By :user:`Shikhar Goenka `; :issue:`525`. + + .. _release_2.3.2: 2.3.2 diff --git a/zarr/storage.py b/zarr/storage.py index 5a33940ad5..ef687c44fd 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1964,7 +1964,7 @@ class ABSStore(MutableMapping): In order to use this store, you must install the Microsoft Azure Storage SDK for Python. """ - def __init__(self, container, prefix, account_name=None, account_key=None, + def __init__(self, container, prefix='', account_name=None, account_key=None, blob_service_kwargs=None): from azure.storage.blob import BlockBlobService self.container = container @@ -1990,21 +1990,25 @@ def __setstate__(self, state): self.client = BlockBlobService(self.account_name, self.account_key, **self.blob_service_kwargs) - @staticmethod - def _append_path_to_prefix(path, prefix): - return '/'.join([normalize_storage_path(prefix), - normalize_storage_path(path)]) + def _append_path_to_prefix(self, path): + if self.prefix == '': + return normalize_storage_path(path) + else: + return '/'.join([self.prefix, normalize_storage_path(path)]) @staticmethod def _strip_prefix_from_path(path, prefix): # normalized things will not have any leading or trailing slashes path_norm = normalize_storage_path(path) prefix_norm = normalize_storage_path(prefix) - return path_norm[(len(prefix_norm)+1):] + if prefix: + return path_norm[(len(prefix_norm)+1):] + else: + return path_norm def __getitem__(self, key): from azure.common import AzureMissingResourceHttpError - blob_name = '/'.join([self.prefix, key]) + blob_name = self._append_path_to_prefix(key) try: blob = self.client.get_blob_to_bytes(self.container, blob_name) return blob.content @@ -2013,13 +2017,13 @@ def __getitem__(self, key): def __setitem__(self, key, value): value = ensure_bytes(value) - blob_name = '/'.join([self.prefix, key]) + blob_name = self._append_path_to_prefix(key) self.client.create_blob_from_bytes(self.container, blob_name, value) def __delitem__(self, key): from azure.common import AzureMissingResourceHttpError try: - self.client.delete_blob(self.container, '/'.join([self.prefix, key])) + self.client.delete_blob(self.container, self._append_path_to_prefix(key)) except AzureMissingResourceHttpError: raise KeyError('Blob %s not found' % key) @@ -2034,53 +2038,62 @@ def keys(self): return list(self.__iter__()) def __iter__(self): - for blob in self.client.list_blobs(self.container, self.prefix + '/'): + if self.prefix: + list_blobs_prefix = self.prefix + '/' + else: + list_blobs_prefix = None + for blob in self.client.list_blobs(self.container, list_blobs_prefix): yield self._strip_prefix_from_path(blob.name, self.prefix) def __len__(self): return len(self.keys()) def __contains__(self, key): - blob_name = '/'.join([self.prefix, key]) + blob_name = self._append_path_to_prefix(key) if self.client.exists(self.container, blob_name): return True else: return False def listdir(self, path=None): - store_path = normalize_storage_path(path) - # prefix is normalized to not have a trailing slash - dir_path = self.prefix - if store_path: - dir_path = dir_path + '/' + store_path - dir_path += '/' + from azure.storage.blob import Blob + dir_path = normalize_storage_path(self._append_path_to_prefix(path)) + if dir_path: + dir_path += '/' items = list() for blob in self.client.list_blobs(self.container, prefix=dir_path, delimiter='/'): - if '/' in blob.name[len(dir_path):]: + if type(blob) == Blob: + items.append(self._strip_prefix_from_path(blob.name, dir_path)) + else: items.append(self._strip_prefix_from_path( blob.name[:blob.name.find('/', len(dir_path))], dir_path)) - else: - items.append(self._strip_prefix_from_path(blob.name, dir_path)) return items def rmdir(self, path=None): - dir_path = normalize_storage_path(self._append_path_to_prefix(path, self.prefix)) + '/' + dir_path = normalize_storage_path(self._append_path_to_prefix(path)) + if dir_path: + dir_path += '/' for blob in self.client.list_blobs(self.container, prefix=dir_path): self.client.delete_blob(self.container, blob.name) def getsize(self, path=None): + from azure.storage.blob import Blob store_path = normalize_storage_path(path) fs_path = self.prefix if store_path: - fs_path = self._append_path_to_prefix(store_path, self.prefix) + fs_path = self._append_path_to_prefix(store_path) if self.client.exists(self.container, fs_path): return self.client.get_blob_properties(self.container, fs_path).properties.content_length else: size = 0 - for blob in self.client.list_blobs(self.container, prefix=fs_path + '/', + if fs_path == '': + fs_path = None + else: + fs_path += '/' + for blob in self.client.list_blobs(self.container, prefix=fs_path, delimiter='/'): - if '/' not in blob.name[len(fs_path + '/'):]: + if type(blob) == Blob: size += blob.properties.content_length return size diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 6fc8ee4c88..33c8835868 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -1404,8 +1404,8 @@ def absstore(): blob_client = asb.BlockBlobService(is_emulated=True) blob_client.delete_container('test') blob_client.create_container('test') - store = ABSStore(container='test', prefix='zarrtesting/', account_name='foo', - account_key='bar', blob_service_kwargs={'is_emulated': True}) + store = ABSStore(container='test', account_name='foo', account_key='bar', + blob_service_kwargs={'is_emulated': True}) store.rmdir() return store diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index db12dc3a94..9464948dd6 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -894,8 +894,8 @@ def create_store(): blob_client = asb.BlockBlobService(is_emulated=True) blob_client.delete_container('test') blob_client.create_container('test') - store = ABSStore(container='test', prefix='zarrtesting/', account_name='foo', - account_key='bar', blob_service_kwargs={'is_emulated': True}) + store = ABSStore(container='test', account_name='foo', account_key='bar', + blob_service_kwargs={'is_emulated': True}) store.rmdir() return store, None diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 5488eeab52..a9b60e7781 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -1473,16 +1473,41 @@ def test_format_compatibility(): @skip_test_env_var("ZARR_TEST_ABS") class TestABSStore(StoreTests, unittest.TestCase): - def create_store(self): + def create_store(self, prefix=None): asb = pytest.importorskip("azure.storage.blob") blob_client = asb.BlockBlobService(is_emulated=True) blob_client.delete_container('test') blob_client.create_container('test') - store = ABSStore(container='test', prefix='zarrtesting/', account_name='foo', + store = ABSStore(container='test', prefix=prefix, account_name='foo', account_key='bar', blob_service_kwargs={'is_emulated': True}) store.rmdir() return store + def test_iterators_with_prefix(self): + for prefix in ['test_prefix', '/test_prefix', 'test_prefix/', 'test/prefix', '', None]: + store = self.create_store(prefix=prefix) + + # test iterator methods on empty store + assert 0 == len(store) + assert set() == set(store) + assert set() == set(store.keys()) + assert set() == set(store.values()) + assert set() == set(store.items()) + + # setup some values + store['a'] = b'aaa' + store['b'] = b'bbb' + store['c/d'] = b'ddd' + store['c/e/f'] = b'fff' + + # test iterators on store with data + assert 4 == len(store) + assert {'a', 'b', 'c/d', 'c/e/f'} == set(store) + assert {'a', 'b', 'c/d', 'c/e/f'} == set(store.keys()) + assert {b'aaa', b'bbb', b'ddd', b'fff'} == set(store.values()) + assert ({('a', b'aaa'), ('b', b'bbb'), ('c/d', b'ddd'), ('c/e/f', b'fff')} == + set(store.items())) + class TestConsolidatedMetadataStore(unittest.TestCase):