Skip to content

Update ABSStore for compatibility with newer azure.storage.blob. #759

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 26 commits into from
Jun 1, 2021
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ Bug fixes

* FSStore: default to normalize_keys=False
By :user:`Josh Moore <joshmoore>`; :issue:`755`.
* ABSStore: compatibility with ``azure.storage.python>=12``
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for adding this!

By :user:`Tom Augspurger <tomaugspurger>`; :issue:`618`


.. _release_2.8.2:

Expand Down
4 changes: 3 additions & 1 deletion docs/tutorial.rst
Original file line number Diff line number Diff line change
Expand Up @@ -810,7 +810,9 @@ The class is :class:`zarr.storage.ABSStore` (requires
`azure-storage-blob <https://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python>`_
to be installed)::

>>> store = zarr.ABSStore(container='test', prefix='zarr-testing', blob_service_kwargs={'is_emulated': True}) # doctest: +SKIP
>>> import azure.storage.blob
>>> container_client = azure.storage.blob.ContainerClient(...) # doctest: +SKIP
>>> store = zarr.ABSStore(client=container_client, prefix='zarr-testing') # doctest: +SKIP
>>> root = zarr.group(store=store, overwrite=True) # doctest: +SKIP
>>> z = root.zeros('foo/bar', shape=(1000, 1000), chunks=(100, 100), dtype='i4') # doctest: +SKIP
>>> z[:] = 42 # doctest: +SKIP
Expand Down
2 changes: 1 addition & 1 deletion requirements_dev_optional.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ ipytree==0.2.1
# optional library requirements for services
# don't let pyup change pinning for azure-storage-blob, need to pin to older
# version to get compatibility with azure storage emulator on appveyor (FIXME)
azure-storage-blob==2.0.1 # pyup: ignore
azure-storage-blob==12.5.0 # pyup: ignore
redis==3.5.3
pymongo==3.11.4
# optional test requirements
Expand Down
152 changes: 87 additions & 65 deletions zarr/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -2211,50 +2211,84 @@ class ABSStore(MutableMapping):
----------
container : string
The name of the ABS container to use.
.. deprecated::
Use ``client`` instead.
prefix : string
Location of the "directory" to use as the root of the storage hierarchy
within the container.
account_name : string
The Azure blob storage account name.
.. deprecated:: 2.8.3
Use ``client`` instead.
account_key : string
The Azure blob storage account access key.
.. deprecated:: 2.8.3
Use ``client`` instead.
blob_service_kwargs : dictionary
Extra arguments to be passed into the azure blob client, for e.g. when
using the emulator, pass in blob_service_kwargs={'is_emulated': True}.
.. deprecated:: 2.8.3
Use ``client`` instead.
dimension_separator : {'.', '/'}, optional
Separator placed between the dimensions of a chunk.
client : azure.storage.blob.ContainerClient, optional
And ``azure.storage.blob.ContainerClient`` to connect with. See
`here <https://docs.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.containerclient?view=azure-python>`_ # noqa
for more.

.. versionadded:: 2.8.3

Notes
-----
In order to use this store, you must install the Microsoft Azure Storage SDK for Python.
In order to use this store, you must install the Microsoft Azure Storage SDK for Python,
``azure-storage-blob>=12.5.0``.
"""

def __init__(self, container, prefix='', account_name=None, account_key=None,
blob_service_kwargs=None, dimension_separator=None):
from azure.storage.blob import BlockBlobService
self.container = container
self.prefix = normalize_storage_path(prefix)
self.account_name = account_name
self.account_key = account_key
def __init__(self, container=None, prefix='', account_name=None, account_key=None,
blob_service_kwargs=None, dimension_separator=None,
client=None,
):
self._dimension_separator = dimension_separator
if blob_service_kwargs is not None:
self.blob_service_kwargs = blob_service_kwargs
else: # pragma: no cover
self.blob_service_kwargs = dict()
self.client = BlockBlobService(self.account_name, self.account_key,
**self.blob_service_kwargs)

# needed for pickling
def __getstate__(self):
state = self.__dict__.copy()
del state['client']
return state
self.prefix = normalize_storage_path(prefix)
if client is None:
# deprecated option, try to construct the client for them
msg = (
"Providing 'container', 'account_name', 'account_key', and 'blob_service_kwargs'"
"is deprecated. Provide and instance of 'azure.storage.blob.ContainerClient' "
"'client' instead."
)
warnings.warn(msg, FutureWarning, stacklevel=2)
from azure.storage.blob import ContainerClient
blob_service_kwargs = blob_service_kwargs or {}
client = ContainerClient(
"https://{}.blob.core.windows.net/".format(account_name), container,
credential=account_key, **blob_service_kwargs
)

def __setstate__(self, state):
from azure.storage.blob import BlockBlobService
self.__dict__.update(state)
self.client = BlockBlobService(self.account_name, self.account_key,
**self.blob_service_kwargs)
self.client = client
self._container = container
self._account_name = account_name
self._account_key = account_key

def _warn_deprecated(self, property_):
msg = ("The {} property is deprecated and will be removed in a future
"version. Get the property from 'ABSStore.client' instead.")
warnings.warn(msg.format(property_), FutureWarning, stacklevel=3)

@property
def container(self):
return self._warn_deprecated("container")
return self._container

@property
def account_name(self):
self._warn_deprecated("account_name")
return self._account_name

@property
def account_key(self):
self._warn_deprecated("account_key")
return self._account_name

def _append_path_to_prefix(self, path):
if self.prefix == '':
Expand All @@ -2273,30 +2307,29 @@ def _strip_prefix_from_path(path, prefix):
return path_norm

def __getitem__(self, key):
from azure.common import AzureMissingResourceHttpError
from azure.core.exceptions import ResourceNotFoundError
blob_name = self._append_path_to_prefix(key)
try:
blob = self.client.get_blob_to_bytes(self.container, blob_name)
return blob.content
except AzureMissingResourceHttpError:
return self.client.download_blob(blob_name).readall()
except ResourceNotFoundError:
raise KeyError('Blob %s not found' % blob_name)

def __setitem__(self, key, value):
value = ensure_bytes(value)
blob_name = self._append_path_to_prefix(key)
self.client.create_blob_from_bytes(self.container, blob_name, value)
self.client.upload_blob(blob_name, value, overwrite=True)

def __delitem__(self, key):
from azure.common import AzureMissingResourceHttpError
from azure.core.exceptions import ResourceNotFoundError
try:
self.client.delete_blob(self.container, self._append_path_to_prefix(key))
except AzureMissingResourceHttpError:
self.client.delete_blob(self._append_path_to_prefix(key))
except ResourceNotFoundError:
raise KeyError('Blob %s not found' % key)

def __eq__(self, other):
return (
isinstance(other, ABSStore) and
self.container == other.container and
self.client == other.client and
self.prefix == other.prefix
)

Expand All @@ -2308,63 +2341,52 @@ def __iter__(self):
list_blobs_prefix = self.prefix + '/'
else:
list_blobs_prefix = None
for blob in self.client.list_blobs(self.container, list_blobs_prefix):
for blob in self.client.list_blobs(list_blobs_prefix):
yield self._strip_prefix_from_path(blob.name, self.prefix)

def __len__(self):
return len(self.keys())

def __contains__(self, key):
blob_name = self._append_path_to_prefix(key)
assert len(blob_name) >= 1
if self.client.exists(self.container, blob_name):
return True
else:
return False
return self.client.get_blob_client(blob_name).exists()

def listdir(self, path=None):
from azure.storage.blob import Blob
dir_path = normalize_storage_path(self._append_path_to_prefix(path))
if dir_path:
dir_path += '/'
items = list()
for blob in self.client.list_blobs(self.container, prefix=dir_path, delimiter='/'):
if type(blob) == Blob:
items.append(self._strip_prefix_from_path(blob.name, dir_path))
else:
items.append(self._strip_prefix_from_path(
blob.name[:blob.name.find('/', len(dir_path))], dir_path))
return items
items = set()
for blob in self.client.walk_blobs(name_starts_with=dir_path, delimiter='/'):
items.add(self._strip_prefix_from_path(blob.name, dir_path))
return list(items)

def rmdir(self, path=None):
dir_path = normalize_storage_path(self._append_path_to_prefix(path))
if dir_path:
dir_path += '/'
for blob in self.client.list_blobs(self.container, prefix=dir_path):
assert len(blob.name) >= 1
self.client.delete_blob(self.container, blob.name)
for blob in self.client.list_blobs(name_starts_with=dir_path):
self.client.delete_blob(blob)

def getsize(self, path=None):
from azure.storage.blob import Blob
store_path = normalize_storage_path(path)
fs_path = self.prefix
if store_path:
fs_path = self._append_path_to_prefix(store_path)
fs_path = self._append_path_to_prefix(store_path)
if fs_path:
blob_client = self.client.get_blob_client(fs_path)
else:
blob_client = None

if fs_path != "" and self.client.exists(self.container, fs_path):
return self.client.get_blob_properties(
self.container, fs_path
).properties.content_length
if blob_client and blob_client.exists():
return blob_client.get_blob_properties().size
else:
size = 0
if fs_path == '':
fs_path = None
else:
elif not fs_path.endswith('/'):
fs_path += '/'
for blob in self.client.list_blobs(self.container, prefix=fs_path,
delimiter='/'):
if type(blob) == Blob:
size += blob.properties.content_length
for blob in self.client.walk_blobs(name_starts_with=fs_path, delimiter='/'):
blob_client = self.client.get_blob_client(blob)
if blob_client.exists():
size += blob_client.get_blob_properties().size
return size

def clear(self):
Expand Down
19 changes: 19 additions & 0 deletions zarr/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import pytest


@pytest.fixture(scope="session")
def azurite():
import docker

print("Starting azurite docker container")
client = docker.from_env()
azurite = client.containers.run(
"mcr.microsoft.com/azure-storage/azurite",
"azurite-blob --loose --blobHost 0.0.0.0",
detach=True,
ports={"10000": "10000"},
)
print("Successfully created azurite container...")
yield azurite
print("Teardown azurite docker container")
azurite.stop()
11 changes: 4 additions & 7 deletions zarr/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
init_group,
)
from zarr.util import buffer_size
from zarr.tests.util import skip_test_env_var, have_fsspec
from zarr.tests.util import abs_container, skip_test_env_var, have_fsspec

# noinspection PyMethodMayBeStatic

Expand Down Expand Up @@ -1623,16 +1623,13 @@ def test_nbytes_stored(self):


@skip_test_env_var("ZARR_TEST_ABS")
@pytest.mark.usefixtures("azurite")
class TestArrayWithABSStore(TestArray):

@staticmethod
def absstore():
asb = pytest.importorskip("azure.storage.blob")
blob_client = asb.BlockBlobService(is_emulated=True)
blob_client.delete_container('test')
blob_client.create_container('test')
store = ABSStore(container='test', account_name='foo', account_key='bar',
blob_service_kwargs={'is_emulated': True})
client = abs_container()
store = ABSStore(client=client)
store.rmdir()
return store

Expand Down
11 changes: 4 additions & 7 deletions zarr/tests/test_hierarchy.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
array_meta_key, atexit_rmglob, atexit_rmtree,
group_meta_key, init_array, init_group)
from zarr.util import InfoReporter
from zarr.tests.util import skip_test_env_var, have_fsspec
from zarr.tests.util import skip_test_env_var, have_fsspec, abs_container


# noinspection PyStatementEffect
Expand Down Expand Up @@ -947,16 +947,13 @@ def create_store():


@skip_test_env_var("ZARR_TEST_ABS")
@pytest.mark.usefixtures("azurite")
class TestGroupWithABSStore(TestGroup):

@staticmethod
def create_store():
asb = pytest.importorskip("azure.storage.blob")
blob_client = asb.BlockBlobService(is_emulated=True)
blob_client.delete_container('test')
blob_client.create_container('test')
store = ABSStore(container='test', account_name='foo', account_key='bar',
blob_service_kwargs={'is_emulated': True})
container_client = abs_container()
store = ABSStore(client=container_client)
store.rmdir()
return store, None

Expand Down
21 changes: 12 additions & 9 deletions zarr/tests/test_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
attrs_key, default_compressor, getsize,
group_meta_key, init_array, init_group, migrate_1to2)
from zarr.storage import FSStore
from zarr.tests.util import CountingDict, have_fsspec, skip_test_env_var
from zarr.tests.util import CountingDict, have_fsspec, skip_test_env_var, abs_container


@contextmanager
Expand Down Expand Up @@ -1923,24 +1923,27 @@ def test_format_compatibility():


@skip_test_env_var("ZARR_TEST_ABS")
@pytest.mark.usefixtures("azurite")
class TestABSStore(StoreTests):

def create_store(self, prefix=None, **kwargs):
asb = pytest.importorskip("azure.storage.blob")
blob_client = asb.BlockBlobService(is_emulated=True, socket_timeout=10)
blob_client.delete_container("test")
blob_client.create_container("test")
container_client = abs_container()
store = ABSStore(
container="test",
prefix=prefix,
account_name="foo",
account_key="bar",
blob_service_kwargs={"is_emulated": True, "socket_timeout": 10},
client=container_client,
**kwargs,
)
store.rmdir()
return store

def test_non_client_deprecated(self):
with pytest.warns(FutureWarning, match='Providing'):
store = ABSStore("test", account_name="test", account_key="test")

for attr in ["container", "account_name", "account_key"]:
with pytest.warns(FutureWarning, match=attr):
getattr(store, attr)

def test_iterators_with_prefix(self):
for prefix in ['test_prefix', '/test_prefix', 'test_prefix/', 'test/prefix', '', None]:
store = self.create_store(prefix=prefix)
Expand Down
Loading