Skip to content

Commit 0d46a04

Browse files
author
Ashley Scillitoe
authored
Add some missing logic for failed URIs in datasets and test_saving (#607)
1 parent faca234 commit 0d46a04

File tree

5 files changed

+35
-7
lines changed

5 files changed

+35
-7
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ See the [documentation](https://docs.seldon.io/projects/alibi-detect/en/latest/c
1010

1111
### Development
1212
- UTF-8 decoding is enforced when `README.md` is opened by `setup.py`. This is to prevent pip install errors on systems with `PYTHONIOENCODING` set to use other encoders ([#605](https://github.com/SeldonIO/alibi-detect/pull/605)).
13+
- Skip specific save/load tests that require downloading remote artefacts if the relevant URI(s) is/are down ([#607](https://github.com/SeldonIO/alibi-detect/pull/607)).
1314

1415
## v0.10.3
1516
## [v0.10.3](https://github.com/SeldonIO/alibi-detect/tree/v0.10.3) (2022-08-17)

alibi_detect/datasets.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from alibi_detect.utils.data import Bunch
1212
from alibi_detect.utils.url import _join_url
1313
from requests import RequestException
14+
from urllib.error import URLError
1415
from scipy.io import arff
1516
from sklearn.datasets import fetch_kddcup99
1617

@@ -59,7 +60,11 @@ def fetch_kdd(target: list = ['dos', 'r2l', 'u2r', 'probe'],
5960
"""
6061

6162
# fetch raw data
62-
data_raw = fetch_kddcup99(subset=None, data_home=None, percent10=percent10)
63+
try:
64+
data_raw = fetch_kddcup99(subset=None, data_home=None, percent10=percent10)
65+
except URLError:
66+
logger.exception("Could not connect, URL may be out of service")
67+
raise
6368

6469
# specify columns
6570
cols = ['duration', 'protocol_type', 'service', 'flag', 'src_bytes', 'dst_bytes',

alibi_detect/saving/tests/datasets.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import numpy as np
2+
import pytest
23
from alibi_testing.data import get_movie_sentiment_data
34
from pytest_cases import parametrize
5+
from requests import RequestException
46

57
# Note: If any of below cases become large, see https://smarie.github.io/python-pytest-cases/#c-caching-cases
68
FLOAT = np.float32
@@ -63,4 +65,7 @@ def data_synthetic_nd(data_shape):
6365
class TextData:
6466
@staticmethod
6567
def movie_sentiment_data():
66-
return get_movie_sentiment_data()
68+
try:
69+
return get_movie_sentiment_data()
70+
except RequestException:
71+
pytest.skip('Movie sentiment dataset URL down')

alibi_detect/saving/tests/test_saving.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from functools import partial
1010
from pathlib import Path
1111
from typing import Callable
12+
from requests.exceptions import HTTPError
1213

1314
import toml
1415
import dill
@@ -202,7 +203,10 @@ def nlp_embedding_and_tokenizer(model_name, max_len, uae, backend):
202203
backend = 'tf' if backend == 'tensorflow' else 'pt'
203204

204205
# Load tokenizer
205-
tokenizer = AutoTokenizer.from_pretrained(model_name)
206+
try:
207+
tokenizer = AutoTokenizer.from_pretrained(model_name)
208+
except (OSError, HTTPError):
209+
pytest.skip(f"Problem downloading {model_name} from huggingface.co")
206210
X = 'A dummy string' # this will be padded to max_len
207211
tokens = tokenizer(list(X[:5]), pad_to_max_length=True,
208212
max_length=max_len, return_tensors=backend)
@@ -214,13 +218,19 @@ def nlp_embedding_and_tokenizer(model_name, max_len, uae, backend):
214218
enc_dim = 32
215219

216220
if backend == 'tf':
217-
embedding = TransformerEmbedding_tf(model_name, emb_type, layers)
221+
try:
222+
embedding = TransformerEmbedding_tf(model_name, emb_type, layers)
223+
except (OSError, HTTPError):
224+
pytest.skip(f"Problem downloading {model_name} from huggingface.co")
218225
if uae:
219226
x_emb = embedding(tokens)
220227
shape = (x_emb.shape[1],)
221228
embedding = UAE_tf(input_layer=embedding, shape=shape, enc_dim=enc_dim)
222229
else:
223-
embedding = TransformerEmbedding_pt(model_name, emb_type, layers)
230+
try:
231+
embedding = TransformerEmbedding_pt(model_name, emb_type, layers)
232+
except (OSError, HTTPError):
233+
pytest.skip(f"Problem downloading {model_name} from huggingface.co")
224234
if uae:
225235
x_emb = embedding(tokens)
226236
emb_dim = x_emb.shape[1]

alibi_detect/tests/test_datasets.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import pandas as pd
33
import pytest
44
from requests import RequestException
5+
from urllib.error import URLError
56
from alibi_detect.datasets import fetch_kdd, fetch_ecg, corruption_types_cifar10c, fetch_cifar10c, \
67
fetch_attack, fetch_nab, get_list_nab
78
from alibi_detect.utils.data import Bunch
@@ -24,7 +25,7 @@ def test_fetch_kdd(return_X_y):
2425
keep_cols = np.random.choice(keep_cols_list, 5, replace=False)
2526
try:
2627
data = fetch_kdd(target=target, keep_cols=keep_cols, percent10=True, return_X_y=return_X_y)
27-
except RequestException:
28+
except URLError:
2829
pytest.skip('KDD dataset URL down')
2930
if return_X_y:
3031
assert isinstance(data, tuple)
@@ -53,13 +54,19 @@ def test_fetch_ecg(return_X_y):
5354

5455

5556
# CIFAR-10-C dataset
56-
corruption_list = corruption_types_cifar10c()
57+
try:
58+
corruption_list = corruption_types_cifar10c()
59+
except RequestException:
60+
corruption_list = None
5761

5862

63+
@pytest.mark.skipif(corruption_list is None, reason="CIFAR-10-C dataset URL is down")
5964
def test_types_cifar10c():
65+
print(corruption_list)
6066
assert len(corruption_list) == 19
6167

6268

69+
@pytest.mark.skipif(corruption_list is None, reason="CIFAR-10-C dataset URL is down")
6370
@pytest.mark.parametrize('return_X_y', [True, False])
6471
def test_fetch_cifar10c(return_X_y):
6572
corruption = list(np.random.choice(corruption_list, 5, replace=False))

0 commit comments

Comments
 (0)