Skip to content

infra: clean up pickle.load logic in integ tests #1611

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits on Jun 23, 2020
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 5 additions & 11 deletions tests/integ/test_airflow_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import gzip
import os
import pickle
import sys
import pytest
import tests.integ

Expand Down Expand Up @@ -102,11 +101,10 @@ def test_byo_airflow_config_uploads_data_source_to_s3_when_inputs_provided(
def test_kmeans_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_instance_type):
with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
pickle_args = {} if sys.version_info.major == 2 else {"encoding": "latin1"}

# Load the data into memory as numpy arrays
with gzip.open(data_path, "rb") as f:
train_set, _, _ = pickle.load(f, **pickle_args)
train_set, _, _ = pickle.load(f, encoding="latin1")

kmeans = KMeans(
role=ROLE,
Expand Down Expand Up @@ -141,11 +139,10 @@ def test_kmeans_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_
def test_fm_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_instance_type):
with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
pickle_args = {} if sys.version_info.major == 2 else {"encoding": "latin1"}

# Load the data into memory as numpy arrays
with gzip.open(data_path, "rb") as f:
train_set, _, _ = pickle.load(f, **pickle_args)
train_set, _, _ = pickle.load(f, encoding="latin1")

fm = FactorizationMachines(
role=ROLE,
Expand Down Expand Up @@ -207,11 +204,10 @@ def test_ipinsights_airflow_config_uploads_data_source_to_s3(sagemaker_session,
def test_knn_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_instance_type):
with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
pickle_args = {} if sys.version_info.major == 2 else {"encoding": "latin1"}

# Load the data into memory as numpy arrays
with gzip.open(data_path, "rb") as f:
train_set, _, _ = pickle.load(f, **pickle_args)
train_set, _, _ = pickle.load(f, encoding="latin1")

knn = KNN(
role=ROLE,
Expand Down Expand Up @@ -278,11 +274,10 @@ def test_linearlearner_airflow_config_uploads_data_source_to_s3(
):
with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
pickle_args = {} if sys.version_info.major == 2 else {"encoding": "latin1"}

# Load the data into memory as numpy arrays
with gzip.open(data_path, "rb") as f:
train_set, _, _ = pickle.load(f, **pickle_args)
train_set, _, _ = pickle.load(f, encoding="latin1")

train_set[1][:100] = 1
train_set[1][100:200] = 0
Expand Down Expand Up @@ -381,11 +376,10 @@ def test_ntm_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_ins
def test_pca_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_instance_type):
with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
pickle_args = {} if sys.version_info.major == 2 else {"encoding": "latin1"}

# Load the data into memory as numpy arrays
with gzip.open(data_path, "rb") as f:
train_set, _, _ = pickle.load(f, **pickle_args)
train_set, _, _ = pickle.load(f, encoding="latin1")

pca = PCA(
role=ROLE,
Expand Down
7 changes: 2 additions & 5 deletions tests/integ/test_byo_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import json
import os
import pickle
import sys

import pytest

Expand Down Expand Up @@ -58,10 +57,9 @@ def test_byo_estimator(sagemaker_session, region, cpu_instance_type):

with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
pickle_args = {} if sys.version_info.major == 2 else {"encoding": "latin1"}

with gzip.open(data_path, "rb") as f:
train_set, _, _ = pickle.load(f, **pickle_args)
train_set, _, _ = pickle.load(f, encoding="latin1")

prefix = "test_byo_estimator"
key = "recordio-pb-data"
Expand Down Expand Up @@ -107,10 +105,9 @@ def test_async_byo_estimator(sagemaker_session, region, cpu_instance_type):

with timeout(minutes=5):
data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
pickle_args = {} if sys.version_info.major == 2 else {"encoding": "latin1"}

with gzip.open(data_path, "rb") as f:
train_set, _, _ = pickle.load(f, **pickle_args)
train_set, _, _ = pickle.load(f, encoding="latin1")

prefix = "test_byo_estimator"
key = "recordio-pb-data"
Expand Down
7 changes: 2 additions & 5 deletions tests/integ/test_factorization_machines.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import gzip
import os
import pickle
import sys
import time

from sagemaker import FactorizationMachines, FactorizationMachinesModel
Expand All @@ -29,11 +28,10 @@ def test_factorization_machines(sagemaker_session, cpu_instance_type):

with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
pickle_args = {} if sys.version_info.major == 2 else {"encoding": "latin1"}

# Load the data into memory as numpy arrays
with gzip.open(data_path, "rb") as f:
train_set, _, _ = pickle.load(f, **pickle_args)
train_set, _, _ = pickle.load(f, encoding="latin1")

fm = FactorizationMachines(
role="SageMakerRole",
Expand Down Expand Up @@ -71,11 +69,10 @@ def test_async_factorization_machines(sagemaker_session, cpu_instance_type):

with timeout(minutes=5):
data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
pickle_args = {} if sys.version_info.major == 2 else {"encoding": "latin1"}

# Load the data into memory as numpy arrays
with gzip.open(data_path, "rb") as f:
train_set, _, _ = pickle.load(f, **pickle_args)
train_set, _, _ = pickle.load(f, encoding="latin1")

fm = FactorizationMachines(
role="SageMakerRole",
Expand Down
7 changes: 2 additions & 5 deletions tests/integ/test_kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import json
import os
import pickle
import sys
import time

import pytest
Expand All @@ -31,11 +30,10 @@ def test_kmeans(sagemaker_session, cpu_instance_type):
job_name = unique_name_from_base("kmeans")
with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
pickle_args = {} if sys.version_info.major == 2 else {"encoding": "latin1"}

# Load the data into memory as numpy arrays
with gzip.open(data_path, "rb") as f:
train_set, _, _ = pickle.load(f, **pickle_args)
train_set, _, _ = pickle.load(f, encoding="latin1")

kmeans = KMeans(
role="SageMakerRole",
Expand Down Expand Up @@ -94,11 +92,10 @@ def test_async_kmeans(sagemaker_session, cpu_instance_type):

with timeout(minutes=5):
data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
pickle_args = {} if sys.version_info.major == 2 else {"encoding": "latin1"}

# Load the data into memory as numpy arrays
with gzip.open(data_path, "rb") as f:
train_set, _, _ = pickle.load(f, **pickle_args)
train_set, _, _ = pickle.load(f, encoding="latin1")

kmeans = KMeans(
role="SageMakerRole",
Expand Down
7 changes: 2 additions & 5 deletions tests/integ/test_knn.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import gzip
import os
import pickle
import sys
import time

from sagemaker import KNN, KNNModel
Expand All @@ -29,11 +28,10 @@ def test_knn_regressor(sagemaker_session, cpu_instance_type):

with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
pickle_args = {} if sys.version_info.major == 2 else {"encoding": "latin1"}

# Load the data into memory as numpy arrays
with gzip.open(data_path, "rb") as f:
train_set, _, _ = pickle.load(f, **pickle_args)
train_set, _, _ = pickle.load(f, encoding="latin1")

knn = KNN(
role="SageMakerRole",
Expand Down Expand Up @@ -66,11 +64,10 @@ def test_async_knn_classifier(sagemaker_session, cpu_instance_type):

with timeout(minutes=5):
data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
pickle_args = {} if sys.version_info.major == 2 else {"encoding": "latin1"}

# Load the data into memory as numpy arrays
with gzip.open(data_path, "rb") as f:
train_set, _, _ = pickle.load(f, **pickle_args)
train_set, _, _ = pickle.load(f, encoding="latin1")

knn = KNN(
role="SageMakerRole",
Expand Down
10 changes: 3 additions & 7 deletions tests/integ/test_linear_learner.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import gzip
import os
import pickle
import sys
import time

import numpy as np
Expand All @@ -33,11 +32,10 @@ def test_linear_learner(sagemaker_session, cpu_instance_type):

with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
pickle_args = {} if sys.version_info.major == 2 else {"encoding": "latin1"}

# Load the data into memory as numpy arrays
with gzip.open(data_path, "rb") as f:
train_set, _, _ = pickle.load(f, **pickle_args)
train_set, _, _ = pickle.load(f, encoding="latin1")

train_set[1][:100] = 1
train_set[1][100:200] = 0
Expand Down Expand Up @@ -102,11 +100,10 @@ def test_linear_learner_multiclass(sagemaker_session, cpu_instance_type):

with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
pickle_args = {} if sys.version_info.major == 2 else {"encoding": "latin1"}

# Load the data into memory as numpy arrays
with gzip.open(data_path, "rb") as f:
train_set, _, _ = pickle.load(f, **pickle_args)
train_set, _, _ = pickle.load(f, encoding="latin1")

train_set = train_set[0], train_set[1].astype(np.dtype("float32"))

Expand Down Expand Up @@ -137,11 +134,10 @@ def test_async_linear_learner(sagemaker_session, cpu_instance_type):

with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
pickle_args = {} if sys.version_info.major == 2 else {"encoding": "latin1"}

# Load the data into memory as numpy arrays
with gzip.open(data_path, "rb") as f:
train_set, _, _ = pickle.load(f, **pickle_args)
train_set, _, _ = pickle.load(f, encoding="latin1")

train_set[1][:100] = 1
train_set[1][100:200] = 0
Expand Down
7 changes: 2 additions & 5 deletions tests/integ/test_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import gzip
import os
import pickle
import sys
import time

import sagemaker.amazon.pca
Expand All @@ -29,11 +28,10 @@ def test_pca(sagemaker_session, cpu_instance_type):

with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
pickle_args = {} if sys.version_info.major == 2 else {"encoding": "latin1"}

# Load the data into memory as numpy arrays
with gzip.open(data_path, "rb") as f:
train_set, _, _ = pickle.load(f, **pickle_args)
train_set, _, _ = pickle.load(f, encoding="latin1")

pca = sagemaker.amazon.pca.PCA(
role="SageMakerRole",
Expand Down Expand Up @@ -72,11 +70,10 @@ def test_async_pca(sagemaker_session, cpu_instance_type):

with timeout(minutes=5):
data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
pickle_args = {} if sys.version_info.major == 2 else {"encoding": "latin1"}

# Load the data into memory as numpy arrays
with gzip.open(data_path, "rb") as f:
train_set, _, _ = pickle.load(f, **pickle_args)
train_set, _, _ = pickle.load(f, encoding="latin1")

pca = sagemaker.amazon.pca.PCA(
role="SageMakerRole",
Expand Down
4 changes: 1 addition & 3 deletions tests/integ/test_record_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import gzip
import os
import pickle
import sys

from six.moves.urllib.parse import urlparse

Expand All @@ -29,9 +28,8 @@ def test_record_set(sagemaker_session, cpu_instance_type):
In particular, test that the objects uploaded to the S3 bucket are encrypted.
"""
data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
pickle_args = {} if sys.version_info.major == 2 else {"encoding": "latin1"}
with gzip.open(data_path, "rb") as file_object:
train_set, _, _ = pickle.load(file_object, **pickle_args)
train_set, _, _ = pickle.load(file_object, encoding="latin1")
kmeans = KMeans(
role="SageMakerRole",
train_instance_count=1,
Expand Down
9 changes: 3 additions & 6 deletions tests/integ/test_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import json
import os
import pickle
import sys
import time

import pytest
Expand Down Expand Up @@ -109,12 +108,11 @@ def test_transform_mxnet(
@pytest.mark.canary_quick
def test_attach_transform_kmeans(sagemaker_session, cpu_instance_type):
data_path = os.path.join(DATA_DIR, "one_p_mnist")
pickle_args = {} if sys.version_info.major == 2 else {"encoding": "latin1"}

# Load the data into memory as numpy arrays
train_set_path = os.path.join(data_path, "mnist.pkl.gz")
with gzip.open(train_set_path, "rb") as f:
train_set, _, _ = pickle.load(f, **pickle_args)
train_set, _, _ = pickle.load(f, encoding="latin1")

kmeans = KMeans(
role="SageMakerRole",
Expand Down Expand Up @@ -177,7 +175,7 @@ def test_transform_pytorch_vpc_custom_model_bucket(
entry_point=os.path.join(data_dir, "mnist.py"),
role="SageMakerRole",
framework_version=pytorch_full_version,
py_version="py3",
py_version=PYTHON_VERSION,
sagemaker_session=sagemaker_session,
vpc_config={"Subnets": subnet_ids, "SecurityGroupIds": [security_group_id]},
code_location="s3://{}".format(custom_bucket_name),
Expand Down Expand Up @@ -232,13 +230,12 @@ def test_transform_mxnet_tags(

def test_transform_byo_estimator(sagemaker_session, cpu_instance_type):
data_path = os.path.join(DATA_DIR, "one_p_mnist")
pickle_args = {} if sys.version_info.major == 2 else {"encoding": "latin1"}
tags = [{"Key": "some-tag", "Value": "value-for-tag"}]

# Load the data into memory as numpy arrays
train_set_path = os.path.join(data_path, "mnist.pkl.gz")
with gzip.open(train_set_path, "rb") as f:
train_set, _, _ = pickle.load(f, **pickle_args)
train_set, _, _ = pickle.load(f, encoding="latin1")

kmeans = KMeans(
role="SageMakerRole",
Expand Down
8 changes: 3 additions & 5 deletions tests/integ/test_tuner.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import json
import os
import pickle
import sys
import time

import numpy as np
Expand Down Expand Up @@ -55,10 +54,10 @@
@pytest.fixture(scope="module")
def kmeans_train_set(sagemaker_session):
data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
pickle_args = {} if sys.version_info.major == 2 else {"encoding": "latin1"}

# Load the data into memory as numpy arrays
with gzip.open(data_path, "rb") as f:
train_set, _, _ = pickle.load(f, **pickle_args)
train_set, _, _ = pickle.load(f, encoding="latin1")

return train_set

Expand Down Expand Up @@ -847,10 +846,9 @@ def test_tuning_byo_estimator(sagemaker_session, cpu_instance_type):

with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
pickle_args = {} if sys.version_info.major == 2 else {"encoding": "latin1"}

with gzip.open(data_path, "rb") as f:
train_set, _, _ = pickle.load(f, **pickle_args)
train_set, _, _ = pickle.load(f, encoding="latin1")

prefix = "test_byo_estimator"
key = "recordio-pb-data"
Expand Down
Loading