Skip to content

infra: use fixture for Python version in PyTorch integ tests #1612

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit
Jun 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ def pytest_addoption(parser):
parser.addoption("--chainer-full-version", action="store", default="5.0.0")
parser.addoption("--mxnet-full-version", action="store", default="1.6.0")
parser.addoption("--ei-mxnet-full-version", action="store", default="1.5.1")
parser.addoption("--pytorch-full-version", action="store", default="1.5.0")
parser.addoption(
"--rl-coach-mxnet-full-version",
action="store",
Expand Down Expand Up @@ -266,8 +265,18 @@ def ei_mxnet_full_version(request):


@pytest.fixture(scope="module")
def pytorch_full_version(request):
return request.config.getoption("--pytorch-full-version")
def pytorch_full_version():
return "1.5.0"


@pytest.fixture(scope="module")
def pytorch_full_py_version():
return "py3"


@pytest.fixture(scope="module")
def pytorch_full_ei_version():
return "1.3.1"


@pytest.fixture(scope="module")
Expand Down
29 changes: 2 additions & 27 deletions tests/integ/test_airflow_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -611,39 +611,14 @@ def test_xgboost_airflow_config_uploads_data_source_to_s3(

@pytest.mark.canary_quick
def test_pytorch_airflow_config_uploads_data_source_to_s3_when_inputs_not_provided(
sagemaker_session, cpu_instance_type, pytorch_full_version
sagemaker_session, cpu_instance_type, pytorch_full_version, pytorch_full_py_version,
):
with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
estimator = PyTorch(
entry_point=PYTORCH_MNIST_SCRIPT,
role=ROLE,
framework_version=pytorch_full_version,
py_version="py3",
train_instance_count=2,
train_instance_type=cpu_instance_type,
hyperparameters={"epochs": 6, "backend": "gloo"},
sagemaker_session=sagemaker_session,
)

training_config = _build_airflow_workflow(
estimator=estimator, instance_type=cpu_instance_type
)

_assert_that_s3_url_contains_data(
sagemaker_session,
training_config["HyperParameters"]["sagemaker_submit_directory"].strip('"'),
)


def test_pytorch_12_airflow_config_uploads_data_source_to_s3_when_inputs_not_provided(
sagemaker_session, cpu_instance_type
):
with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
estimator = PyTorch(
entry_point=PYTORCH_MNIST_SCRIPT,
role=ROLE,
framework_version="1.2.0",
py_version="py3",
py_version=pytorch_full_py_version,
train_instance_count=2,
train_instance_type=cpu_instance_type,
hyperparameters={"epochs": 6, "backend": "gloo"},
Expand Down
15 changes: 6 additions & 9 deletions tests/integ/test_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
from sagemaker.sklearn.model import SKLearnModel
from tests.integ import DATA_DIR, PYTHON_VERSION

MNIST_FOLDER_NAME = "MNIST"

GIT_REPO = "https://github.com/aws/sagemaker-python-sdk.git"
BRANCH = "test-branch-git-config"
Expand All @@ -51,33 +50,31 @@


@pytest.mark.local_mode
def test_github(sagemaker_local_session):
def test_github(sagemaker_local_session, pytorch_full_version, pytorch_full_py_version):
script_path = "mnist.py"
data_path = os.path.join(DATA_DIR, "pytorch_mnist")
git_config = {"repo": GIT_REPO, "branch": BRANCH, "commit": COMMIT}

# TODO: fails for newer pytorch versions when using MNIST from torchvision due to missing dataset
# "algo-1-v767u_1 | RuntimeError: Dataset not found. You can use download=True to download it"
pytorch = PyTorch(
entry_point=script_path,
role="SageMakerRole",
source_dir="pytorch",
framework_version="0.4", # hard-code to last known good pytorch for now (see TODO above)
py_version=PYTHON_VERSION,
framework_version=pytorch_full_version,
py_version=pytorch_full_py_version,
train_instance_count=1,
train_instance_type="local",
sagemaker_session=sagemaker_local_session,
git_config=git_config,
)

pytorch.fit({"training": "file://" + os.path.join(data_path, "training", MNIST_FOLDER_NAME)})
data_path = os.path.join(DATA_DIR, "pytorch_mnist")
pytorch.fit({"training": "file://" + os.path.join(data_path, "training")})

with lock.lock(LOCK_PATH):
try:
predictor = pytorch.deploy(initial_instance_count=1, instance_type="local")
data = numpy.zeros(shape=(1, 1, 28, 28)).astype(numpy.float32)
result = predictor.predict(data)
assert result is not None
assert 10 == len(result[0]) # check that there is a probability for each label
finally:
predictor.delete_endpoint()

Expand Down
58 changes: 29 additions & 29 deletions tests/integ/test_pytorch_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,13 @@
import numpy
import os
import pytest
from sagemaker.pytorch.defaults import LATEST_PY2_VERSION

from sagemaker.pytorch.estimator import PyTorch
from sagemaker.pytorch.model import PyTorchModel
from sagemaker.utils import sagemaker_timestamp

from tests.integ import (
test_region,
DATA_DIR,
PYTHON_VERSION,
TRAINING_DEFAULT_TIMEOUT_MINUTES,
EI_SUPPORTED_REGIONS,
)
Expand All @@ -39,22 +37,21 @@


@pytest.fixture(scope="module", name="pytorch_training_job")
def fixture_training_job(sagemaker_session, pytorch_full_version, cpu_instance_type):
def fixture_training_job(
sagemaker_session, pytorch_full_version, pytorch_full_py_version, cpu_instance_type
):
with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
pytorch = _get_pytorch_estimator(sagemaker_session, pytorch_full_version, cpu_instance_type)
pytorch = _get_pytorch_estimator(
sagemaker_session, pytorch_full_version, pytorch_full_py_version, cpu_instance_type
)

pytorch.fit({"training": _upload_training_data(pytorch)})
return pytorch.latest_training_job.name


@pytest.mark.canary_quick
@pytest.mark.regional_testing
@pytest.mark.skipif(
PYTHON_VERSION == "py2",
reason="Python 2 is supported by PyTorch {} and lower versions.".format(LATEST_PY2_VERSION),
)
def test_sync_fit_deploy(pytorch_training_job, sagemaker_session, cpu_instance_type):
# TODO: add tests against local mode when it's ready to be used
def test_fit_deploy(pytorch_training_job, sagemaker_session, cpu_instance_type):
endpoint_name = "test-pytorch-sync-fit-attach-deploy{}".format(sagemaker_timestamp())
with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
estimator = PyTorch.attach(pytorch_training_job, sagemaker_session=sagemaker_session)
Expand All @@ -70,16 +67,12 @@ def test_sync_fit_deploy(pytorch_training_job, sagemaker_session, cpu_instance_t


@pytest.mark.local_mode
@pytest.mark.skipif(
PYTHON_VERSION == "py2",
reason="Python 2 is supported by PyTorch {} and lower versions.".format(LATEST_PY2_VERSION),
)
def test_fit_deploy(sagemaker_local_session, pytorch_full_version):
def test_local_fit_deploy(sagemaker_local_session, pytorch_full_version, pytorch_full_py_version):
pytorch = PyTorch(
entry_point=MNIST_SCRIPT,
role="SageMakerRole",
framework_version=pytorch_full_version,
py_version="py3",
py_version=pytorch_full_py_version,
train_instance_count=1,
train_instance_type="local",
sagemaker_session=sagemaker_local_session,
Expand All @@ -99,7 +92,11 @@ def test_fit_deploy(sagemaker_local_session, pytorch_full_version):


def test_deploy_model(
pytorch_training_job, sagemaker_session, cpu_instance_type, pytorch_full_version
pytorch_training_job,
sagemaker_session,
cpu_instance_type,
pytorch_full_version,
pytorch_full_py_version,
):
endpoint_name = "test-pytorch-deploy-model-{}".format(sagemaker_timestamp())

Expand All @@ -113,7 +110,7 @@ def test_deploy_model(
"SageMakerRole",
entry_point=MNIST_SCRIPT,
framework_version=pytorch_full_version,
py_version="py3",
py_version=pytorch_full_py_version,
sagemaker_session=sagemaker_session,
)
predictor = model.deploy(1, cpu_instance_type, endpoint_name=endpoint_name)
Expand All @@ -125,7 +122,9 @@ def test_deploy_model(
assert output.shape == (batch_size, 10)


def test_deploy_packed_model_with_entry_point_name(sagemaker_session, cpu_instance_type):
def test_deploy_packed_model_with_entry_point_name(
sagemaker_session, cpu_instance_type, pytorch_full_version, pytorch_full_py_version
):
endpoint_name = "test-pytorch-deploy-model-{}".format(sagemaker_timestamp())

with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
Expand All @@ -134,8 +133,8 @@ def test_deploy_packed_model_with_entry_point_name(sagemaker_session, cpu_instan
model_data,
"SageMakerRole",
entry_point="mnist.py",
framework_version="1.4.0",
py_version="py3",
framework_version=pytorch_full_version,
py_version=pytorch_full_py_version,
sagemaker_session=sagemaker_session,
)
predictor = model.deploy(1, cpu_instance_type, endpoint_name=endpoint_name)
Expand All @@ -147,19 +146,20 @@ def test_deploy_packed_model_with_entry_point_name(sagemaker_session, cpu_instan
assert output.shape == (batch_size, 10)


@pytest.mark.skipif(PYTHON_VERSION == "py2", reason="PyTorch EIA does not support Python 2.")
@pytest.mark.skipif(
test_region() not in EI_SUPPORTED_REGIONS, reason="EI isn't supported in that specific region."
)
def test_deploy_model_with_accelerator(sagemaker_session, cpu_instance_type):
def test_deploy_model_with_accelerator(
sagemaker_session, cpu_instance_type, pytorch_full_ei_version, pytorch_full_py_version
):
endpoint_name = "test-pytorch-deploy-eia-{}".format(sagemaker_timestamp())
model_data = sagemaker_session.upload_data(path=EIA_MODEL)
pytorch = PyTorchModel(
model_data,
"SageMakerRole",
entry_point=EIA_SCRIPT,
framework_version="1.3.1",
py_version="py3",
framework_version=pytorch_full_ei_version,
py_version=pytorch_full_py_version,
sagemaker_session=sagemaker_session,
)
with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
Expand All @@ -185,13 +185,13 @@ def _upload_training_data(pytorch):


def _get_pytorch_estimator(
sagemaker_session, pytorch_full_version, instance_type, entry_point=MNIST_SCRIPT
sagemaker_session, pytorch_version, py_version, instance_type, entry_point=MNIST_SCRIPT
):
return PyTorch(
entry_point=entry_point,
role="SageMakerRole",
framework_version=pytorch_full_version,
py_version="py3",
framework_version=pytorch_version,
py_version=py_version,
train_instance_count=1,
train_instance_type=instance_type,
sagemaker_session=sagemaker_session,
Expand Down
5 changes: 2 additions & 3 deletions tests/integ/test_source_dirs.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,8 @@
import pytest

import tests.integ.lock as lock
from tests.integ import DATA_DIR, PYTHON_VERSION

from sagemaker.pytorch.estimator import PyTorch
from tests.integ import DATA_DIR


@pytest.mark.local_mode
Expand All @@ -38,7 +37,7 @@ def test_source_dirs(tmpdir, sagemaker_local_session):
source_dir=source_dir,
dependencies=[lib],
framework_version="0.4", # hard-code to last known good pytorch for now (see TODO above)
py_version=PYTHON_VERSION,
py_version="py3",
train_instance_count=1,
train_instance_type="local",
sagemaker_session=sagemaker_local_session,
Expand Down
8 changes: 6 additions & 2 deletions tests/integ/test_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,11 @@ def test_attach_transform_kmeans(sagemaker_session, cpu_instance_type):


def test_transform_pytorch_vpc_custom_model_bucket(
sagemaker_session, pytorch_full_version, cpu_instance_type, custom_bucket_name
sagemaker_session,
pytorch_full_version,
pytorch_full_py_version,
cpu_instance_type,
custom_bucket_name,
):
data_dir = os.path.join(DATA_DIR, "pytorch_mnist")

Expand All @@ -177,7 +181,7 @@ def test_transform_pytorch_vpc_custom_model_bucket(
entry_point=os.path.join(data_dir, "mnist.py"),
role="SageMakerRole",
framework_version=pytorch_full_version,
py_version="py3",
py_version=pytorch_full_py_version,
sagemaker_session=sagemaker_session,
vpc_config={"Subnets": subnet_ids, "SecurityGroupIds": [security_group_id]},
code_location="s3://{}".format(custom_bucket_name),
Expand Down
6 changes: 4 additions & 2 deletions tests/integ/test_tuner.py
Original file line number Diff line number Diff line change
Expand Up @@ -762,7 +762,9 @@ def test_tuning_chainer(sagemaker_session, chainer_full_version, cpu_instance_ty
reason="This test has always failed, but the failure was masked by a bug. "
"This test should be fixed. Details in https://github.com/aws/sagemaker-python-sdk/pull/968"
)
def test_attach_tuning_pytorch(sagemaker_session, cpu_instance_type, pytorch_full_version):
def test_attach_tuning_pytorch(
sagemaker_session, cpu_instance_type, pytorch_full_version, pytorch_full_py_version
):
mnist_dir = os.path.join(DATA_DIR, "pytorch_mnist")
mnist_script = os.path.join(mnist_dir, "mnist.py")

Expand All @@ -771,7 +773,7 @@ def test_attach_tuning_pytorch(sagemaker_session, cpu_instance_type, pytorch_ful
role="SageMakerRole",
train_instance_count=1,
framework_version=pytorch_full_version,
py_version="py3",
py_version=pytorch_full_py_version,
train_instance_type=cpu_instance_type,
sagemaker_session=sagemaker_session,
)
Expand Down