diff --git a/buildspec.yml b/buildspec.yml
index 7858bee8..cab46184 100644
--- a/buildspec.yml
+++ b/buildspec.yml
@@ -39,7 +39,7 @@ phases:
       - GENERIC_TAG="$FRAMEWORK_VERSION-pytorch-$BUILD_ID"
       - DLC_CPU_TAG="$FRAMEWORK_VERSION-dlc-cpu-$BUILD_ID"
       - DLC_GPU_TAG="$FRAMEWORK_VERSION-dlc-gpu-$BUILD_ID"
-      - DLC_EIA_TAG="$FRAMEWORK_VERSION-dlc-eia-$BUILD_ID"
+      - DLC_EIA_TAG="$EIA_FRAMEWORK_VERSION-dlc-eia-$BUILD_ID"
 
       # run local CPU integration tests (build and push the image to ECR repo)
       - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local --build-image --push-image --dockerfile-type pytorch --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $GENERIC_TAG"
diff --git a/src/sagemaker_pytorch_serving_container/default_pytorch_inference_handler.py b/src/sagemaker_pytorch_serving_container/default_pytorch_inference_handler.py
index 92857434..313f956f 100644
--- a/src/sagemaker_pytorch_serving_container/default_pytorch_inference_handler.py
+++ b/src/sagemaker_pytorch_serving_container/default_pytorch_inference_handler.py
@@ -13,7 +13,6 @@
 from __future__ import absolute_import
 
 import os
-import textwrap
 
 import torch
 from sagemaker_inference import (
@@ -29,9 +28,21 @@
 DEFAULT_MODEL_FILENAME = "model.pt"
 
 
+class ModelLoadError(Exception):
+    pass
+
+
 class DefaultPytorchInferenceHandler(default_inference_handler.DefaultInferenceHandler):
     VALID_CONTENT_TYPES = (content_types.JSON, content_types.NPY)
 
+    @staticmethod
+    def _is_model_file(filename):
+        is_model_file = False
+        if os.path.isfile(filename):
+            _, ext = os.path.splitext(filename)
+            is_model_file = ext in [".pt", ".pth"]
+        return is_model_file
+
     def default_model_fn(self, model_dir):
         """Loads a model. For PyTorch, a default function to load a model only if Elastic Inference is used.
         In other cases, users should provide customized model_fn() in script.
@@ -47,12 +58,30 @@ def default_model_fn(self, model_dir):
                 raise FileNotFoundError("Failed to load model with default model_fn: missing file {}."
                                         .format(DEFAULT_MODEL_FILENAME))
             # Client-framework is CPU only. But model will run in Elastic Inference server with CUDA.
-            return torch.jit.load(model_path, map_location=torch.device('cpu'))
+            try:
+                return torch.jit.load(model_path, map_location=torch.device('cpu'))
+            except RuntimeError as e:
+                raise ModelLoadError(
+                    "Failed to load {}. Please ensure model is saved using torchscript.".format(model_path)
+                ) from e
         else:
-            raise NotImplementedError(textwrap.dedent("""
-            Please provide a model_fn implementation.
-            See documentation for model_fn at https://github.com/aws/sagemaker-python-sdk
-            """))
+            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+            model_path = os.path.join(model_dir, DEFAULT_MODEL_FILENAME)
+            if not os.path.exists(model_path):
+                model_files = [file for file in os.listdir(model_dir) if self._is_model_file(file)]
+                if len(model_files) != 1:
+                    raise ValueError(
+                        "Exactly one .pth or .pt file is required for PyTorch models: {}".format(model_files)
+                    )
+                model_path = os.path.join(model_dir, model_files[0])
+            try:
+                model = torch.jit.load(model_path, map_location=device)
+            except RuntimeError as e:
+                raise ModelLoadError(
+                    "Failed to load {}. Please ensure model is saved using torchscript.".format(model_path)
+                ) from e
+            model = model.to(device)
+            return model
 
     def default_input_fn(self, input_data, content_type):
         """A default input_fn that can handle JSON, CSV and NPZ formats.
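Note on the handler change above: both branches now go through `torch.jit.load`, so the model artifact must be a TorchScript archive, otherwise the new `ModelLoadError` is raised. A minimal sketch of producing a compatible `model.pt` (the `Net` module here is hypothetical, only for illustration, and not part of this change):

```python
import torch
import torch.nn as nn


class Net(nn.Module):
    """Hypothetical model; any nn.Module that TorchScript can compile works."""

    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(784, 10)

    def forward(self, x):
        return self.fc(x.view(-1, 784))


# Compile to TorchScript and save under the default name the handler looks for.
scripted = torch.jit.script(Net())
torch.jit.save(scripted, "model.pt")  # DEFAULT_MODEL_FILENAME

# A plain torch.save(Net().state_dict(), "model.pt") checkpoint is not loadable
# by torch.jit.load and would hit the ModelLoadError path instead.
```

When `model.pt` is absent, the non-EIA branch falls back to whatever single `.pt`/`.pth` file `_is_model_file` finds in the model directory.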
diff --git a/test/integration/__init__.py b/test/integration/__init__.py
index 0442aa25..bdc792c4 100644
--- a/test/integration/__init__.py
+++ b/test/integration/__init__.py
@@ -18,12 +18,16 @@
 resources_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'resources'))
 mnist_path = os.path.join(resources_path, 'mnist')
+resnet18_path = os.path.join(resources_path, 'resnet18')
 data_dir = os.path.join(mnist_path, 'data')
 training_dir = os.path.join(data_dir, 'training')
 cpu_sub_dir = 'model_cpu'
 gpu_sub_dir = 'model_gpu'
 eia_sub_dir = 'model_eia'
 code_sub_dir = 'code'
+default_sub_dir = 'default_model'
+default_sub_eia_dir = 'default_model_eia'
+default_sub_traced_resnet_dir = 'default_traced_resnet'
 
 model_cpu_dir = os.path.join(mnist_path, cpu_sub_dir)
 mnist_cpu_script = os.path.join(model_cpu_dir, code_sub_dir, 'mnist.py')
@@ -59,6 +63,28 @@
     "model_call_model_fn_once.tar.gz",
     script_path="code")
 
+default_model_dir = os.path.join(resnet18_path, default_sub_dir)
+default_model_script = os.path.join(default_model_dir, code_sub_dir, "resnet18.py")
+default_model_tar = file_utils.make_tarfile(
+    default_model_script, os.path.join(default_model_dir, "model.pt"), default_model_dir, script_path="code"
+)
+
+default_traced_resnet_dir = os.path.join(resnet18_path, default_sub_traced_resnet_dir)
+default_traced_resnet_script = os.path.join(default_traced_resnet_dir, code_sub_dir, "resnet18.py")
+default_model_traced_resnet18_tar = file_utils.make_tarfile(
+    default_traced_resnet_script,
+    os.path.join(default_traced_resnet_dir, "traced_resnet18.pt"),
+    default_traced_resnet_dir,
+    filename="traced_resnet18.tar.gz",
+    script_path="code",
+)
+
+default_model_eia_dir = os.path.join(mnist_path, default_sub_eia_dir)
+default_model_eia_script = os.path.join(default_model_eia_dir, code_sub_dir, "mnist.py")
+default_model_eia_tar = file_utils.make_tarfile(
+    default_model_eia_script, os.path.join(default_model_eia_dir, "model.pt"), default_model_eia_dir
+)
+
 ROLE = 'dummy/unused-role'
 DEFAULT_TIMEOUT = 20
 PYTHON3 = 'py3'
diff --git a/test/integration/sagemaker/test_default_inference.py b/test/integration/sagemaker/test_default_inference.py
new file mode 100644
index 00000000..b690d34c
--- /dev/null
+++ b/test/integration/sagemaker/test_default_inference.py
@@ -0,0 +1,130 @@
+# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from __future__ import absolute_import
+
+import json
+import numpy as np
+import pytest
+import requests
+import sagemaker
+from sagemaker.predictor import RealTimePredictor
+from sagemaker.pytorch import PyTorchModel, PyTorchPredictor
+
+from integration import (
+    default_model_script,
+    default_model_tar,
+    default_traced_resnet_script,
+    default_model_traced_resnet18_tar,
+    default_model_eia_script,
+    default_model_eia_tar,
+)
+from integration.sagemaker.timeout import timeout_and_delete_endpoint
+
+
+@pytest.mark.cpu_test
+def test_default_inference_cpu(sagemaker_session, image_uri, instance_type):
+    instance_type = instance_type or "ml.c4.xlarge"
+    # Scripted model is serialized with torch.jit.save().
+    # Default inference test doesn't need to instantiate model definition
+    _test_default_inference(
+        sagemaker_session, image_uri, instance_type, default_model_tar, default_model_script
+    )
+
+
+@pytest.mark.gpu_test
+def test_default_inference_gpu(sagemaker_session, image_uri, instance_type):
+    instance_type = instance_type or "ml.p2.xlarge"
+    # Scripted model is serialized with torch.jit.save().
+    # Default inference test doesn't need to instantiate model definition
+    _test_default_inference(
+        sagemaker_session, image_uri, instance_type, default_model_tar, default_model_script
+    )
+
+
+@pytest.mark.skip(
+    reason="Latest EIA version - 1.5.1 uses mms. Enable when EIA images use torchserve"
+)
+@pytest.mark.eia_test
+def test_default_inference_eia(sagemaker_session, image_uri, instance_type, accelerator_type):
+    instance_type = instance_type or "ml.c4.xlarge"
+    # Scripted model is serialized with torch.jit.save().
+    # Default inference test doesn't need to instantiate model definition
+    _test_default_inference(
+        sagemaker_session,
+        image_uri,
+        instance_type,
+        default_model_eia_tar,
+        default_model_eia_script,
+        accelerator_type=accelerator_type,
+    )
+
+
+@pytest.mark.gpu_test
+def test_default_inference_any_model_name_gpu(sagemaker_session, image_uri, instance_type):
+    instance_type = instance_type or "ml.p2.xlarge"
+    # Scripted model is serialized with torch.jit.save().
+    # Default inference test doesn't need to instantiate model definition
+    _test_default_inference(
+        sagemaker_session,
+        image_uri,
+        instance_type,
+        default_model_traced_resnet18_tar,
+        default_traced_resnet_script,
+    )
+
+
+def _test_default_inference(
+    sagemaker_session, image_uri, instance_type, model_tar, mnist_script, accelerator_type=None
+):
+    endpoint_name = sagemaker.utils.unique_name_from_base("sagemaker-pytorch-serving")
+
+    model_data = sagemaker_session.upload_data(
+        path=model_tar,
+        key_prefix="sagemaker-pytorch-serving/models",
+    )
+
+    pytorch = PyTorchModel(
+        model_data=model_data,
+        role="SageMakerRole",
+        predictor_cls=RealTimePredictor if not accelerator_type else PyTorchPredictor,
+        entry_point=mnist_script,
+        image=image_uri,
+        sagemaker_session=sagemaker_session,
+    )
+    with timeout_and_delete_endpoint(endpoint_name, sagemaker_session, minutes=30):
+        predictor = pytorch.deploy(
+            initial_instance_count=1,
+            instance_type=instance_type,
+            accelerator_type=accelerator_type,
+            endpoint_name=endpoint_name,
+        )
+
+        if accelerator_type:
+            batch_size = 100
+            data = np.random.rand(batch_size, 1, 28, 28).astype(np.float32)
+            output = predictor.predict(data)
+            assert output.shape == (batch_size, 10)
+        else:
+            image_url = (
+                "https://raw.githubusercontent.com/aws/amazon-sagemaker-examples/master/"
+                "sagemaker_neo_compilation_jobs/pytorch_torchvision/cat.jpg"
+            )
+            img_data = requests.get(image_url).content
+            with open("cat.jpg", "wb") as file_obj:
+                file_obj.write(img_data)
+            with open("cat.jpg", "rb") as f:
+                payload = f.read()
+                payload = bytearray(payload)
+            response = predictor.predict(payload)
+            result = json.loads(response.decode())
+            assert len(result) == 1000
diff --git a/test/integration/sagemaker/test_mnist.py b/test/integration/sagemaker/test_mnist.py
index 912eba5b..174bbe67 100644
--- a/test/integration/sagemaker/test_mnist.py
+++ b/test/integration/sagemaker/test_mnist.py
@@ -34,7 +34,7 @@ def test_mnist_gpu(sagemaker_session, image_uri, instance_type):
     _test_mnist_distributed(sagemaker_session, image_uri, instance_type, model_gpu_tar, mnist_gpu_script)
 
 
-@pytest.mark.skip(reason="Latest EIA version is too old - 1.3.1. Remove this after a new DLC release")
+@pytest.mark.skip(reason="Latest EIA version - 1.5.1 uses mms. Enable when EIA images use torchserve")
 @pytest.mark.eia_test
 def test_mnist_eia(sagemaker_session, image_uri, instance_type, accelerator_type):
     instance_type = instance_type or 'ml.c4.xlarge'
diff --git a/test/resources/mnist/default_model_eia/code/mnist.py b/test/resources/mnist/default_model_eia/code/mnist.py
new file mode 100644
index 00000000..960af1c1
--- /dev/null
+++ b/test/resources/mnist/default_model_eia/code/mnist.py
@@ -0,0 +1,35 @@
+# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from __future__ import absolute_import
+import logging
+import os
+import sys
+
+import torch
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
+logger.addHandler(logging.StreamHandler(sys.stdout))
+
+
+def predict_fn(input_data, model):
+    logger.info('Performing EIA inference with Torch JIT context with input of size {}'.format(input_data.shape))
+    # With EI, client instance should be CPU for cost-efficiency.
+    # Sub-graphs with unsupported arguments run locally. Server runs with CUDA
+    device = torch.device('cpu')
+    model = model.to(device)
+    input_data = input_data.to(device)
+    with torch.no_grad():
+        # Set the target device to the accelerator ordinal
+        with torch.jit.optimized_execution(True, {'target_device': 'eia:0'}):
+            return model(input_data)
diff --git a/test/resources/mnist/default_model_eia/model.pt b/test/resources/mnist/default_model_eia/model.pt
new file mode 100644
index 00000000..d7b4691d
Binary files /dev/null and b/test/resources/mnist/default_model_eia/model.pt differ
diff --git a/test/resources/resnet18/default_model/code/resnet18.py b/test/resources/resnet18/default_model/code/resnet18.py
new file mode 100644
index 00000000..701aff8c
--- /dev/null
+++ b/test/resources/resnet18/default_model/code/resnet18.py
@@ -0,0 +1,51 @@
+import io
+import json
+import logging
+
+import numpy as np
+import torch
+import torchvision.transforms as transforms
+from PIL import Image  # Training container doesn't have this package
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
+
+
+def transform_fn(model, payload, request_content_type, response_content_type):
+
+    logger.info("Invoking user-defined transform function")
+
+    if request_content_type and request_content_type != "application/octet-stream":
+        raise RuntimeError(
+            "Content type must be application/octet-stream. Provided: {0}".format(
+                request_content_type
+            )
+        )
+
+    # preprocess
+    decoded = Image.open(io.BytesIO(payload))
+    preprocess = transforms.Compose(
+        [
+            transforms.Resize(256),
+            transforms.CenterCrop(224),
+            transforms.ToTensor(),
+            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+        ]
+    )
+    normalized = preprocess(decoded)
+    batchified = normalized.unsqueeze(0)
+
+    # predict
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    batchified = batchified.to(device)
+    result = model.forward(batchified)
+
+    # Softmax (assumes batch size 1)
+    result = np.squeeze(result.cpu().detach().numpy())
+    result_exp = np.exp(result - np.max(result))
+    result = result_exp / np.sum(result_exp)
+
+    response_body = json.dumps(result.tolist())
+    content_type = "application/json"
+
+    return response_body, content_type
diff --git a/test/resources/resnet18/default_model/model.pt b/test/resources/resnet18/default_model/model.pt
new file mode 100644
index 00000000..31ef7be6
Binary files /dev/null and b/test/resources/resnet18/default_model/model.pt differ
diff --git a/test/resources/resnet18/default_traced_resnet/code/resnet18.py b/test/resources/resnet18/default_traced_resnet/code/resnet18.py
new file mode 100644
index 00000000..701aff8c
--- /dev/null
+++ b/test/resources/resnet18/default_traced_resnet/code/resnet18.py
@@ -0,0 +1,51 @@
+import io
+import json
+import logging
+
+import numpy as np
+import torch
+import torchvision.transforms as transforms
+from PIL import Image  # Training container doesn't have this package
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
+
+
+def transform_fn(model, payload, request_content_type, response_content_type):
+
+    logger.info("Invoking user-defined transform function")
+
+    if request_content_type and request_content_type != "application/octet-stream":
+        raise RuntimeError(
+            "Content type must be application/octet-stream. Provided: {0}".format(
+                request_content_type
+            )
+        )
+
+    # preprocess
+    decoded = Image.open(io.BytesIO(payload))
+    preprocess = transforms.Compose(
+        [
+            transforms.Resize(256),
+            transforms.CenterCrop(224),
+            transforms.ToTensor(),
+            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+        ]
+    )
+    normalized = preprocess(decoded)
+    batchified = normalized.unsqueeze(0)
+
+    # predict
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    batchified = batchified.to(device)
+    result = model.forward(batchified)
+
+    # Softmax (assumes batch size 1)
+    result = np.squeeze(result.cpu().detach().numpy())
+    result_exp = np.exp(result - np.max(result))
+    result = result_exp / np.sum(result_exp)
+
+    response_body = json.dumps(result.tolist())
+    content_type = "application/json"
+
+    return response_body, content_type
diff --git a/test/resources/resnet18/default_traced_resnet/traced_resnet18.pt b/test/resources/resnet18/default_traced_resnet/traced_resnet18.pt
new file mode 100644
index 00000000..19cd36ac
Binary files /dev/null and b/test/resources/resnet18/default_traced_resnet/traced_resnet18.pt differ
diff --git a/test/unit/test_default_inference_handler.py b/test/unit/test_default_inference_handler.py
index f45fadde..427494a7 100644
--- a/test/unit/test_default_inference_handler.py
+++ b/test/unit/test_default_inference_handler.py
@@ -14,6 +14,7 @@
 
 import csv
 import json
+import os
 
 import mock
 import numpy as np
@@ -58,8 +59,61 @@ def eia_inference_handler():
 
 
 def test_default_model_fn(inference_handler):
-    with pytest.raises(NotImplementedError):
-        inference_handler.default_model_fn("model_dir")
+    with mock.patch("sagemaker_pytorch_serving_container.default_pytorch_inference_handler.os") as mock_os:
+        mock_os.getenv.return_value = "true"
+        mock_os.path.join = os.path.join
+        mock_os.path.exists.return_value = True
+        with mock.patch("torch.jit.load") as mock_torch_load:
+            mock_torch_load.return_value = DummyModel()
+            model = inference_handler.default_model_fn("model_dir")
+            assert model is not None
+
+
+def test_default_model_fn_unknown_name(inference_handler):
+    with mock.patch("sagemaker_pytorch_serving_container.default_pytorch_inference_handler.os") as mock_os:
+        mock_os.getenv.return_value = "false"
+        mock_os.path.join = os.path.join
+        mock_os.path.exists.return_value = False
+        mock_os.path.isfile.return_value = True
+        mock_os.listdir.return_value = ["abcd.pt", "efgh.txt", "ijkl.bin"]
+        mock_os.path.splitext = os.path.splitext
+        with mock.patch("torch.jit.load") as mock_torch_load:
+            mock_torch_load.return_value = DummyModel()
+            model = inference_handler.default_model_fn("model_dir")
+            assert model is not None
+
+
+@pytest.mark.parametrize(
+    "listdir_return_value", [["abcd.py", "efgh.txt", "ijkl.bin"], ["abcd.pt", "efgh.pth"]]
+)
+def test_default_model_fn_no_model_file(inference_handler, listdir_return_value):
+    with mock.patch("sagemaker_pytorch_serving_container.default_pytorch_inference_handler.os") as mock_os:
+        mock_os.getenv.return_value = "false"
+        mock_os.path.join = os.path.join
+        mock_os.path.exists.return_value = False
+        mock_os.path.isfile.return_value = True
+        mock_os.listdir.return_value = listdir_return_value
+        mock_os.path.splitext = os.path.splitext
+        with mock.patch("torch.jit.load") as mock_torch_load:
+            mock_torch_load.return_value = DummyModel()
+            with pytest.raises(ValueError, match=r"Exactly one .pth or .pt file is required for PyTorch models: .*"):
+                inference_handler.default_model_fn("model_dir")
+
+
+def _produce_runtime_error(x, **kwargs):
+    raise RuntimeError("dummy runtime error")
+
+
+@pytest.mark.parametrize("test_case", ["eia", "non_eia"])
+def test_default_model_fn_non_torchscript_model(inference_handler, test_case):
+    with mock.patch("sagemaker_pytorch_serving_container.default_pytorch_inference_handler.os") as mock_os:
+        mock_os.getenv.return_value = "true" if test_case == "eia" else "false"
+        mock_os.path.join = os.path.join
+        mock_os.path.exists.return_value = True
+        with mock.patch("torch.jit") as mock_torch_jit:
+            mock_torch_jit.load = _produce_runtime_error
+            with pytest.raises(Exception, match=r"Failed to load .*. Please ensure model is saved using torchscript."):
+                inference_handler.default_model_fn("model_dir")
 
 
 def test_default_input_fn_json(inference_handler, tensor):
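The new `test/resources/resnet18` model artifacts are binary, so the diff cannot show how they were produced. A rough sketch of how a file like `traced_resnet18.pt` could be regenerated (assuming torchvision is available; the exact script used for the checked-in artifacts is not part of this diff):

```python
import torch
import torchvision.models as models

# Trace a pretrained resnet18 with a dummy input matching the (1, 3, 224, 224)
# batch produced by the transform_fn preprocessing in resnet18.py above.
model = models.resnet18(pretrained=True)
model.eval()

example_input = torch.rand(1, 3, 224, 224)
traced = torch.jit.trace(model, example_input)

# Saved under a non-default name on purpose: when model.pt is absent,
# default_model_fn falls back to the single .pt/.pth file it finds, which is
# what test_default_inference_any_model_name_gpu exercises.
torch.jit.save(traced, "traced_resnet18.pt")
```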