
change: Enable default model fn for cpu and gpu #107


Merged: 26 commits, merged Oct 25, 2021
buildspec.yml (2 changes: 1 addition & 1 deletion)
@@ -39,7 +39,7 @@ phases:
- GENERIC_TAG="$FRAMEWORK_VERSION-pytorch-$BUILD_ID"
- DLC_CPU_TAG="$FRAMEWORK_VERSION-dlc-cpu-$BUILD_ID"
- DLC_GPU_TAG="$FRAMEWORK_VERSION-dlc-gpu-$BUILD_ID"
- DLC_EIA_TAG="$FRAMEWORK_VERSION-dlc-eia-$BUILD_ID"
- DLC_EIA_TAG="$EIA_FRAMEWORK_VERSION-dlc-eia-$BUILD_ID"

# run local CPU integration tests (build and push the image to ECR repo)
- test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local --build-image --push-image --dockerfile-type pytorch --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $GENERIC_TAG"
@@ -13,7 +13,6 @@
from __future__ import absolute_import

import os
import textwrap

import torch
from sagemaker_inference import (
@@ -29,9 +28,21 @@
DEFAULT_MODEL_FILENAME = "model.pt"


class ModelLoadError(Exception):
    pass


class DefaultPytorchInferenceHandler(default_inference_handler.DefaultInferenceHandler):
    VALID_CONTENT_TYPES = (content_types.JSON, content_types.NPY)

    @staticmethod
    def _is_model_file(filename):
        is_model_file = False
        if os.path.isfile(filename):
            _, ext = os.path.splitext(filename)
            is_model_file = ext in [".pt", ".pth"]
        return is_model_file

    def default_model_fn(self, model_dir):
        """Loads a model. For PyTorch, a default function to load a model only if Elastic Inference is used.
        In other cases, users should provide customized model_fn() in script.
@@ -47,12 +58,30 @@ def default_model_fn(self, model_dir):
            raise FileNotFoundError("Failed to load model with default model_fn: missing file {}."
                                    .format(DEFAULT_MODEL_FILENAME))
            # Client-framework is CPU only. But model will run in Elastic Inference server with CUDA.
            return torch.jit.load(model_path, map_location=torch.device('cpu'))
            try:
                return torch.jit.load(model_path, map_location=torch.device('cpu'))
            except RuntimeError as e:
                raise ModelLoadError(
                    "Failed to load {}. Please ensure model is saved using torchscript.".format(model_path)
                ) from e
        else:
            raise NotImplementedError(textwrap.dedent("""
            Please provide a model_fn implementation.
            See documentation for model_fn at https://github.com/aws/sagemaker-python-sdk
            """))
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            model_path = os.path.join(model_dir, DEFAULT_MODEL_FILENAME)
            if not os.path.exists(model_path):
                model_files = [file for file in os.listdir(model_dir) if self._is_model_file(file)]
                if len(model_files) != 1:
                    raise ValueError(
                        "Exactly one .pth or .pt file is required for PyTorch models: {}".format(model_files)
                    )
                model_path = os.path.join(model_dir, model_files[0])
            try:
                model = torch.jit.load(model_path, map_location=device)
            except RuntimeError as e:
                raise ModelLoadError(
                    "Failed to load {}. Please ensure model is saved using torchscript.".format(model_path)
                ) from e
            model = model.to(device)
            return model

    def default_input_fn(self, input_data, content_type):
        """A default input_fn that can handle JSON, CSV and NPZ formats.
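Reviewer note: the new CPU/GPU branch loads a plain TorchScript artifact, so no model class definition is needed at inference time. A minimal local sketch of the behavior follows; it is not part of this PR, and the import path is an assumption (this view does not show the modified file's name), based on the package layout of this repo.

# Sketch: exercise the new default_model_fn branch locally with a tiny scripted model.
import os
import tempfile

import torch

from sagemaker_pytorch_serving_container.default_pytorch_inference_handler import (  # assumed module path
    DefaultPytorchInferenceHandler,
)

handler = DefaultPytorchInferenceHandler()
with tempfile.TemporaryDirectory() as model_dir:
    # A TorchScript artifact named model.pt is loaded directly; a single other
    # .pt/.pth file would instead be picked up by the new discovery logic.
    scripted = torch.jit.script(torch.nn.Linear(4, 2))
    torch.jit.save(scripted, os.path.join(model_dir, "model.pt"))
    model = handler.default_model_fn(model_dir)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(model(torch.rand(1, 4, device=device)).shape)  # torch.Size([1, 2])

Saving anything that is not a TorchScript module (for example a bare state_dict) would typically make torch.jit.load raise, which now surfaces as ModelLoadError rather than an opaque RuntimeError.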
test/integration/__init__.py (26 changes: 26 additions & 0 deletions)
@@ -18,12 +18,16 @@

resources_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'resources'))
mnist_path = os.path.join(resources_path, 'mnist')
resnet18_path = os.path.join(resources_path, 'resnet18')
data_dir = os.path.join(mnist_path, 'data')
training_dir = os.path.join(data_dir, 'training')
cpu_sub_dir = 'model_cpu'
gpu_sub_dir = 'model_gpu'
eia_sub_dir = 'model_eia'
code_sub_dir = 'code'
default_sub_dir = 'default_model'
default_sub_eia_dir = 'default_model_eia'
default_sub_traced_resnet_dir = 'default_traced_resnet'

model_cpu_dir = os.path.join(mnist_path, cpu_sub_dir)
mnist_cpu_script = os.path.join(model_cpu_dir, code_sub_dir, 'mnist.py')
@@ -59,6 +63,28 @@
"model_call_model_fn_once.tar.gz",
script_path="code")

default_model_dir = os.path.join(resnet18_path, default_sub_dir)
default_model_script = os.path.join(default_model_dir, code_sub_dir, "resnet18.py")
default_model_tar = file_utils.make_tarfile(
    default_model_script, os.path.join(default_model_dir, "model.pt"), default_model_dir, script_path="code"
)

default_traced_resnet_dir = os.path.join(resnet18_path, default_sub_traced_resnet_dir)
default_traced_resnet_script = os.path.join(default_traced_resnet_dir, code_sub_dir, "resnet18.py")
default_model_traced_resnet18_tar = file_utils.make_tarfile(
    default_traced_resnet_script,
    os.path.join(default_traced_resnet_dir, "traced_resnet18.pt"),
    default_traced_resnet_dir,
    filename="traced_resnet18.tar.gz",
    script_path="code",
)

default_model_eia_dir = os.path.join(mnist_path, default_sub_eia_dir)
default_model_eia_script = os.path.join(default_model_eia_dir, code_sub_dir, "mnist.py")
default_model_eia_tar = file_utils.make_tarfile(
    default_model_eia_script, os.path.join(default_model_eia_dir, "model.pt"), default_model_eia_dir
)

ROLE = 'dummy/unused-role'
DEFAULT_TIMEOUT = 20
PYTHON3 = 'py3'
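For context, file_utils.make_tarfile is an existing test helper that is not part of this diff. A rough, hedged equivalent of what the calls above are expected to produce (model artifact at the archive root, entry point under code/ when script_path="code" is passed) could look like this; the real helper may differ in details not visible here.

# Sketch only: approximates the expected bundle layout; names are illustrative.
import os
import tarfile

def make_model_tar(script, model_file, out_dir, filename="model.tar.gz", script_path="code"):
    tar_path = os.path.join(out_dir, filename)
    with tarfile.open(tar_path, "w:gz") as tar:
        tar.add(model_file, arcname=os.path.basename(model_file))  # e.g. model.pt at the root
        tar.add(script, arcname=os.path.join(script_path, os.path.basename(script)))  # e.g. code/resnet18.py
    return tar_path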
test/integration/sagemaker/test_default_inference.py (130 changes: 130 additions & 0 deletions)
@@ -0,0 +1,130 @@
# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
from __future__ import absolute_import

import json
import numpy as np
import pytest
import requests
import sagemaker
from sagemaker.predictor import RealTimePredictor
from sagemaker.pytorch import PyTorchModel, PyTorchPredictor

from integration import (
    default_model_script,
    default_model_tar,
    default_traced_resnet_script,
    default_model_traced_resnet18_tar,
    default_model_eia_script,
    default_model_eia_tar,
)
from integration.sagemaker.timeout import timeout_and_delete_endpoint


@pytest.mark.cpu_test
def test_default_inference_cpu(sagemaker_session, image_uri, instance_type):
    instance_type = instance_type or "ml.c4.xlarge"
    # Scripted model is serialized with torch.jit.save().
    # Default inference test doesn't need to instantiate model definition
    _test_default_inference(
        sagemaker_session, image_uri, instance_type, default_model_tar, default_model_script
    )


@pytest.mark.gpu_test
def test_default_inference_gpu(sagemaker_session, image_uri, instance_type):
    instance_type = instance_type or "ml.p2.xlarge"
    # Scripted model is serialized with torch.jit.save().
    # Default inference test doesn't need to instantiate model definition
    _test_default_inference(
        sagemaker_session, image_uri, instance_type, default_model_tar, default_model_script
    )


@pytest.mark.skip(
reason="Latest EIA version - 1.5.1 uses mms. Enable when EIA images use torchserve"
)
@pytest.mark.eia_test
def test_default_inference_eia(sagemaker_session, image_uri, instance_type, accelerator_type):
    instance_type = instance_type or "ml.c4.xlarge"
    # Scripted model is serialized with torch.jit.save().
    # Default inference test doesn't need to instantiate model definition
    _test_default_inference(
        sagemaker_session,
        image_uri,
        instance_type,
        default_model_eia_tar,
        default_model_eia_script,
        accelerator_type=accelerator_type,
    )


@pytest.mark.gpu_test
def test_default_inference_any_model_name_gpu(sagemaker_session, image_uri, instance_type):
    instance_type = instance_type or "ml.p2.xlarge"
    # Scripted model is serialized with torch.jit.save().
    # Default inference test doesn't need to instantiate model definition
    _test_default_inference(
        sagemaker_session,
        image_uri,
        instance_type,
        default_model_traced_resnet18_tar,
        default_traced_resnet_script,
    )


def _test_default_inference(
    sagemaker_session, image_uri, instance_type, model_tar, mnist_script, accelerator_type=None
):
    endpoint_name = sagemaker.utils.unique_name_from_base("sagemaker-pytorch-serving")

    model_data = sagemaker_session.upload_data(
        path=model_tar,
        key_prefix="sagemaker-pytorch-serving/models",
    )

    pytorch = PyTorchModel(
        model_data=model_data,
        role="SageMakerRole",
        predictor_cls=RealTimePredictor if not accelerator_type else PyTorchPredictor,
        entry_point=mnist_script,
        image=image_uri,
        sagemaker_session=sagemaker_session,
    )
    with timeout_and_delete_endpoint(endpoint_name, sagemaker_session, minutes=30):
        predictor = pytorch.deploy(
            initial_instance_count=1,
            instance_type=instance_type,
            accelerator_type=accelerator_type,
            endpoint_name=endpoint_name,
        )

        if accelerator_type:
            batch_size = 100
            data = np.random.rand(batch_size, 1, 28, 28).astype(np.float32)
            output = predictor.predict(data)
            assert output.shape == (batch_size, 10)
        else:
            image_url = (
                "https://raw.githubusercontent.com/aws/amazon-sagemaker-examples/master/"
                "sagemaker_neo_compilation_jobs/pytorch_torchvision/cat.jpg"
            )
            img_data = requests.get(image_url).content
            with open("cat.jpg", "wb") as file_obj:
                file_obj.write(img_data)
            with open("cat.jpg", "rb") as f:
                payload = f.read()
            payload = bytearray(payload)
            response = predictor.predict(payload)
            result = json.loads(response.decode())
            assert len(result) == 1000
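As the comments in these tests note, the fixtures are TorchScript artifacts produced with torch.jit.save(), so the tests never instantiate a model class. A hedged sketch of how such a fixture can be generated (assumes torchvision is installed; the pretrained weights download on first use):

# Sketch: trace torchvision's resnet18 and serialize it as a TorchScript artifact.
import torch
import torchvision.models as models

model = models.resnet18(pretrained=True).eval()
traced = torch.jit.trace(model, torch.rand(1, 3, 224, 224))
torch.jit.save(traced, "model.pt")            # default filename, loaded directly by the handler
torch.jit.save(traced, "traced_resnet18.pt")  # non-default name, exercises the discovery path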
test/integration/sagemaker/test_mnist.py (2 changes: 1 addition & 1 deletion)
@@ -34,7 +34,7 @@ def test_mnist_gpu(sagemaker_session, image_uri, instance_type):
    _test_mnist_distributed(sagemaker_session, image_uri, instance_type, model_gpu_tar, mnist_gpu_script)


@pytest.mark.skip(reason="Latest EIA version is too old - 1.3.1. Remove this after a new DLC release")
@pytest.mark.skip(reason="Latest EIA version - 1.5.1 uses mms. Enable when EIA images use torchserve")
@pytest.mark.eia_test
def test_mnist_eia(sagemaker_session, image_uri, instance_type, accelerator_type):
    instance_type = instance_type or 'ml.c4.xlarge'
test/resources/mnist/default_model_eia/code/mnist.py (35 changes: 35 additions & 0 deletions)
@@ -0,0 +1,35 @@
# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
from __future__ import absolute_import
import logging
import os
import sys

import torch

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
logger.addHandler(logging.StreamHandler(sys.stdout))


def predict_fn(input_data, model):
    logger.info('Performing EIA inference with Torch JIT context with input of size {}'.format(input_data.shape))
    # With EI, client instance should be CPU for cost-efficiency.
    # Sub-graphs with unsupported arguments run locally. Server runs with CUDA
    device = torch.device('cpu')
    model = model.to(device)
    input_data = input_data.to(device)
    with torch.no_grad():
        # Set the target device to the accelerator ordinal
        with torch.jit.optimized_execution(True, {'target_device': 'eia:0'}):
            return model(input_data)
Binary file added test/resources/mnist/default_model_eia/model.pt
test/resources/resnet18/default_model/code/resnet18.py (51 changes: 51 additions & 0 deletions)
@@ -0,0 +1,51 @@
import io
import json
import logging

import numpy as np
import torch
import torchvision.transforms as transforms
from PIL import Image # Training container doesn't have this package

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)


def transform_fn(model, payload, request_content_type, response_content_type):

    logger.info("Invoking user-defined transform function")

    if request_content_type and request_content_type != "application/octet-stream":
        raise RuntimeError(
            "Content type must be application/octet-stream. Provided: {0}".format(
                request_content_type
            )
        )

    # preprocess
    decoded = Image.open(io.BytesIO(payload))
    preprocess = transforms.Compose(
        [
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
    )
    normalized = preprocess(decoded)
    batchified = normalized.unsqueeze(0)

    # predict
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    batchified = batchified.to(device)
    result = model.forward(batchified)

    # Softmax (assumes batch size 1)
    result = np.squeeze(result.cpu().detach().numpy())
    result_exp = np.exp(result - np.max(result))
    result = result_exp / np.sum(result_exp)

    response_body = json.dumps(result.tolist())
    content_type = "application/json"

    return response_body, content_type
Binary file added test/resources/resnet18/default_model/model.pt
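A small usage sketch for the handler script above (not part of the PR): it assumes the traced model and a cat.jpg are available locally and that the script is importable as resnet18.

# Sketch: run transform_fn locally against a TorchScript resnet18 and a JPEG payload.
import json

import torch

from resnet18 import transform_fn  # the script above, assumed importable

device = "cuda" if torch.cuda.is_available() else "cpu"
model = torch.jit.load("traced_resnet18.pt", map_location=device)
with open("cat.jpg", "rb") as f:
    payload = f.read()

body, content_type = transform_fn(model, payload, "application/octet-stream", "application/json")
print(content_type, len(json.loads(body)))  # expect: application/json 1000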