
change: Enable default model fn for cpu and gpu #107


Merged: 26 commits, merged Oct 25, 2021
buildspec.yml (2 changes: 1 addition & 1 deletion)
@@ -39,7 +39,7 @@ phases:
- GENERIC_TAG="$FRAMEWORK_VERSION-pytorch-$BUILD_ID"
- DLC_CPU_TAG="$FRAMEWORK_VERSION-dlc-cpu-$BUILD_ID"
- DLC_GPU_TAG="$FRAMEWORK_VERSION-dlc-gpu-$BUILD_ID"
- DLC_EIA_TAG="$FRAMEWORK_VERSION-dlc-eia-$BUILD_ID"
- DLC_EIA_TAG="$EIA_FRAMEWORK_VERSION-dlc-eia-$BUILD_ID"

# run local CPU integration tests (build and push the image to ECR repo)
- test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local --build-image --push-image --dockerfile-type pytorch --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $GENERIC_TAG"
@@ -13,7 +13,6 @@
from __future__ import absolute_import

import os
import textwrap

import torch
from sagemaker_inference import (
@@ -29,9 +28,21 @@
DEFAULT_MODEL_FILENAME = "model.pt"


class ModelLoadError(Exception):
    pass


class DefaultPytorchInferenceHandler(default_inference_handler.DefaultInferenceHandler):
    VALID_CONTENT_TYPES = (content_types.JSON, content_types.NPY)

    @staticmethod
    def _is_model_file(filename):
        is_model_file = False
        if os.path.isfile(filename):
            _, ext = os.path.splitext(filename)
            is_model_file = ext in [".pt", ".pth"]
        return is_model_file

    def default_model_fn(self, model_dir):
        """Loads a model. For PyTorch, a default function to load a model only if Elastic Inference is used.
        In other cases, users should provide customized model_fn() in script.
@@ -47,12 +58,30 @@ def default_model_fn(self, model_dir):
            raise FileNotFoundError("Failed to load model with default model_fn: missing file {}."
                                    .format(DEFAULT_MODEL_FILENAME))
            # Client-framework is CPU only. But model will run in Elastic Inference server with CUDA.
            return torch.jit.load(model_path, map_location=torch.device('cpu'))
            try:
                return torch.jit.load(model_path, map_location=torch.device('cpu'))
            except RuntimeError as e:
                raise ModelLoadError(
                    "Failed to load {}. Please ensure model is saved using torchscript.".format(model_path)
                ) from e
        else:
            raise NotImplementedError(textwrap.dedent("""
            Please provide a model_fn implementation.
            See documentation for model_fn at https://github.com/aws/sagemaker-python-sdk
            """))
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            model_path = os.path.join(model_dir, DEFAULT_MODEL_FILENAME)
            if not os.path.exists(model_path):
                model_files = [file for file in os.listdir(model_dir) if self._is_model_file(file)]
                if len(model_files) != 1:
                    raise ValueError(
                        "Exactly one .pth or .pt file is required for PyTorch models: {}".format(model_files)
                    )
                model_path = os.path.join(model_dir, model_files[0])
            try:
                model = torch.jit.load(model_path, map_location=device)
            except RuntimeError as e:
                raise ModelLoadError(
                    "Failed to load {}. Please ensure model is saved using torchscript.".format(model_path)
                ) from e
            model = model.to(device)
            return model

    def default_input_fn(self, input_data, content_type):
        """A default input_fn that can handle JSON, CSV and NPZ formats.
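Reviewer note: the new CPU/GPU branch loads a plain TorchScript artifact, so no model class definition is needed at inference time. A minimal local sketch of the behavior follows; it is not part of this PR, and the import path is an assumption (this view does not show the modified file's name), based on the package layout of this repo.

# Sketch: exercise the new default_model_fn branch locally with a tiny scripted model.
import os
import tempfile

import torch

from sagemaker_pytorch_serving_container.default_pytorch_inference_handler import (  # assumed module path
    DefaultPytorchInferenceHandler,
)

handler = DefaultPytorchInferenceHandler()
with tempfile.TemporaryDirectory() as model_dir:
    # A TorchScript artifact named model.pt is loaded directly; a single other
    # .pt/.pth file would instead be picked up by the new discovery logic.
    scripted = torch.jit.script(torch.nn.Linear(4, 2))
    torch.jit.save(scripted, os.path.join(model_dir, "model.pt"))
    model = handler.default_model_fn(model_dir)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(model(torch.rand(1, 4, device=device)).shape)  # torch.Size([1, 2])

Saving anything that is not a TorchScript module (for example a bare state_dict) would typically make torch.jit.load raise, which now surfaces as ModelLoadError rather than an opaque RuntimeError.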
test/integration/__init__.py (26 changes: 26 additions & 0 deletions)
@@ -18,12 +18,16 @@

resources_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'resources'))
mnist_path = os.path.join(resources_path, 'mnist')
resnet18_path = os.path.join(resources_path, 'resnet18')
data_dir = os.path.join(mnist_path, 'data')
training_dir = os.path.join(data_dir, 'training')
cpu_sub_dir = 'model_cpu'
gpu_sub_dir = 'model_gpu'
eia_sub_dir = 'model_eia'
code_sub_dir = 'code'
default_sub_dir = 'default_model'
default_sub_eia_dir = 'default_model_eia'
default_sub_traced_resnet_dir = 'default_traced_resnet'

model_cpu_dir = os.path.join(mnist_path, cpu_sub_dir)
mnist_cpu_script = os.path.join(model_cpu_dir, code_sub_dir, 'mnist.py')
@@ -59,6 +63,28 @@
"model_call_model_fn_once.tar.gz",
script_path="code")

default_model_dir = os.path.join(resnet18_path, default_sub_dir)
default_model_script = os.path.join(default_model_dir, code_sub_dir, "resnet18.py")
default_model_tar = file_utils.make_tarfile(
    default_model_script, os.path.join(default_model_dir, "model.pt"), default_model_dir, script_path="code"
)

default_traced_resnet_dir = os.path.join(resnet18_path, default_sub_traced_resnet_dir)
default_traced_resnet_script = os.path.join(default_traced_resnet_dir, code_sub_dir, "resnet18.py")
default_model_traced_resnet18_tar = file_utils.make_tarfile(
    default_traced_resnet_script,
    os.path.join(default_traced_resnet_dir, "traced_resnet18.pt"),
    default_traced_resnet_dir,
    filename="traced_resnet18.tar.gz",
    script_path="code",
)

default_model_eia_dir = os.path.join(mnist_path, default_sub_eia_dir)
default_model_eia_script = os.path.join(default_model_eia_dir, code_sub_dir, "mnist.py")
default_model_eia_tar = file_utils.make_tarfile(
    default_model_eia_script, os.path.join(default_model_eia_dir, "model.pt"), default_model_eia_dir
)

ROLE = 'dummy/unused-role'
DEFAULT_TIMEOUT = 20
PYTHON3 = 'py3'
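For context, file_utils.make_tarfile is an existing test helper that is not part of this diff. A rough, hedged equivalent of what the calls above are expected to produce (model artifact at the archive root, entry point under code/ when script_path="code" is passed) could look like this; the real helper may differ in details not visible here.

# Sketch only: approximates the expected bundle layout; names are illustrative.
import os
import tarfile

def make_model_tar(script, model_file, out_dir, filename="model.tar.gz", script_path="code"):
    tar_path = os.path.join(out_dir, filename)
    with tarfile.open(tar_path, "w:gz") as tar:
        tar.add(model_file, arcname=os.path.basename(model_file))  # e.g. model.pt at the root
        tar.add(script, arcname=os.path.join(script_path, os.path.basename(script)))  # e.g. code/resnet18.py
    return tar_path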
test/integration/sagemaker/test_default_inference.py (130 changes: 130 additions & 0 deletions)
@@ -0,0 +1,130 @@
# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
from __future__ import absolute_import

import json
import numpy as np
import pytest
import requests
import sagemaker
from sagemaker.predictor import RealTimePredictor
from sagemaker.pytorch import PyTorchModel, PyTorchPredictor

from integration import (
    default_model_script,
    default_model_tar,
    default_traced_resnet_script,
    default_model_traced_resnet18_tar,
    default_model_eia_script,
    default_model_eia_tar,
)
from integration.sagemaker.timeout import timeout_and_delete_endpoint


@pytest.mark.cpu_test
def test_default_inference_cpu(sagemaker_session, image_uri, instance_type):
    instance_type = instance_type or "ml.c4.xlarge"
    # Scripted model is serialized with torch.jit.save().
    # Default inference test doesn't need to instantiate model definition
    _test_default_inference(
        sagemaker_session, image_uri, instance_type, default_model_tar, default_model_script
    )


@pytest.mark.gpu_test
def test_default_inference_gpu(sagemaker_session, image_uri, instance_type):
    instance_type = instance_type or "ml.p2.xlarge"
    # Scripted model is serialized with torch.jit.save().
    # Default inference test doesn't need to instantiate model definition
    _test_default_inference(
        sagemaker_session, image_uri, instance_type, default_model_tar, default_model_script
    )


@pytest.mark.skip(
reason="Latest EIA version - 1.5.1 uses mms. Enable when EIA images use torchserve"
)
@pytest.mark.eia_test
def test_default_inference_eia(sagemaker_session, image_uri, instance_type, accelerator_type):
    instance_type = instance_type or "ml.c4.xlarge"
    # Scripted model is serialized with torch.jit.save().
    # Default inference test doesn't need to instantiate model definition
    _test_default_inference(
        sagemaker_session,
        image_uri,
        instance_type,
        default_model_eia_tar,
        default_model_eia_script,
        accelerator_type=accelerator_type,
    )


@pytest.mark.gpu_test
def test_default_inference_any_model_name_gpu(sagemaker_session, image_uri, instance_type):
    instance_type = instance_type or "ml.p2.xlarge"
    # Scripted model is serialized with torch.jit.save().
    # Default inference test doesn't need to instantiate model definition
    _test_default_inference(
        sagemaker_session,
        image_uri,
        instance_type,
        default_model_traced_resnet18_tar,
        default_traced_resnet_script,
    )


def _test_default_inference(
    sagemaker_session, image_uri, instance_type, model_tar, mnist_script, accelerator_type=None
):
    endpoint_name = sagemaker.utils.unique_name_from_base("sagemaker-pytorch-serving")

    model_data = sagemaker_session.upload_data(
        path=model_tar,
        key_prefix="sagemaker-pytorch-serving/models",
    )

    pytorch = PyTorchModel(
        model_data=model_data,
        role="SageMakerRole",
        predictor_cls=RealTimePredictor if not accelerator_type else PyTorchPredictor,
        entry_point=mnist_script,
        image=image_uri,
        sagemaker_session=sagemaker_session,
    )
    with timeout_and_delete_endpoint(endpoint_name, sagemaker_session, minutes=30):
        predictor = pytorch.deploy(
            initial_instance_count=1,
            instance_type=instance_type,
            accelerator_type=accelerator_type,
            endpoint_name=endpoint_name,
        )

        if accelerator_type:
            batch_size = 100
            data = np.random.rand(batch_size, 1, 28, 28).astype(np.float32)
            output = predictor.predict(data)
            assert output.shape == (batch_size, 10)
        else:
            image_url = (
                "https://raw.githubusercontent.com/aws/amazon-sagemaker-examples/master/"
                "sagemaker_neo_compilation_jobs/pytorch_torchvision/cat.jpg"
            )
            img_data = requests.get(image_url).content
            with open("cat.jpg", "wb") as file_obj:
                file_obj.write(img_data)
            with open("cat.jpg", "rb") as f:
                payload = f.read()
            payload = bytearray(payload)
            response = predictor.predict(payload)
            result = json.loads(response.decode())
            assert len(result) == 1000
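As the comments in these tests note, the fixtures are TorchScript artifacts produced with torch.jit.save(), so the tests never instantiate a model class. A hedged sketch of how such a fixture can be generated (assumes torchvision is installed; the pretrained weights download on first use):

# Sketch: trace torchvision's resnet18 and serialize it as a TorchScript artifact.
import torch
import torchvision.models as models

model = models.resnet18(pretrained=True).eval()
traced = torch.jit.trace(model, torch.rand(1, 3, 224, 224))
torch.jit.save(traced, "model.pt")            # default filename, loaded directly by the handler
torch.jit.save(traced, "traced_resnet18.pt")  # non-default name, exercises the discovery path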
test/integration/sagemaker/test_mnist.py (2 changes: 1 addition & 1 deletion)
@@ -34,7 +34,7 @@ def test_mnist_gpu(sagemaker_session, image_uri, instance_type):
    _test_mnist_distributed(sagemaker_session, image_uri, instance_type, model_gpu_tar, mnist_gpu_script)


@pytest.mark.skip(reason="Latest EIA version is too old - 1.3.1. Remove this after a new DLC release")
@pytest.mark.skip(reason="Latest EIA version - 1.5.1 uses mms. Enable when EIA images use torchserve")
@pytest.mark.eia_test
def test_mnist_eia(sagemaker_session, image_uri, instance_type, accelerator_type):
    instance_type = instance_type or 'ml.c4.xlarge'
test/resources/mnist/default_model_eia/code/mnist.py (35 changes: 35 additions & 0 deletions)
@@ -0,0 +1,35 @@
# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
from __future__ import absolute_import
import logging
import os
import sys

import torch

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
logger.addHandler(logging.StreamHandler(sys.stdout))


def predict_fn(input_data, model):
    logger.info('Performing EIA inference with Torch JIT context with input of size {}'.format(input_data.shape))
    # With EI, client instance should be CPU for cost-efficiency.
    # Sub-graphs with unsupported arguments run locally. Server runs with CUDA
    device = torch.device('cpu')
    model = model.to(device)
    input_data = input_data.to(device)
    with torch.no_grad():
        # Set the target device to the accelerator ordinal
        with torch.jit.optimized_execution(True, {'target_device': 'eia:0'}):
            return model(input_data)
Binary file added test/resources/mnist/default_model_eia/model.pt
test/resources/resnet18/default_model/code/resnet18.py (51 changes: 51 additions & 0 deletions)
@@ -0,0 +1,51 @@
import io
import json
import logging

import numpy as np
import torch
import torchvision.transforms as transforms
from PIL import Image # Training container doesn't have this package

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)


def transform_fn(model, payload, request_content_type, response_content_type):

    logger.info("Invoking user-defined transform function")

    if request_content_type and request_content_type != "application/octet-stream":
        raise RuntimeError(
            "Content type must be application/octet-stream. Provided: {0}".format(
                request_content_type
            )
        )

    # preprocess
    decoded = Image.open(io.BytesIO(payload))
    preprocess = transforms.Compose(
        [
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
    )
    normalized = preprocess(decoded)
    batchified = normalized.unsqueeze(0)

    # predict
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    batchified = batchified.to(device)
    result = model.forward(batchified)

    # Softmax (assumes batch size 1)
    result = np.squeeze(result.cpu().detach().numpy())
    result_exp = np.exp(result - np.max(result))
    result = result_exp / np.sum(result_exp)

    response_body = json.dumps(result.tolist())
    content_type = "application/json"

    return response_body, content_type
Binary file added test/resources/resnet18/default_model/model.pt
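A small usage sketch for the handler script above (not part of the PR): it assumes the traced model and a cat.jpg are available locally and that the script is importable as resnet18.

# Sketch: run transform_fn locally against a TorchScript resnet18 and a JPEG payload.
import json

import torch

from resnet18 import transform_fn  # the script above, assumed importable

device = "cuda" if torch.cuda.is_available() else "cpu"
model = torch.jit.load("traced_resnet18.pt", map_location=device)
with open("cat.jpg", "rb") as f:
    payload = f.read()

body, content_type = transform_fn(model, payload, "application/octet-stream", "application/json")
print(content_type, len(json.loads(body)))  # expect: application/json 1000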