Commit b903090

Adding PyTorch EI Support (#38)
* PyTorch Elastic Inference DLC dockerfile
* Add EI inference script and model tar.gz
1 parent 53c290a commit b903090

File tree

8 files changed: +231 −19 lines

buildspec.yml

Lines changed: 16 additions & 0 deletions
@@ -3,12 +3,15 @@ version: 0.2
 env:
   variables:
     FRAMEWORK_VERSION: '1.4.0'
+    EIA_FRAMEWORK_VERSION: '1.3.1'
     CPU_PY2_VERSION: '2'
     CPU_PY3_VERSION: '3'
     CPU_INSTANCE_TYPE: 'ml.c4.xlarge'
     GPU_PY2_VERSION: '2'
     GPU_PY3_VERSION: '3'
+    EIA_PY3_VERSION: '3'
     GPU_INSTANCE_TYPE: 'ml.p2.xlarge'
+    EIA_ACCELERATOR_TYPE: 'ml.eia2.medium'
     LOCAL_BASE_REPO: 'pytorch-base'
     ECR_REPO: 'sagemaker-test'
     GITHUB_REPO: 'sagemaker-pytorch-serving-container'
@@ -44,6 +47,7 @@ phases:

       - cpu_dockerfile="Dockerfile.cpu"
       - gpu_dockerfile="Dockerfile.gpu"
+      - eia_dockerfile="Dockerfile.eia"

       # build py2 images
       - build_dir="docker/$FRAMEWORK_VERSION/py$CPU_PY2_VERSION"
@@ -60,15 +64,22 @@ phases:
       - cp -r docker/build_artifacts/* $build_dir/
       - CPU_PY3_TAG="$FRAMEWORK_VERSION-cpu-py3-$BUILD_ID"
       - GPU_PY3_TAG="$FRAMEWORK_VERSION-gpu-py3-$BUILD_ID"
+      - EIA_PY3_TAG="$EIA_FRAMEWORK_VERSION-eia-py3-$BUILD_ID"
       - docker build -f "$build_dir/$cpu_dockerfile" -t $PREPROD_IMAGE:$CPU_PY3_TAG $build_dir
       - docker build -f "$build_dir/$gpu_dockerfile" -t $PREPROD_IMAGE:$GPU_PY3_TAG $build_dir
+      # PY2 not offered for EIA PyTorch
+      - eia_build_dir="docker/$EIA_FRAMEWORK_VERSION/py$EIA_PY3_VERSION"
+      - cp sagemaker_pytorch_inference.tar.gz $eia_build_dir/
+      - cp -r docker/build_artifacts/* $eia_build_dir/
+      - docker build -f "$eia_build_dir/$eia_dockerfile" -t $PREPROD_IMAGE:$EIA_PY3_TAG $eia_build_dir

       # push images to ecr
       - $(aws ecr get-login --registry-ids $ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION)
       - docker push $PREPROD_IMAGE:$CPU_PY2_TAG
       - docker push $PREPROD_IMAGE:$CPU_PY3_TAG
       - docker push $PREPROD_IMAGE:$GPU_PY2_TAG
       - docker push $PREPROD_IMAGE:$GPU_PY3_TAG
+      - docker push $PREPROD_IMAGE:$EIA_PY3_TAG

       # launch remote gpu instance
       - prefix='ml.'
@@ -104,6 +115,10 @@ phases:
       - execute-command-if-has-matching-changes "$py3_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "docker/*" "buildspec.yml"
       - execute-command-if-has-matching-changes "$py2_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "docker/*" "buildspec.yml"

+      # run eia sagemaker tests
+      - py3_cmd="pytest test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $EIA_FRAMEWORK_VERSION --py-version $EIA_PY3_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --accelerator-type $EIA_ACCELERATOR_TYPE --tag $EIA_PY3_TAG"
+      - execute-command-if-has-matching-changes "$py3_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "docker/*" "buildspec.yml"
+
     finally:
       # shut down remote gpu instance
       - cleanup-gpu-instances
@@ -114,3 +129,4 @@ phases:
       - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$CPU_PY3_TAG
       - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GPU_PY2_TAG
       - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GPU_PY3_TAG
+      - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$EIA_PY3_TAG
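The gating helper execute-command-if-has-matching-changes used above ships with the CI tooling; its implementation is not part of this commit. A minimal Python sketch of what such a change-gated runner might do, assuming it compares the working tree against the target branch (the comparison base and matching rules here are assumptions):

# Hypothetical sketch of a change-gated command runner; not the real helper.
import fnmatch
import subprocess
import sys


def execute_if_changed(command, *patterns):
    # Assumption: the real helper diffs against the merge base of the target branch.
    changed = subprocess.check_output(
        ['git', 'diff', '--name-only', 'origin/master...HEAD']
    ).decode().splitlines()
    # Directory arguments like "test/" act as prefixes; the rest are globs.
    if any(path.startswith(pat) or fnmatch.fnmatch(path, pat)
           for path in changed for pat in patterns):
        sys.exit(subprocess.call(command, shell=True))
    print('No matching changes; skipping: {}'.format(command))


if __name__ == '__main__':
    execute_if_changed(sys.argv[1], *sys.argv[2:])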

docker/1.3.1/py3/Dockerfile.eia

Lines changed: 109 additions & 0 deletions
@@ -0,0 +1,109 @@
FROM ubuntu:16.04
LABEL maintainer="Amazon AI"
LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true

# Build arguments for the Python, PyTorch, and tooling versions
ARG PYTHON_VERSION=3.6.6
ARG PYTORCH_VERSION=1.3.1
ARG TORCHVISION_VERSION=0.4.2
ARG GRAPHVIZ_VERSION=0.13.2
ARG MMS_VERSION=1.0.8
ARG HEALTH_CHECK_VERSION=1.5.3

# See http://bugs.python.org/issue19846
ENV LANG C.UTF-8
ENV LD_LIBRARY_PATH /opt/conda/lib/:$LD_LIBRARY_PATH
ENV PATH /opt/conda/bin:$PATH
ENV SAGEMAKER_SERVING_MODULE sagemaker_pytorch_serving_container.serving:main
ENV TEMP=/home/model-server/tmp

RUN apt-get update \
 && apt-get install -y --allow-downgrades --allow-change-held-packages --no-install-recommends \
    build-essential \
    ca-certificates \
    cmake \
    curl \
    git \
    jq \
    libgl1-mesa-glx \
    libglib2.0-0 \
    libgomp1 \
    libibverbs-dev \
    libsm6 \
    libxext6 \
    libxrender-dev \
    openjdk-8-jdk-headless \
    vim \
    wget \
    zlib1g-dev

# Install OpenSSH and allow it to talk to containers without asking for confirmation
RUN apt-get install -y --no-install-recommends \
    openssh-client \
    openssh-server \
 && mkdir -p /var/run/sshd \
 && cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new \
 && echo "    StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new \
 && mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config

RUN curl -o ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \
 && chmod +x ~/miniconda.sh \
 && ~/miniconda.sh -b -p /opt/conda \
 && rm ~/miniconda.sh \
 && /opt/conda/bin/conda update conda \
 && /opt/conda/bin/conda install -y \
    python=$PYTHON_VERSION \
    cython==0.29.12 \
    ipython==7.7.0 \
    numpy==1.16.4 \
    scipy==1.3.0 \
    typing==3.6.4 \
 && /opt/conda/bin/conda clean -ya

RUN conda install -c conda-forge \
    awscli \
    opencv==4.0.1 \
 && conda install -y \
    scikit-learn==0.21.2 \
    pandas==0.25.0 \
    pillow==6.2.1 \
    h5py==2.9.0 \
    requests==2.22.0 \
 && conda clean -ya \
 && /opt/conda/bin/conda config --set ssl_verify False \
 && pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \
 && ln -s /opt/conda/bin/pip /usr/local/bin/pip3 \
# The torchvision wheel must be installed first so that the PyTorch-EI framework is not overwritten.
 && pip install https://download.pytorch.org/whl/cpu/torchvision-0.4.2%2Bcpu-cp36-cp36m-linux_x86_64.whl \
 && pip install https://s3.amazonaws.com/amazonei-pytorch/torch_eia-1.3.1-cp36-cp36m-manylinux1_x86_64.whl \
 && pip install graphviz==$GRAPHVIZ_VERSION \
 && pip install mxnet-model-server==$MMS_VERSION

RUN useradd -m model-server \
 && mkdir -p /home/model-server/tmp \
 && chown -R model-server /home/model-server

COPY mms-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
COPY config.properties /home/model-server

RUN chmod +x /usr/local/bin/dockerd-entrypoint.py

COPY sagemaker_pytorch_inference.tar.gz /sagemaker_pytorch_inference.tar.gz
RUN pip install --no-cache-dir \
    /sagemaker_pytorch_inference.tar.gz \
 && rm /sagemaker_pytorch_inference.tar.gz

RUN curl https://aws-dlc-licenses.s3.amazonaws.com/pytorch/license.txt -o /license.txt

RUN wget https://amazonei-tools.s3.amazonaws.com/v${HEALTH_CHECK_VERSION}/ei_tools_${HEALTH_CHECK_VERSION}.tar.gz -O /opt/ei_tools_${HEALTH_CHECK_VERSION}.tar.gz \
 && tar -xvf /opt/ei_tools_${HEALTH_CHECK_VERSION}.tar.gz -C /opt/ \
 && rm -rf /opt/ei_tools_${HEALTH_CHECK_VERSION}.tar.gz \
 && chmod a+x /opt/ei_tools/bin/health_check \
 && mkdir -p /opt/ei_health_check/bin \
 && ln -s /opt/ei_tools/bin/health_check /opt/ei_health_check/bin/health_check \
 && ln -s /opt/ei_tools/lib /opt/ei_health_check/lib

EXPOSE 8080 8081
ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
CMD ["mxnet-model-server", "--start", "--mms-config", "/home/model-server/config.properties"]

test/conftest.py

Lines changed: 22 additions & 3 deletions
@@ -48,10 +48,12 @@ def pytest_addoption(parser):
     parser.addoption('--build-base-image', '-B', action='store_true')
     parser.addoption('--aws-id')
     parser.addoption('--instance-type')
+    parser.addoption('--accelerator-type')
     parser.addoption('--docker-base-name', default='pytorch')
     parser.addoption('--region', default='us-west-2')
     parser.addoption('--framework-version', default=PyTorch.LATEST_VERSION)
     parser.addoption('--py-version', choices=['2', '3'], default=str(sys.version_info.major))
+    # Processor is still "cpu" for EIA tests
     parser.addoption('--processor', choices=['gpu', 'cpu'], default='cpu')
     # If not specified, will default to {framework-version}-{processor}-py{py-version}
     parser.addoption('--tag', default=None)
@@ -162,6 +164,11 @@ def fixture_instance_type(request, processor):
     return provided_instance_type or default_instance_type


+@pytest.fixture(name='accelerator_type', scope='session')
+def fixture_accelerator_type(request):
+    return request.config.getoption('--accelerator-type')
+
+
 @pytest.fixture(name='docker_registry', scope='session')
 def fixture_docker_registry(aws_id, region):
     return '{}.dkr.ecr.{}.amazonaws.com'.format(aws_id, region)
@@ -173,10 +180,22 @@ def fixture_ecr_image(docker_registry, docker_base_name, tag):


 @pytest.fixture(autouse=True)
-def skip_by_device_type(request, use_gpu, instance_type):
+def skip_by_device_type(request, use_gpu, instance_type, accelerator_type):
     is_gpu = use_gpu or instance_type[3] in ['g', 'p']
-    if (request.node.get_closest_marker('skip_gpu') and is_gpu) or \
-            (request.node.get_closest_marker('skip_cpu') and not is_gpu):
+    is_eia = accelerator_type is not None
+
+    # Separate out cases for clearer logic.
+    # When running GPU test, skip CPU test. When running CPU test, skip GPU test.
+    if (request.node.get_closest_marker('gpu_test') and not is_gpu) or \
+            (request.node.get_closest_marker('cpu_test') and is_gpu):
+        pytest.skip('Skipping because running on \'{}\' instance'.format(instance_type))
+
+    # When running EIA test, skip the CPU and GPU tests.
+    elif (request.node.get_closest_marker('gpu_test') or request.node.get_closest_marker('cpu_test')) and is_eia:
+        pytest.skip('Skipping because running on \'{}\' instance'.format(instance_type))
+
+    # When running CPU or GPU test, skip EIA test.
+    elif request.node.get_closest_marker('eia_test') and not is_eia:
         pytest.skip('Skipping because running on \'{}\' instance'.format(instance_type))
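The three markers consumed by skip_by_device_type are plain pytest markers. A short sketch of how a test opts in, plus the marker registration that would silence pytest's unknown-marker warnings (the registration is an assumption; this commit does not show a pytest.ini or setup.cfg change):

import pytest

# Assumption: marker registration like this would live in a conftest.py or
# pytest.ini; the commit itself does not include one.
def pytest_configure(config):
    config.addinivalue_line('markers', 'cpu_test: runs only on plain CPU instances')
    config.addinivalue_line('markers', 'gpu_test: runs only on GPU instances')
    config.addinivalue_line('markers', 'eia_test: runs only when --accelerator-type is set')


@pytest.mark.eia_test
def test_requires_accelerator(accelerator_type):
    # skip_by_device_type() auto-skips this test unless --accelerator-type was passed.
    assert accelerator_type.startswith('ml.eia')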

test/integration/__init__.py

Lines changed: 9 additions & 2 deletions
@@ -19,12 +19,19 @@
 mnist_script = os.path.join(mnist_path, 'mnist.py')
 data_dir = os.path.join(mnist_path, 'data')
 training_dir = os.path.join(data_dir, 'training')
+cpu_sub_dir = 'model_cpu'
+gpu_sub_dir = 'model_gpu'
+eia_sub_dir = 'model_eia'

-model_cpu_dir = os.path.join(mnist_path, 'model_cpu')
+model_cpu_dir = os.path.join(mnist_path, cpu_sub_dir)
+mnist_cpu_script = os.path.join(model_cpu_dir, 'mnist.py')
 model_cpu_1d_dir = os.path.join(model_cpu_dir, '1d')
 mnist_1d_script = os.path.join(model_cpu_1d_dir, 'mnist_1d.py')
-model_gpu_dir = os.path.join(mnist_path, 'model_gpu')
+model_gpu_dir = os.path.join(mnist_path, gpu_sub_dir)
+mnist_gpu_script = os.path.join(model_gpu_dir, 'mnist.py')
 model_gpu_1d_dir = os.path.join(model_gpu_dir, '1d')
+model_eia_dir = os.path.join(mnist_path, eia_sub_dir)
+mnist_eia_script = os.path.join(model_eia_dir, 'mnist.py')
 call_model_fn_once_script = os.path.join(resources_path, 'call_model_fn_once.py')

 ROLE = 'dummy/unused-role'

test/integration/sagemaker/test_mnist.py

Lines changed: 27 additions & 14 deletions
@@ -19,41 +19,54 @@
 import sagemaker
 from sagemaker.pytorch import PyTorchModel

-from test.integration import mnist_script, model_cpu_dir
+from test.integration import model_cpu_dir, mnist_cpu_script, mnist_gpu_script, model_eia_dir, mnist_eia_script
 from test.integration.sagemaker.timeout import timeout_and_delete_endpoint


-@pytest.mark.skip_gpu
+@pytest.mark.cpu_test
 def test_mnist_distributed_cpu(sagemaker_session, ecr_image, instance_type):
     instance_type = instance_type or 'ml.c4.xlarge'
-    _test_mnist_distributed(sagemaker_session, ecr_image, instance_type)
+    model_dir = os.path.join(model_cpu_dir, 'model_mnist.tar.gz')
+    _test_mnist_distributed(sagemaker_session, ecr_image, instance_type, model_dir, mnist_cpu_script)


-@pytest.mark.skip_cpu
+@pytest.mark.gpu_test
 def test_mnist_distributed_gpu(sagemaker_session, ecr_image, instance_type):
     instance_type = instance_type or 'ml.p2.xlarge'
-    _test_mnist_distributed(sagemaker_session, ecr_image, instance_type)
+    model_dir = os.path.join(model_cpu_dir, 'model_mnist.tar.gz')
+    _test_mnist_distributed(sagemaker_session, ecr_image, instance_type, model_dir, mnist_gpu_script)


-def _test_mnist_distributed(sagemaker_session, ecr_image, instance_type):
-    model_dir = os.path.join(model_cpu_dir, 'model_mnist.tar.gz')
+@pytest.mark.eia_test
+def test_mnist_eia(sagemaker_session, ecr_image, instance_type, accelerator_type):
+    instance_type = instance_type or 'ml.c4.xlarge'
+    # Scripted model is serialized with torch.jit.save().
+    # Inference test for EIA doesn't need to instantiate model definition then load state_dict.
+    model_dir = os.path.join(model_eia_dir, 'model_mnist.tar.gz')
+    _test_mnist_distributed(sagemaker_session, ecr_image, instance_type, model_dir, mnist_eia_script,
+                            accelerator_type=accelerator_type)
+

+def _test_mnist_distributed(sagemaker_session, ecr_image, instance_type, model_dir, mnist_script,
+                            accelerator_type=None):
     endpoint_name = sagemaker.utils.unique_name_from_base("sagemaker-pytorch-serving")

     model_data = sagemaker_session.upload_data(
         path=model_dir,
         key_prefix="sagemaker-pytorch-serving/models",
     )

-    pytorch = PyTorchModel(model_data,
-                           'SageMakerRole',
-                           mnist_script,
-                           image=ecr_image,
-                           sagemaker_session=sagemaker_session)
+    pytorch = PyTorchModel(model_data=model_data, role='SageMakerRole', entry_point=mnist_script,
+                           image=ecr_image, sagemaker_session=sagemaker_session)

     with timeout_and_delete_endpoint(endpoint_name, sagemaker_session, minutes=30):
-        predictor = pytorch.deploy(initial_instance_count=1, instance_type=instance_type,
-                                   endpoint_name=endpoint_name)
+        # Use accelerator type to differentiate EI vs. CPU and GPU. Don't use processor value.
+        if accelerator_type is not None:
+            predictor = pytorch.deploy(initial_instance_count=1, instance_type=instance_type,
+                                       accelerator_type=accelerator_type, endpoint_name=endpoint_name)
+        else:
+            predictor = pytorch.deploy(initial_instance_count=1, instance_type=instance_type,
+                                       endpoint_name=endpoint_name)

         batch_size = 100
         data = np.random.rand(batch_size, 1, 28, 28).astype(np.float32)
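The hunk is truncated at the random input batch in this view. A typical continuation (hypothetical, not shown in the diff) would send the batch to the endpoint and check that each input yields ten class scores:

# Hypothetical continuation inside the `with` block above.
output = predictor.predict(data)
assert np.asarray(output).shape == (batch_size, 10)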
test/resources/mnist/model_eia/mnist.py

Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,48 @@
# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
#     http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
from __future__ import absolute_import
import logging
import os
import sys

import torch

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
logger.addHandler(logging.StreamHandler(sys.stdout))


def predict_fn(input_data, model):
    logger.info('Performing EIA inference with Torch JIT context with input of size {}'.format(input_data.shape))
    # With EI, the client instance should be CPU for cost-efficiency. Subgraphs with
    # unsupported operators run locally on the client; the server runs with CUDA.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    input_data = input_data.to(device)
    with torch.no_grad():
        # Set the target device to the accelerator ordinal
        with torch.jit.optimized_execution(True, {'target_device': 'eia:0'}):
            return model(input_data)


def model_fn(model_dir):
    logger.info('model_fn: Loading model with TorchScript from {}'.format(model_dir))
    # The scripted model is serialized with torch.jit.save(), so there is no need to
    # instantiate the model definition and then load a state_dict.
    model = torch.jit.load(os.path.join(model_dir, 'model.pth'))
    return model


def save_model(model, model_dir):
    logger.info("Saving the model to {}.".format(model_dir))
    path = os.path.join(model_dir, 'model.pth')
    torch.jit.save(model, path)
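For reference, the model_mnist.tar.gz artifact consumed by test_mnist_eia could be produced with TorchScript serialization as in the save_model() helper above. A hedged sketch, in which the Net module is a stand-in, not the repo's actual MNIST model:

# Sketch of building the TorchScript model archive; Net is a placeholder.
import tarfile

import torch
import torch.nn as nn


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc = nn.Linear(28 * 28, 10)

    def forward(self, x):
        return self.fc(x.view(x.size(0), -1))


# Trace to TorchScript so model_fn() can torch.jit.load() it without the class definition.
scripted = torch.jit.trace(Net(), torch.zeros(1, 1, 28, 28))
torch.jit.save(scripted, 'model.pth')  # equivalent to save_model(scripted, '.')

# Package the artifact the way the SageMaker test uploads it.
with tarfile.open('model_mnist.tar.gz', 'w:gz') as archive:
    archive.add('model.pth')
    archive.add('mnist.py')  # the entry-point script above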
The two remaining files in the commit are binary archives (one of 128 KB) and are not shown in the diff view.