diff --git a/buildspec.yml b/buildspec.yml
index d9e70ebd..228c96a3 100644
--- a/buildspec.yml
+++ b/buildspec.yml
@@ -2,12 +2,10 @@ version: 0.2
 
 env:
   variables:
-    FRAMEWORK_VERSION: '1.4.0'
+    FRAMEWORK_VERSION: '1.5.0'
     EIA_FRAMEWORK_VERSION: '1.3.1'
-    CPU_PY2_VERSION: '2'
     CPU_PY3_VERSION: '3'
     CPU_INSTANCE_TYPE: 'ml.c4.xlarge'
-    GPU_PY2_VERSION: '2'
     GPU_PY3_VERSION: '3'
     EIA_PY3_VERSION: '3'
     GPU_INSTANCE_TYPE: 'ml.p2.xlarge'
@@ -40,16 +38,6 @@ phases:
       - gpu_dockerfile="Dockerfile.gpu"
       - eia_dockerfile="Dockerfile.eia"
 
-      # build py2 images
-      - build_dir="docker/$FRAMEWORK_VERSION/py$CPU_PY2_VERSION"
-      - cp -r docker/build_artifacts/* $build_dir/
-      - CPU_PY2_TAG="$FRAMEWORK_VERSION-cpu-py2-$BUILD_ID"
-      - GPU_PY2_TAG="$FRAMEWORK_VERSION-gpu-py2-$BUILD_ID"
-      - build_cmd="docker build -f "$build_dir/$cpu_dockerfile" -t $PREPROD_IMAGE:$CPU_PY2_TAG $build_dir"
-      - execute-command-if-has-matching-changes "$build_cmd" "test/" "docker/*" "buildspec.yml"
-      - build_cmd="docker build -f "$build_dir/$gpu_dockerfile" -t $PREPROD_IMAGE:$GPU_PY2_TAG $build_dir"
-      - execute-command-if-has-matching-changes "$build_cmd" "test/" "docker/*" "buildspec.yml"
-
       # build py3 image
       - build_dir="docker/$FRAMEWORK_VERSION/py$GPU_PY3_VERSION"
       - cp -r docker/build_artifacts/* $build_dir/
@@ -67,12 +55,8 @@ phases:
 
       # push images to ecr
       - $(aws ecr get-login --registry-ids $ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION)
-      - push_cmd="docker push $PREPROD_IMAGE:$CPU_PY2_TAG"
-      - execute-command-if-has-matching-changes "$push_cmd" "test/" "docker/*" "buildspec.yml"
       - push_cmd="docker push $PREPROD_IMAGE:$CPU_PY3_TAG"
       - execute-command-if-has-matching-changes "$push_cmd" "test/" "docker/*" "buildspec.yml"
-      - push_cmd="docker push $PREPROD_IMAGE:$GPU_PY2_TAG"
-      - execute-command-if-has-matching-changes "$push_cmd" "test/" "docker/*" "buildspec.yml"
       - push_cmd="docker push $PREPROD_IMAGE:$GPU_PY3_TAG"
       - execute-command-if-has-matching-changes "$push_cmd" "test/" "docker/*" "buildspec.yml"
       - push_cmd="docker push $PREPROD_IMAGE:$EIA_PY3_TAG"
@@ -86,9 +70,7 @@ phases:
 
       # run cpu integration tests
       - py3_cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --framework-version $FRAMEWORK_VERSION --py-version $CPU_PY3_VERSION --processor cpu --tag $CPU_PY3_TAG"
-      - py2_cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --framework-version $FRAMEWORK_VERSION --py-version $CPU_PY2_VERSION --processor cpu --tag $CPU_PY2_TAG"
       - execute-command-if-has-matching-changes "$py3_cmd" "test/" "docker/*" "buildspec.yml"
-      - execute-command-if-has-matching-changes "$py2_cmd" "test/" "docker/*" "buildspec.yml"
 
       # run gpu integration tests
       - printf "$SETUP_CMDS" > $SETUP_FILE
@@ -96,21 +78,13 @@ phases:
       - py3_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$py3_pytest_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\""
       - execute-command-if-has-matching-changes "$py3_cmd" "test/" "docker/*" "buildspec.yml"
 
-      - py2_pytest_cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --framework-version $FRAMEWORK_VERSION --py-version $GPU_PY2_VERSION --processor gpu --tag $GPU_PY2_TAG"
-      - py2_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$py2_pytest_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\" --skip-setup"
-      - execute-command-if-has-matching-changes "$py2_cmd" "test/" "docker/*" "buildspec.yml"
-
       # run cpu sagemaker tests
       - py3_cmd="pytest test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --py-version $CPU_PY3_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $CPU_PY3_TAG"
-      - py2_cmd="pytest test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --py-version $CPU_PY2_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $CPU_PY2_TAG"
       - execute-command-if-has-matching-changes "$py3_cmd" "test/" "docker/*" "buildspec.yml"
-      - execute-command-if-has-matching-changes "$py2_cmd" "test/" "docker/*" "buildspec.yml"
 
       # run gpu sagemaker tests
       - py3_cmd="pytest test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --py-version $GPU_PY3_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $GPU_PY3_TAG"
-      - py2_cmd="pytest test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --py-version $GPU_PY2_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $GPU_PY2_TAG"
       - execute-command-if-has-matching-changes "$py3_cmd" "test/" "docker/*" "buildspec.yml"
-      - execute-command-if-has-matching-changes "$py2_cmd" "test/" "docker/*" "buildspec.yml"
 
       # run eia sagemaker tests
       - py3_cmd="pytest test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $EIA_FRAMEWORK_VERSION --py-version $EIA_PY3_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --accelerator-type $EIA_ACCELERATOR_TYPE --tag $EIA_PY3_TAG"
@@ -122,8 +96,6 @@ phases:
       - cleanup-key-pairs
 
       # remove ecr image
-      - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$CPU_PY2_TAG
       - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$CPU_PY3_TAG
-      - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GPU_PY2_TAG
       - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GPU_PY3_TAG
       - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$EIA_PY3_TAG
diff --git a/docker/1.5.0/py3/Dockerfile.cpu b/docker/1.5.0/py3/Dockerfile.cpu
new file mode 100644
--- /dev/null
+++ b/docker/1.5.0/py3/Dockerfile.cpu
@@ -0,0 +1,92 @@
+# PyTorch 1.5.0 CPU inference image for SageMaker, served by mxnet-model-server.
+FROM ubuntu:16.04
+
+LABEL maintainer="Amazon AI"
+LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
+LABEL com.amazonaws.sagemaker.capabilities.multi-models=true
+
+# Version pins; each can be overridden with --build-arg.
+ARG PYTHON_VERSION=3.6.6
+ARG PYTORCH_VERSION=1.5.0
+ARG TORCHVISION_VERSION=0.6.0
+ARG MMS_VERSION=1.0.8
+
+# See http://bugs.python.org/issue19846
+ENV LANG C.UTF-8
+ENV LD_LIBRARY_PATH /opt/conda/lib/:$LD_LIBRARY_PATH
+ENV PATH /opt/conda/bin:$PATH
+ENV SAGEMAKER_SERVING_MODULE sagemaker_pytorch_serving_container.serving:main
+ENV TEMP=/home/model-server/tmp
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    ca-certificates \
+    cmake \
+    curl \
+    git \
+    jq \
+    libgl1-mesa-glx \
+    libglib2.0-0 \
+    libsm6 \
+    libxext6 \
+    libxrender-dev \
+    openjdk-8-jdk-headless \
+    vim \
+    wget \
+    zlib1g-dev
+
+# Install Miniconda into /opt/conda, then pin Python and the numeric stack.
+# Every conda call passes -y so the build never waits on a confirmation prompt.
+RUN curl -L -o ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \
+    && chmod +x ~/miniconda.sh \
+    && ~/miniconda.sh -b -p /opt/conda \
+    && rm ~/miniconda.sh \
+    && /opt/conda/bin/conda update -y conda \
+    && /opt/conda/bin/conda install -y \
+    python=$PYTHON_VERSION \
+    cython==0.29.12 \
+    ipython==7.7.0 \
+    mkl-include==2019.4 \
+    mkl==2019.4 \
+    numpy==1.16.4 \
+    scipy==1.3.0 \
+    typing==3.6.4 \
+    && /opt/conda/bin/conda clean -ya
+
+RUN conda install -y -c \
+    conda-forge \
+    opencv==4.0.1 \
+    && conda install -y \
+    scikit-learn==0.21.2 \
+    pandas==0.25.0 \
+    h5py==2.9.0 \
+    requests==2.22.0 \
+    && conda clean -ya \
+    && pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \
+    && ln -s /opt/conda/bin/pip /usr/local/bin/pip3 \
+    && pip install mxnet-model-server==$MMS_VERSION
+
+# Uninstall and re-install torch and torchvision from the PyTorch website
+RUN pip uninstall -y torch \
+    && pip uninstall -y torchvision \
+    && pip install torch==$PYTORCH_VERSION+cpu torchvision==$TORCHVISION_VERSION+cpu -f https://download.pytorch.org/whl/torch_stable.html
+
+RUN useradd -m model-server \
+    && mkdir -p /home/model-server/tmp \
+    && chown -R model-server /home/model-server
+
+COPY mms-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
+COPY config.properties /home/model-server
+
+RUN chmod +x /usr/local/bin/dockerd-entrypoint.py
+
+RUN pip install --no-cache-dir "sagemaker-pytorch-inference<2"
+
+RUN curl https://aws-dlc-licenses.s3.amazonaws.com/pytorch-1.5.0/license.txt -o /license.txt
+
+RUN conda install -y -c conda-forge pyyaml==5.3.1
+RUN pip install sagemaker-containers==2.8.6 pillow==7.1.0 awscli
+
+EXPOSE 8080 8081
+ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
+CMD ["mxnet-model-server", "--start", "--mms-config", "/home/model-server/config.properties"]
diff --git a/docker/1.5.0/py3/Dockerfile.gpu b/docker/1.5.0/py3/Dockerfile.gpu
new file mode 100644
--- /dev/null
+++ b/docker/1.5.0/py3/Dockerfile.gpu
@@ -0,0 +1,108 @@
+# PyTorch 1.5.0 GPU (CUDA 10.1) inference image for SageMaker, served by mxnet-model-server.
+FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu16.04
+# NCCL_VERSION=2.4.7, CUDNN_VERSION=7.6.2.24
+LABEL maintainer="Amazon AI"
+LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
+
+# Version pins; each can be overridden with --build-arg.
+ARG PYTHON_VERSION=3.6.6
+ARG PYTORCH_VERSION=1.5.0
+ARG TORCHVISION_VERSION=0.6.0
+ARG MMS_VERSION=1.0.8
+
+# See http://bugs.python.org/issue19846
+ENV LANG C.UTF-8
+ENV LD_LIBRARY_PATH /opt/conda/lib/:$LD_LIBRARY_PATH
+ENV PATH /opt/conda/bin:$PATH
+ENV SAGEMAKER_SERVING_MODULE sagemaker_pytorch_serving_container.serving:main
+ENV TEMP=/home/model-server/tmp
+
+RUN apt-get update \
+    && apt-get install -y --allow-downgrades --allow-change-held-packages --no-install-recommends \
+    build-essential \
+    ca-certificates \
+    cmake \
+    curl \
+    git \
+    jq \
+    libgl1-mesa-glx \
+    libglib2.0-0 \
+    libgomp1 \
+    libibverbs-dev \
+    libsm6 \
+    libxext6 \
+    libxrender-dev \
+    openjdk-8-jdk-headless \
+    vim \
+    wget \
+    zlib1g-dev
+
+# Install OpenSSH. Allow OpenSSH to talk to containers without asking for confirmation
+# apt-get update runs in this layer so a cached, stale package index is never reused.
+# The edited file must be moved onto /etc/ssh/ssh_config itself or ssh ignores it.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    openssh-client \
+    openssh-server \
+    && mkdir -p /var/run/sshd \
+    && cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new \
+    && echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new \
+    && mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
+
+# Install Miniconda into /opt/conda, then pin Python and the numeric stack.
+# Every conda call passes -y so the build never waits on a confirmation prompt.
+RUN curl -L -o ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \
+    && chmod +x ~/miniconda.sh \
+    && ~/miniconda.sh -b -p /opt/conda \
+    && rm ~/miniconda.sh \
+    && /opt/conda/bin/conda update -y conda \
+    && /opt/conda/bin/conda install -y \
+    python=$PYTHON_VERSION \
+    cython==0.29.12 \
+    ipython==7.7.0 \
+    mkl-include==2019.4 \
+    mkl==2019.4 \
+    numpy==1.16.4 \
+    scipy==1.3.0 \
+    typing==3.6.4 \
+    && /opt/conda/bin/conda clean -ya
+
+# conda's ssl_verify is left at its default so later conda installs still verify TLS.
+RUN conda install -y -c \
+    pytorch magma-cuda101 \
+    && conda install -y -c \
+    conda-forge \
+    opencv==4.0.1 \
+    && conda install -y \
+    scikit-learn==0.21.2 \
+    pandas==0.25.0 \
+    h5py==2.9.0 \
+    requests==2.22.0 \
+    && conda clean -ya \
+    && pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \
+    && ln -s /opt/conda/bin/pip /usr/local/bin/pip3 \
+    && pip install mxnet-model-server==$MMS_VERSION
+
+# Uninstall and re-install torch and torchvision from the PyTorch website
+RUN pip uninstall -y torch \
+    && pip uninstall -y torchvision \
+    && pip install torch==$PYTORCH_VERSION+cu101 torchvision==$TORCHVISION_VERSION+cu101 -f https://download.pytorch.org/whl/torch_stable.html
+
+RUN useradd -m model-server \
+    && mkdir -p /home/model-server/tmp \
+    && chown -R model-server /home/model-server
+
+COPY mms-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
+COPY config.properties /home/model-server
+
+RUN chmod +x /usr/local/bin/dockerd-entrypoint.py
+
+RUN pip install --no-cache-dir "sagemaker-pytorch-inference<2"
+
+RUN curl https://aws-dlc-licenses.s3.amazonaws.com/pytorch-1.5.0/license.txt -o /license.txt
+
+RUN conda install -y -c conda-forge pyyaml==5.3.1
+RUN pip install sagemaker-containers==2.8.6 pillow==7.1.0 awscli
+
+EXPOSE 8080 8081
+ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
+CMD ["mxnet-model-server", "--start", "--mms-config", "/home/model-server/config.properties"]