Skip to content

Commit 99655b2

Browse files
TusharKanekiDeyTushar Dey
and
Tushar Dey
authored
Adding Dockerfile for PT1.5 (#71)
* PyTorch 1.5 version for Inference
* Adding new line at the EOF
Co-authored-by: Tushar Dey <[email protected]>
1 parent b3bd2c1 commit 99655b2

File tree

3 files changed

+192
-29
lines changed

3 files changed

+192
-29
lines changed

buildspec.yml

Lines changed: 1 addition & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,10 @@ version: 0.2
22

33
env:
44
variables:
5-
FRAMEWORK_VERSION: '1.4.0'
5+
FRAMEWORK_VERSION: '1.5.0'
66
EIA_FRAMEWORK_VERSION: '1.3.1'
7-
CPU_PY2_VERSION: '2'
87
CPU_PY3_VERSION: '3'
98
CPU_INSTANCE_TYPE: 'ml.c4.xlarge'
10-
GPU_PY2_VERSION: '2'
119
GPU_PY3_VERSION: '3'
1210
EIA_PY3_VERSION: '3'
1311
GPU_INSTANCE_TYPE: 'ml.p2.xlarge'
@@ -40,16 +38,6 @@ phases:
4038
- gpu_dockerfile="Dockerfile.gpu"
4139
- eia_dockerfile="Dockerfile.eia"
4240

43-
# build py2 images
44-
- build_dir="docker/$FRAMEWORK_VERSION/py$CPU_PY2_VERSION"
45-
- cp -r docker/build_artifacts/* $build_dir/
46-
- CPU_PY2_TAG="$FRAMEWORK_VERSION-cpu-py2-$BUILD_ID"
47-
- GPU_PY2_TAG="$FRAMEWORK_VERSION-gpu-py2-$BUILD_ID"
48-
- build_cmd="docker build -f "$build_dir/$cpu_dockerfile" -t $PREPROD_IMAGE:$CPU_PY2_TAG $build_dir"
49-
- execute-command-if-has-matching-changes "$build_cmd" "test/" "docker/*" "buildspec.yml"
50-
- build_cmd="docker build -f "$build_dir/$gpu_dockerfile" -t $PREPROD_IMAGE:$GPU_PY2_TAG $build_dir"
51-
- execute-command-if-has-matching-changes "$build_cmd" "test/" "docker/*" "buildspec.yml"
52-
5341
# build py3 image
5442
- build_dir="docker/$FRAMEWORK_VERSION/py$GPU_PY3_VERSION"
5543
- cp -r docker/build_artifacts/* $build_dir/
@@ -67,12 +55,8 @@ phases:
6755

6856
# push images to ecr
6957
- $(aws ecr get-login --registry-ids $ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION)
70-
- push_cmd="docker push $PREPROD_IMAGE:$CPU_PY2_TAG"
71-
- execute-command-if-has-matching-changes "$push_cmd" "test/" "docker/*" "buildspec.yml"
7258
- push_cmd="docker push $PREPROD_IMAGE:$CPU_PY3_TAG"
7359
- execute-command-if-has-matching-changes "$push_cmd" "test/" "docker/*" "buildspec.yml"
74-
- push_cmd="docker push $PREPROD_IMAGE:$GPU_PY2_TAG"
75-
- execute-command-if-has-matching-changes "$push_cmd" "test/" "docker/*" "buildspec.yml"
7660
- push_cmd="docker push $PREPROD_IMAGE:$GPU_PY3_TAG"
7761
- execute-command-if-has-matching-changes "$push_cmd" "test/" "docker/*" "buildspec.yml"
7862
- push_cmd="docker push $PREPROD_IMAGE:$EIA_PY3_TAG"
@@ -86,31 +70,21 @@ phases:
8670

8771
# run cpu integration tests
8872
- py3_cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --framework-version $FRAMEWORK_VERSION --py-version $CPU_PY3_VERSION --processor cpu --tag $CPU_PY3_TAG"
89-
- py2_cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --framework-version $FRAMEWORK_VERSION --py-version $CPU_PY2_VERSION --processor cpu --tag $CPU_PY2_TAG"
9073
- execute-command-if-has-matching-changes "$py3_cmd" "test/" "docker/*" "buildspec.yml"
91-
- execute-command-if-has-matching-changes "$py2_cmd" "test/" "docker/*" "buildspec.yml"
9274

9375
# run gpu integration tests
9476
- printf "$SETUP_CMDS" > $SETUP_FILE
9577
- py3_pytest_cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --framework-version $FRAMEWORK_VERSION --py-version $GPU_PY3_VERSION --processor gpu --tag $GPU_PY3_TAG"
9678
- py3_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$py3_pytest_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\""
9779
- execute-command-if-has-matching-changes "$py3_cmd" "test/" "docker/*" "buildspec.yml"
9880

99-
- py2_pytest_cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --framework-version $FRAMEWORK_VERSION --py-version $GPU_PY2_VERSION --processor gpu --tag $GPU_PY2_TAG"
100-
- py2_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$py2_pytest_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\" --skip-setup"
101-
- execute-command-if-has-matching-changes "$py2_cmd" "test/" "docker/*" "buildspec.yml"
102-
10381
# run cpu sagemaker tests
10482
- py3_cmd="pytest test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --py-version $CPU_PY3_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $CPU_PY3_TAG"
105-
- py2_cmd="pytest test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --py-version $CPU_PY2_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $CPU_PY2_TAG"
10683
- execute-command-if-has-matching-changes "$py3_cmd" "test/" "docker/*" "buildspec.yml"
107-
- execute-command-if-has-matching-changes "$py2_cmd" "test/" "docker/*" "buildspec.yml"
10884

10985
# run gpu sagemaker tests
11086
- py3_cmd="pytest test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --py-version $GPU_PY3_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $GPU_PY3_TAG"
111-
- py2_cmd="pytest test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --py-version $GPU_PY2_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $GPU_PY2_TAG"
11287
- execute-command-if-has-matching-changes "$py3_cmd" "test/" "docker/*" "buildspec.yml"
113-
- execute-command-if-has-matching-changes "$py2_cmd" "test/" "docker/*" "buildspec.yml"
11488

11589
# run eia sagemaker tests
11690
- py3_cmd="pytest test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $EIA_FRAMEWORK_VERSION --py-version $EIA_PY3_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --accelerator-type $EIA_ACCELERATOR_TYPE --tag $EIA_PY3_TAG"
@@ -122,8 +96,6 @@ phases:
12296
- cleanup-key-pairs
12397

12498
# remove ecr image
125-
- aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$CPU_PY2_TAG
12699
- aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$CPU_PY3_TAG
127-
- aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GPU_PY2_TAG
128100
- aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GPU_PY3_TAG
129101
- aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$EIA_PY3_TAG

docker/1.5.0/py3/Dockerfile.cpu

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
FROM ubuntu:16.04
2+
3+
LABEL maintainer="Amazon AI"
4+
LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
5+
LABEL com.amazonaws.sagemaker.capabilities.multi-models=true
6+
7+
ARG PYTHON_VERSION=3.6.6
8+
ARG PYTORCH_VERSION=1.5.0
9+
ARG TORCHVISION_VERSION=0.6.0
10+
ARG MMS_VERSION=1.0.8
11+
12+
# See http://bugs.python.org/issue19846
13+
ENV LANG C.UTF-8
14+
ENV LD_LIBRARY_PATH /opt/conda/lib/:$LD_LIBRARY_PATH
15+
ENV PATH /opt/conda/bin:$PATH
16+
ENV SAGEMAKER_SERVING_MODULE sagemaker_pytorch_serving_container.serving:main
17+
ENV TEMP=/home/model-server/tmp
18+
19+
RUN apt-get update && apt-get install -y --no-install-recommends \
20+
build-essential \
21+
ca-certificates \
22+
cmake \
23+
curl \
24+
git \
25+
jq \
26+
libgl1-mesa-glx \
27+
libglib2.0-0 \
28+
libsm6 \
29+
libxext6 \
30+
libxrender-dev \
31+
openjdk-8-jdk-headless \
32+
vim \
33+
wget \
34+
zlib1g-dev
35+
36+
RUN curl -L -o ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \
37+
&& chmod +x ~/miniconda.sh \
38+
&& ~/miniconda.sh -b -p /opt/conda \
39+
&& rm ~/miniconda.sh \
40+
&& /opt/conda/bin/conda update conda \
41+
&& /opt/conda/bin/conda install -y \
42+
python=$PYTHON_VERSION \
43+
cython==0.29.12 \
44+
ipython==7.7.0 \
45+
mkl-include==2019.4 \
46+
mkl==2019.4 \
47+
numpy==1.16.4 \
48+
scipy==1.3.0 \
49+
typing==3.6.4 \
50+
&& /opt/conda/bin/conda clean -ya
51+
52+
RUN conda install -c \
53+
conda-forge \
54+
opencv==4.0.1 \
55+
&& conda install -y \
56+
scikit-learn==0.21.2 \
57+
pandas==0.25.0 \
58+
h5py==2.9.0 \
59+
requests==2.22.0 \
60+
&& conda clean -ya \
61+
&& pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \
62+
&& ln -s /opt/conda/bin/pip /usr/local/bin/pip3 \
63+
&& pip install mxnet-model-server==$MMS_VERSION
64+
65+
# Uninstall and re-install torch and torchvision from the PyTorch website
66+
RUN pip uninstall -y torch \
67+
&& pip uninstall -y torchvision \
68+
&& pip install torch==$PYTORCH_VERSION+cpu torchvision==$TORCHVISION_VERSION+cpu -f https://download.pytorch.org/whl/torch_stable.html
69+
70+
RUN useradd -m model-server \
71+
&& mkdir -p /home/model-server/tmp \
72+
&& chown -R model-server /home/model-server
73+
74+
COPY mms-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
75+
COPY config.properties /home/model-server
76+
77+
RUN chmod +x /usr/local/bin/dockerd-entrypoint.py
78+
79+
RUN pip install --no-cache-dir "sagemaker-pytorch-inference<2"
80+
81+
RUN curl https://aws-dlc-licenses.s3.amazonaws.com/pytorch-1.5.0/license.txt -o /license.txt
82+
83+
RUN conda install -y -c conda-forge pyyaml==5.3.1
84+
RUN pip install sagemaker-containers==2.8.6 pillow==7.1.0 awscli
85+
86+
EXPOSE 8080 8081
87+
ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
88+
CMD ["mxnet-model-server", "--start", "--mms-config", "/home/model-server/config.properties"]

docker/1.5.0/py3/Dockerfile.gpu

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu16.04
2+
# NCCL_VERSION=2.4.7, CUDNN_VERSION=7.6.2.24
3+
LABEL maintainer="Amazon AI"
4+
LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
5+
6+
# Add arguments to achieve the version, python and url
7+
ARG PYTHON_VERSION=3.6.6
8+
ARG PYTORCH_VERSION=1.5.0
9+
ARG TORCHVISION_VERSION=0.6.0
10+
ARG MMS_VERSION=1.0.8
11+
12+
# See http://bugs.python.org/issue19846
13+
ENV LANG C.UTF-8
14+
ENV LD_LIBRARY_PATH /opt/conda/lib/:$LD_LIBRARY_PATH
15+
ENV PATH /opt/conda/bin:$PATH
16+
ENV SAGEMAKER_SERVING_MODULE sagemaker_pytorch_serving_container.serving:main
17+
ENV TEMP=/home/model-server/tmp
18+
19+
RUN apt-get update \
20+
&& apt-get install -y --allow-downgrades --allow-change-held-packages --no-install-recommends \
21+
build-essential \
22+
ca-certificates \
23+
cmake \
24+
curl \
25+
git \
26+
jq \
27+
libgl1-mesa-glx \
28+
libglib2.0-0 \
29+
libgomp1 \
30+
libibverbs-dev \
31+
libsm6 \
32+
libxext6 \
33+
libxrender-dev \
34+
openjdk-8-jdk-headless \
35+
vim \
36+
wget \
37+
zlib1g-dev
38+
39+
# Install OpenSSH. Allow OpenSSH to talk to containers without asking for confirmation
40+
RUN apt-get install -y --no-install-recommends \
41+
openssh-client \
42+
openssh-server \
43+
&& mkdir -p /var/run/sshd \
44+
&& cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new \
45+
&& echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new \
46+
&& mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
47+
48+
RUN curl -L -o ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \
49+
&& chmod +x ~/miniconda.sh \
50+
&& ~/miniconda.sh -b -p /opt/conda \
51+
&& rm ~/miniconda.sh \
52+
&& /opt/conda/bin/conda update conda \
53+
&& /opt/conda/bin/conda install -y \
54+
python=$PYTHON_VERSION \
55+
cython==0.29.12 \
56+
ipython==7.7.0 \
57+
mkl-include==2019.4 \
58+
mkl==2019.4 \
59+
numpy==1.16.4 \
60+
scipy==1.3.0 \
61+
typing==3.6.4 \
62+
&& /opt/conda/bin/conda clean -ya
63+
64+
RUN conda install -c \
65+
pytorch magma-cuda101 \
66+
&& conda install -c \
67+
conda-forge \
68+
opencv==4.0.1 \
69+
&& conda install -y \
70+
scikit-learn==0.21.2 \
71+
pandas==0.25.0 \
72+
h5py==2.9.0 \
73+
requests==2.22.0 \
74+
&& conda clean -ya \
75+
&& /opt/conda/bin/conda config --set ssl_verify False \
76+
&& pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \
77+
&& ln -s /opt/conda/bin/pip /usr/local/bin/pip3 \
78+
&& pip install mxnet-model-server==$MMS_VERSION
79+
80+
# Uninstall and re-install torch and torchvision from the PyTorch website
81+
RUN pip uninstall -y torch \
82+
&& pip uninstall -y torchvision \
83+
&& pip install torch==$PYTORCH_VERSION+cu101 torchvision==$TORCHVISION_VERSION+cu101 -f https://download.pytorch.org/whl/torch_stable.html
84+
85+
RUN useradd -m model-server \
86+
&& mkdir -p /home/model-server/tmp \
87+
&& chown -R model-server /home/model-server
88+
89+
COPY mms-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
90+
COPY config.properties /home/model-server
91+
92+
RUN chmod +x /usr/local/bin/dockerd-entrypoint.py
93+
94+
RUN pip install --no-cache-dir "sagemaker-pytorch-inference<2"
95+
96+
RUN curl https://aws-dlc-licenses.s3.amazonaws.com/pytorch-1.5.0/license.txt -o /license.txt
97+
98+
RUN conda install -y -c conda-forge pyyaml==5.3.1
99+
RUN pip install sagemaker-containers==2.8.6 pillow==7.1.0 awscli
100+
101+
EXPOSE 8080 8081
102+
ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
103+
CMD ["mxnet-model-server", "--start", "--mms-config", "/home/model-server/config.properties"]

0 commit comments

Comments
 (0)