1010# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
1111# ANY KIND, either express or implied. See the License for the specific
1212# language governing permissions and limitations under the License.
13- """Utility function to capture local environment """
13+ """Utility functions to build docker image """
1414from __future__ import absolute_import
1515
1616import logging
17+ import os
18+ import shutil
1719import subprocess
1820import sys
1921from typing import Optional
2426
logger = logging.getLogger(__name__)

# Per-image staging locations for the dependency files. Both are templates:
# call ``.format(image_name=...)`` to obtain the concrete path inside the
# image's build-context directory (``/tmp/<image_name>``).
REQUIREMENT_TXT_PATH = "/tmp/{image_name}/requirements.txt"
ENVIRONMENT_YML_PATH = "/tmp/{image_name}/environment.yml"
# Location the generated Dockerfile is written to before the build.
DOCKERFILE_PATH = "/tmp/Dockerfile"
3032
3133CONDA_DOCKERFILE_TEMPLATE = """
32- FROM {base_image_name }
34+ FROM {base_image }
3335ADD environment.yml .
3436
3537# Install prerequisites for conda
5557"""
5658
# Dockerfile template for pip-based images; fill in with
# ``.format(base_image=..., env_name=...)``.
#   * ``{{`` / ``}}`` are literal braces escaped for ``str.format`` (shell group).
#   * Each backslash-newline is consumed by the Python string literal itself, so
#     the fallback Python install is rendered as one single-line RUN instruction.
# NOTE(review): the venv is created at a relative path and the PATH entry is
# relative too, so it resolves against the working directory - confirm this is
# what consumers of the image expect.
PIP_DOCKERFILE_TEMPLATE = """
FROM {base_image}

# Install the latest Python3 if the base image doesn't have python pre-installed
RUN python --version || {{ \
    apt-get update; \
    apt-get install -y python3-full python3-pip; \
}}

ADD requirements.txt .

# Create a virtual environment
RUN python3 -m venv {env_name}

# Activate the virtual environment
ENV PATH="{env_name}/bin:$PATH"

RUN pip install --no-cache-dir -r requirements.txt
"""
6978
# Minimal Dockerfile template: the resulting image is the base image itself.
# Fill in with ``.format(base_image=...)``.
BASE_IMAGE_TEMPLATE = "\nFROM {base_image}\n"
82+
83+
def build_image(
    image_name: str = "sm-custom-image",
    env_name: str = "sm_custom_env",
    deploy_to_ecr: bool = False,
    base_image: Optional[str] = "ubuntu:latest",
    dependency_file: Optional[str] = None,
    ecr_repo_name: Optional[str] = None,
    boto_session: Optional[boto3.Session] = None,
    region: Optional[str] = None,
) -> Optional[str]:
    """WARNING: This function is experimental and not intended for production use.

    Build a docker image with the given base image and dependencies.

    When using this utility method, the docker daemon must be active in the environment.

    Args:
        image_name (str): The name of the docker image. It is also used as the name of the
            staging directory under /tmp, which is deleted and re-created on every call.
        env_name (str): The name of the virtual environment to be activated in the image,
            defaults to "sm_custom_env".
        deploy_to_ecr (bool): Whether to deploy the docker image to AWS ECR, defaults to False.
            If set to True, the AWS credentials must be configured in the environment.
        base_image (Optional[str]): The base Docker image, can be an AWS ECR image URI, defaults
            to ubuntu:latest. ``None`` or an empty string also falls back to ubuntu:latest.
        dependency_file (Optional[str]): The path to a dependencies file (conda
            environment ``.yml``/``.yaml`` file OR pip requirements ``.txt`` file). If omitted,
            the image is built from the base image only.
        ecr_repo_name (Optional[str]): The AWS ECR repo to push the docker image. If not specified,
            it will use image_name as the ECR repo name. This parameter is only valid when
            deploy_to_ecr is True.
        boto_session (Optional[boto3.Session]): The boto3 session with AWS account info. If not
            provided, a new boto session will be created.
        region (Optional[str]): The AWS region.

    Returns:
        Optional[str]: If deploy_to_ecr set to True, return the AWS ECR uri of the image.

    Exceptions:
        ValueError: The image name is empty or contains an empty, '.' or '..' path component,
            or the dependency file is neither a conda environment file nor a pip
            requirements file.
        docker.errors.DockerException: Error while fetching server API version:
            The docker engine is not running in your environment.
        docker.errors.BuildError: The docker failed to build the image. The most likely reason is:
            1) Some packages are not supported in the base image.
        botocore.exceptions.ClientError: AWS credentials are not configured.
    """
    # The staging directory is recursively deleted below, so refuse names that
    # would make it resolve to /tmp itself or to a location outside of /tmp.
    if not image_name or any(part in ("", ".", "..") for part in image_name.split("/")):
        raise ValueError(
            f"Invalid image name {image_name!r}: it must be non-empty and must not "
            "contain empty, '.' or '..' path components."
        )

    # The signature allows None; treat it like the documented default.
    if not base_image:
        base_image = "ubuntu:latest"

    # Pick the Dockerfile template and validate the dependency file type up
    # front, before anything is deleted or any remote call is made.
    dockerfile_template = BASE_IMAGE_TEMPLATE
    staged_dependency_path = None
    if dependency_file:
        if dependency_file.endswith((".yml", ".yaml")):
            dockerfile_template = CONDA_DOCKERFILE_TEMPLATE
            staged_dependency_path = ENVIRONMENT_YML_PATH.format(image_name=image_name)
        elif dependency_file.endswith(".txt"):
            dockerfile_template = PIP_DOCKERFILE_TEMPLATE
            staged_dependency_path = REQUIREMENT_TXT_PATH.format(image_name=image_name)
        else:
            raise ValueError(
                "The dependency file must be a conda "
                "environment.yml file or a pip requirements.txt file."
            )

    if ".dkr.ecr." in base_image:
        # If the base image is on AWS ECR, need to authenticate first
        _docker_ecr_login(boto_session, region)

    # Start from a clean per-image build context. makedirs (not mkdir) so that
    # namespaced image names such as "team/image" get their parent created too.
    build_dir = f"/tmp/{image_name}"
    if os.path.exists(build_dir):
        shutil.rmtree(build_dir)
    os.makedirs(build_dir)

    if staged_dependency_path:
        shutil.copy(dependency_file, staged_dependency_path)

    # str.format ignores unused keyword arguments, so env_name is harmless for
    # the base-image-only template.
    dockerfile_contents = dockerfile_template.format(
        base_image=base_image,
        env_name=env_name,
    )

    _build_docker_image(image_name, dockerfile_contents)
    if deploy_to_ecr:
        return _push_image_to_ecr(image_name, ecr_repo_name, boto_session, region)
    return None
163+
70164
71165def capture_local_environment (
72166 image_name : str = "sm-local-capture" ,
73167 env_name : str = "saved_local_env" ,
74168 package_manager : str = "pip" ,
75169 deploy_to_ecr : bool = False ,
76- base_image_name : Optional [str ] = None ,
170+ base_image : Optional [str ] = None ,
77171 job_conda_env : Optional [str ] = None ,
78172 additional_dependencies : Optional [str ] = None ,
79173 ecr_repo_name : Optional [str ] = None ,
80174 boto_session : Optional [boto3 .Session ] = None ,
81- ):
82- """Capture all dependency packages installed in the local environment and build a docker image.
175+ region : Optional [str ] = None ,
176+ ) -> Optional [str ]:
177+ """WARNING: This function is experimental and not intended for production use.
178+
179+ Capture all dependency packages installed in the local environment and build a docker image.
83180
84181 When using this utility method, the docker daemon must be active in the environment.
85182 Please note that this is an experimental feature. This utility function is not able to
@@ -93,8 +190,8 @@ def capture_local_environment(
93190 package_manager (str): The package manager, must be one of "conda" or "pip".
94191 deploy_to_ecr (bool): Whether to deploy the docker image to AWS ECR, defaults to False.
95192 If set to True, the AWS credentials must be configured in the environment.
96- base_image_name (Optional[str]): If provided will be used as the base image, else the
97- utility will evaluate from local environment in following manner:
193+ base_image (Optional[str]): If provided will be used as the base image, can be an AWS ECR
194+ image URI, else the utility will evaluate from local environment in following manner:
98195 1. If package manager is conda, it will use ubuntu:latest.
99196 2. If package manager is pip, it is resolved to base python image with the same
100197 python version as the environment running the local code.
@@ -104,12 +201,16 @@ def capture_local_environment(
104201 additional_dependencies (Optional[str]): Either the path to a dependencies file (conda
105202 environment.yml OR pip requirements.txt file). Regardless of this setting utility will
106203 automatically generate the dependencies file corresponding to the current active
107- environment’ s snapshot. In addition to this, additional dependencies is configurable.
204+ environment' s snapshot. In addition to this, additional dependencies is configurable.
108205 ecr_repo_name (Optional[str]): The AWS ECR repo to push the docker image. If not specified,
109206 it will use image_name as the ECR repo name. This parameter is only valid when
110207 deploy_to_ecr is True.
111208 boto_session (Optional[boto3.Session]): The boto3 session with AWS account info. If not
112209 provided, a new boto session will be created.
210+ region (Optional[str]): The AWS region.
211+
212+ Returns:
213+ Optional[str]: If deploy_to_ecr set to True, return the AWS ECR uri of the image.
113214
114215 Exceptions:
115216 docker.errors.DockerException: Error while fetching server API version:
@@ -119,17 +220,23 @@ def capture_local_environment(
119220 between your local environment and additional dependencies.
120221 botocore.exceptions.ClientError: AWS credentials are not configured.
121222 """
223+ path = f"/tmp/{ image_name } "
224+ if os .path .exists (path ):
225+ shutil .rmtree (path )
226+ os .mkdir (path )
227+ environment_yml_path = ENVIRONMENT_YML_PATH .format (image_name = image_name )
228+ requirement_txt_path = REQUIREMENT_TXT_PATH .format (image_name = image_name )
122229
123230 if package_manager == "conda" :
124231 if job_conda_env :
125232 subprocess .run (
126- f"conda env export -n { job_conda_env } > { ENVIRONMENT_YML_PATH } --no-builds" ,
233+ f"conda env export -n { job_conda_env } > { environment_yml_path } --no-builds" ,
127234 shell = True ,
128235 check = True ,
129236 )
130237 else :
131238 subprocess .run (
132- f"conda env export > { ENVIRONMENT_YML_PATH } --no-builds" , shell = True , check = True
239+ f"conda env export > { environment_yml_path } --no-builds" , shell = True , check = True
133240 )
134241
135242 if additional_dependencies :
@@ -143,26 +250,29 @@ def capture_local_environment(
143250 if additional_dependencies .endswith (".yml" ):
144251 _merge_environment_ymls (
145252 env_name ,
146- ENVIRONMENT_YML_PATH ,
253+ environment_yml_path ,
147254 additional_dependencies ,
148- ENVIRONMENT_YML_PATH ,
255+ environment_yml_path ,
149256 )
150257 elif additional_dependencies .endswith (".txt" ):
151258 _merge_environment_yml_with_requirement_txt (
152259 env_name ,
153- ENVIRONMENT_YML_PATH ,
260+ environment_yml_path ,
154261 additional_dependencies ,
155- ENVIRONMENT_YML_PATH ,
262+ environment_yml_path ,
156263 )
157264
158- if not base_image_name :
159- base_image_name = "ubuntu:latest"
265+ if not base_image :
266+ base_image = "ubuntu:latest"
267+ elif ".dkr.ecr." in base_image :
268+ # If the base image is on AWS ECR, need to authenticate first
269+ _docker_ecr_login (boto_session , region )
160270 dockerfile_contents = CONDA_DOCKERFILE_TEMPLATE .format (
161- base_image_name = base_image_name ,
271+ base_image = base_image ,
162272 env_name = env_name ,
163273 )
164274 elif package_manager == "pip" :
165- subprocess .run (f"pip list --format=freeze > { REQUIREMENT_TXT_PATH } " , shell = True , check = True )
275+ subprocess .run (f"pip list --format=freeze > { requirement_txt_path } " , shell = True , check = True )
166276
167277 if additional_dependencies :
168278 if not additional_dependencies .endswith (".txt" ):
@@ -171,15 +281,15 @@ def capture_local_environment(
171281 )
172282 with open (additional_dependencies , "r" ) as f :
173283 additional_requirements = f .read ()
174- with open (REQUIREMENT_TXT_PATH , "a" ) as f :
284+ with open (requirement_txt_path , "a" ) as f :
175285 f .write (additional_requirements )
176- logger .info ("Merged requirements file saved to %s" , REQUIREMENT_TXT_PATH )
286+ logger .info ("Merged requirements file saved to %s" , requirement_txt_path )
177287
178- if not base_image_name :
288+ if not base_image :
179289 version = sys .version_info
180- base_image_name = f"python:{ version .major } .{ version .minor } .{ version .micro } "
290+ base_image = f"python:{ version .major } .{ version .minor } .{ version .micro } "
181291 dockerfile_contents = PIP_DOCKERFILE_TEMPLATE .format (
182- base_image_name = base_image_name ,
292+ base_image = base_image ,
183293 env_name = env_name ,
184294 )
185295
@@ -189,25 +299,11 @@ def capture_local_environment(
189299 "Use conda or pip as the package manager."
190300 )
191301
192- # Create the Dockerfile
193- with open (DOCKERFILE_PATH , "w" ) as f :
194- f .write (dockerfile_contents )
195-
196- client = docker .from_env ()
197- _ , logs = client .images .build (
198- path = "/tmp" ,
199- dockerfile = DOCKERFILE_PATH ,
200- rm = True ,
201- tag = image_name ,
202- )
203- for log in logs :
204- logger .info (log .get ("stream" , "" ).strip ())
205- logger .info ("Docker image %s built successfully" , image_name )
302+ _build_docker_image (image_name , dockerfile_contents )
206303
207304 if deploy_to_ecr :
208- if boto_session is None :
209- boto_session = boto3 .Session ()
210- _push_image_to_ecr (image_name , ecr_repo_name , boto_session )
305+ return _push_image_to_ecr (image_name , ecr_repo_name , boto_session , region )
306+ return None
211307
212308
213309def _merge_environment_ymls (env_name : str , env_file1 : str , env_file2 : str , output_file : str ):
@@ -300,16 +396,68 @@ def _merge_environment_yml_with_requirement_txt(
300396 logger .info ("Merged environment file saved to '%s'" , output_file )
301397
302398
303- def _push_image_to_ecr (image_name : str , ecr_repo_name : str , boto_session : Optional [boto3 .Session ]):
def _build_docker_image(image_name: str, dockerfile_contents: str):
    """Build the Docker image locally.

    The Dockerfile is written into the image's own build-context directory
    (``/tmp/<image_name>``) rather than a location shared by all images, so
    builds of different images cannot overwrite each other's Dockerfile.

    Args:
        image_name (str): The name of the docker image. Also used as the tag and as
            the name of the build-context directory under /tmp.
        dockerfile_contents (str): The content of Dockerfile.
    """
    build_dir = f"/tmp/{image_name}"
    # Callers normally prepare the directory; create it if it is missing so the
    # helper also works on its own.
    os.makedirs(build_dir, exist_ok=True)

    # Create the Dockerfile
    dockerfile_name = "Dockerfile"
    with open(os.path.join(build_dir, dockerfile_name), "w", encoding="utf-8") as f:
        f.write(dockerfile_contents)

    client = docker.from_env()
    _, logs = client.images.build(
        path=build_dir,
        # Path of the Dockerfile relative to the build context.
        dockerfile=dockerfile_name,
        rm=True,
        tag=image_name,
    )
    for log in logs:
        message = log.get("stream", "").strip()
        # Entries without a "stream" payload would only produce empty log lines.
        if message:
            logger.info(message)
    logger.info("Docker image %s built successfully", image_name)
420+
421+
def _docker_ecr_login(boto_session: Optional[boto3.Session], region: Optional[str]):
    """Authenticate Docker with AWS ECR credentials

    Logs the local docker client in to the ECR registry of the caller's own AWS
    account in the resolved region, using the ``aws`` and ``docker`` CLIs.

    Args:
        boto_session (Optional[boto3.Session]): The boto3 session with AWS account info. If not
            provided, a new boto session will be created.
        region (Optional[str]): The AWS region. Only used when a new boto session is
            created; otherwise the session's region is used. Falls back to "us-west-2"
            when the session has no region.

    Exceptions:
        subprocess.CalledProcessError: Fetching the ECR password or the docker login failed.
    """
    if boto_session is None:
        boto_session = boto3.Session(region_name=region)
    region = boto_session.region_name or "us-west-2"
    aws_account_id = boto_session.client("sts", region_name=region).get_caller_identity()["Account"]
    registry_url = f"{aws_account_id}.dkr.ecr.{region}.amazonaws.com"

    # Run the two commands as argument lists instead of one shell pipeline:
    # nothing is interpreted by a shell, and a failure of the first command is
    # reported on its own instead of being masked by the pipe.
    password = subprocess.run(
        ["aws", "ecr", "get-login-password", "--region", region],
        check=True,
        stdout=subprocess.PIPE,
    ).stdout
    subprocess.run(
        ["docker", "login", "--username", "AWS", "--password-stdin", registry_url],
        input=password,
        check=True,
    )
439+
440+
441+ def _push_image_to_ecr (
442+ image_name : str ,
443+ ecr_repo_name : str ,
444+ boto_session : Optional [boto3 .Session ],
445+ region : Optional [str ],
446+ ):
304447 """Push the docker image to AWS ECR.
305448
306449 Args:
307450 image_name (str): The name of the docker image.
308451 ecr_repo_name (str): The AWS ECR repo to push the docker image.
452+ boto_session (Optional[boto3.Session]): The boto3 session with AWS account info. If not
453+ provided, a new boto session will be created.
454+ region (Optional[str]): The AWS region.
309455 """
310- region = boto_session .region_name
456+ if boto_session is None :
457+ boto_session = boto3 .Session (region_name = region )
458+ region = boto_session .region_name or "us-west-2"
311459 aws_account_id = boto_session .client ("sts" , region_name = region ).get_caller_identity ()["Account" ]
312- ecr_client = boto3 .client ("ecr" )
460+ ecr_client = boto_session .client ("ecr" , region_name = region )
313461
314462 # Authenticate Docker with ECR
315463 registry_url = f"{ aws_account_id } .dkr.ecr.{ region } .amazonaws.com"
@@ -336,3 +484,5 @@ def _push_image_to_ecr(image_name: str, ecr_repo_name: str, boto_session: Option
336484 subprocess .run (docker_push_cmd , shell = True , check = True )
337485
338486 logger .info ("Image %s pushed to %s" , image_name , ecr_image_uri )
487+
488+ return ecr_image_uri
0 commit comments