10
10
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11
11
# ANY KIND, either express or implied. See the License for the specific
12
12
# language governing permissions and limitations under the License.
13
- """Utility function to capture local environment """
13
+ """Utility functions to build docker image """
14
14
from __future__ import absolute_import
15
15
16
16
import logging
17
+ import os
18
+ import shutil
17
19
import subprocess
18
20
import sys
19
21
from typing import Optional
24
26
25
27
logger = logging .getLogger (__name__ )
26
28
27
- REQUIREMENT_TXT_PATH = "/tmp/requirements.txt"
28
- ENVIRONMENT_YML_PATH = "/tmp/environment.yml"
29
+ REQUIREMENT_TXT_PATH = "/tmp/{image_name}/ requirements.txt"
30
+ ENVIRONMENT_YML_PATH = "/tmp/{image_name}/ environment.yml"
29
31
DOCKERFILE_PATH = "/tmp/Dockerfile"
30
32
31
33
CONDA_DOCKERFILE_TEMPLATE = """
32
- FROM {base_image_name }
34
+ FROM {base_image }
33
35
ADD environment.yml .
34
36
35
37
# Install prerequisites for conda
55
57
"""
56
58
57
59
PIP_DOCKERFILE_TEMPLATE = """
58
- FROM {base_image_name}
60
+ FROM {base_image}
61
+
62
+ # Install the latest Python3 if the base image doesn't have python pre-installed
63
+ RUN python --version || {{ \
64
+ apt-get update; \
65
+ apt-get install -y python3-full python3-pip; \
66
+ }}
67
+
59
68
ADD requirements.txt .
60
69
61
70
# Create a virtual environment
62
- RUN python -m venv {env_name}
71
+ RUN python3 -m venv {env_name}
63
72
64
73
# Activate the virtual environment
65
- RUN . {env_name}/bin/activate
74
+ ENV PATH=" {env_name}/bin:$PATH"
66
75
67
76
RUN pip install --no-cache-dir -r requirements.txt
68
77
"""
69
78
79
+ BASE_IMAGE_TEMPLATE = """
80
+ FROM {base_image}
81
+ """
82
+
83
+
84
def build_image(
    image_name: str = "sm-custom-image",
    env_name: str = "sm_custom_env",
    deploy_to_ecr: bool = False,
    base_image: Optional[str] = "ubuntu:latest",
    dependency_file: Optional[str] = None,
    ecr_repo_name: Optional[str] = None,
    boto_session: Optional[boto3.Session] = None,
    region: Optional[str] = None,
) -> Optional[str]:
    """WARNING: This function is experimental and not intended for production use.

    Build a docker image with the given base image and dependencies.

    When using this utility method, the docker daemon must be active in the environment.

    Args:
        image_name (str): The name of the docker image. It is also used as the name of the
            scratch build directory ``/tmp/<image_name>``, which is deleted and recreated
            on every call.
        env_name (str): The name of the virtual environment to be activated in the image,
            defaults to "sm_custom_env".
        deploy_to_ecr (bool): Whether to deploy the docker image to AWS ECR, defaults to False.
            If set to True, the AWS credentials must be configured in the environment.
        base_image (Optional[str]): The base Docker image, can be an AWS ECR image URI, defaults
            to ubuntu:latest. ``None`` or an empty string also falls back to ubuntu:latest.
        dependency_file (Optional[str]): Path to a dependencies file, either a conda
            environment file (``.yml`` / ``.yaml``) or a pip requirements file (``.txt``).
            If omitted, the image is built from the base image alone.
        ecr_repo_name (Optional[str]): The AWS ECR repo to push the docker image. If not specified,
            it will use image_name as the ECR repo name. This parameter is only valid when
            deploy_to_ecr is True.
        boto_session (Optional[boto3.Session]): The boto3 session with AWS account info. If not
            provided, a new boto session will be created.
        region (Optional[str]): The AWS region.

    Returns:
        Optional[str]: If deploy_to_ecr set to True, return the AWS ECR uri of the image,
            otherwise None.

    Exceptions:
        ValueError: The dependency file is neither a conda environment file nor a pip
            requirements file.
        docker.errors.DockerException: Error while fetching server API version:
            The docker engine is not running in your environment.
        docker.errors.BuildError: The docker failed to build the image. The most likely reason is:
            1) Some packages are not supported in the base image.
        botocore.exceptions.ClientError: AWS credentials are not configured.
    """
    # Validate the dependency file first so a bad argument fails before any
    # side effect (ECR login, wiping the scratch directory).
    is_conda_file = bool(dependency_file) and dependency_file.endswith((".yml", ".yaml"))
    is_pip_file = bool(dependency_file) and dependency_file.endswith(".txt")
    if dependency_file and not (is_conda_file or is_pip_file):
        raise ValueError(
            "The dependency file must be a conda "
            "environment.yml file or a pip requirements.txt file."
        )

    # The annotation allows None; treat it like the documented default instead
    # of failing on the substring check below.
    if not base_image:
        base_image = "ubuntu:latest"
    elif ".dkr.ecr." in base_image:
        # If the base image is on AWS ECR, need to authenticate first
        _docker_ecr_login(boto_session, region)

    # Start from an empty per-image build context.
    path = f"/tmp/{image_name}"
    if os.path.exists(path):
        shutil.rmtree(path)
    os.mkdir(path)

    if is_conda_file:
        # The conda Dockerfile template ADDs "environment.yml", so the file is
        # always copied under that name regardless of its original name.
        shutil.copy(dependency_file, ENVIRONMENT_YML_PATH.format(image_name=image_name))
        dockerfile_contents = CONDA_DOCKERFILE_TEMPLATE.format(
            base_image=base_image,
            env_name=env_name,
        )
    elif is_pip_file:
        # The pip Dockerfile template ADDs "requirements.txt".
        shutil.copy(dependency_file, REQUIREMENT_TXT_PATH.format(image_name=image_name))
        dockerfile_contents = PIP_DOCKERFILE_TEMPLATE.format(
            base_image=base_image,
            env_name=env_name,
        )
    else:
        dockerfile_contents = BASE_IMAGE_TEMPLATE.format(base_image=base_image)

    _build_docker_image(image_name, dockerfile_contents)
    if deploy_to_ecr:
        return _push_image_to_ecr(image_name, ecr_repo_name, boto_session, region)
    return None
163
+
70
164
71
165
def capture_local_environment (
72
166
image_name : str = "sm-local-capture" ,
73
167
env_name : str = "saved_local_env" ,
74
168
package_manager : str = "pip" ,
75
169
deploy_to_ecr : bool = False ,
76
- base_image_name : Optional [str ] = None ,
170
+ base_image : Optional [str ] = None ,
77
171
job_conda_env : Optional [str ] = None ,
78
172
additional_dependencies : Optional [str ] = None ,
79
173
ecr_repo_name : Optional [str ] = None ,
80
174
boto_session : Optional [boto3 .Session ] = None ,
81
- ):
82
- """Capture all dependency packages installed in the local environment and build a docker image.
175
+ region : Optional [str ] = None ,
176
+ ) -> Optional [str ]:
177
+ """WARNING: This function is experimental and not intended for production use.
178
+
179
+ Capture all dependency packages installed in the local environment and build a docker image.
83
180
84
181
When using this utility method, the docker daemon must be active in the environment.
85
182
Please note that this is an experimental feature. This utility function is not able to
@@ -93,8 +190,8 @@ def capture_local_environment(
93
190
package_manager (str): The package manager, must be one of "conda" or "pip".
94
191
deploy_to_ecr (bool): Whether to deploy the docker image to AWS ECR, defaults to False.
95
192
If set to True, the AWS credentials must be configured in the environment.
96
- base_image_name (Optional[str]): If provided will be used as the base image, else the
97
- utility will evaluate from local environment in following manner:
193
+ base_image (Optional[str]): If provided will be used as the base image, can be an AWS ECR
194
+ image URI, else the utility will evaluate from local environment in following manner:
98
195
1. If package manager is conda, it will use ubuntu:latest.
99
196
2. If package manager is pip, it is resolved to base python image with the same
100
197
python version as the environment running the local code.
@@ -104,12 +201,16 @@ def capture_local_environment(
104
201
additional_dependencies (Optional[str]): Either the path to a dependencies file (conda
105
202
environment.yml OR pip requirements.txt file). Regardless of this setting utility will
106
203
automatically generate the dependencies file corresponding to the current active
107
- environment’ s snapshot. In addition to this, additional dependencies is configurable.
204
+ environment's snapshot. In addition to this, additional dependencies is configurable.
108
205
ecr_repo_name (Optional[str]): The AWS ECR repo to push the docker image. If not specified,
109
206
it will use image_name as the ECR repo name. This parameter is only valid when
110
207
deploy_to_ecr is True.
111
208
boto_session (Optional[boto3.Session]): The boto3 session with AWS account info. If not
112
209
provided, a new boto session will be created.
210
+ region (Optional[str]): The AWS region.
211
+
212
+ Returns:
213
+ Optional[str]: If deploy_to_ecr set to True, return the AWS ECR uri of the image.
113
214
114
215
Exceptions:
115
216
docker.errors.DockerException: Error while fetching server API version:
@@ -119,17 +220,23 @@ def capture_local_environment(
119
220
between your local environment and additional dependencies.
120
221
botocore.exceptions.ClientError: AWS credentials are not configured.
121
222
"""
223
+ path = f"/tmp/{ image_name } "
224
+ if os .path .exists (path ):
225
+ shutil .rmtree (path )
226
+ os .mkdir (path )
227
+ environment_yml_path = ENVIRONMENT_YML_PATH .format (image_name = image_name )
228
+ requirement_txt_path = REQUIREMENT_TXT_PATH .format (image_name = image_name )
122
229
123
230
if package_manager == "conda" :
124
231
if job_conda_env :
125
232
subprocess .run (
126
- f"conda env export -n { job_conda_env } > { ENVIRONMENT_YML_PATH } --no-builds" ,
233
+ f"conda env export -n { job_conda_env } > { environment_yml_path } --no-builds" ,
127
234
shell = True ,
128
235
check = True ,
129
236
)
130
237
else :
131
238
subprocess .run (
132
- f"conda env export > { ENVIRONMENT_YML_PATH } --no-builds" , shell = True , check = True
239
+ f"conda env export > { environment_yml_path } --no-builds" , shell = True , check = True
133
240
)
134
241
135
242
if additional_dependencies :
@@ -143,26 +250,29 @@ def capture_local_environment(
143
250
if additional_dependencies .endswith (".yml" ):
144
251
_merge_environment_ymls (
145
252
env_name ,
146
- ENVIRONMENT_YML_PATH ,
253
+ environment_yml_path ,
147
254
additional_dependencies ,
148
- ENVIRONMENT_YML_PATH ,
255
+ environment_yml_path ,
149
256
)
150
257
elif additional_dependencies .endswith (".txt" ):
151
258
_merge_environment_yml_with_requirement_txt (
152
259
env_name ,
153
- ENVIRONMENT_YML_PATH ,
260
+ environment_yml_path ,
154
261
additional_dependencies ,
155
- ENVIRONMENT_YML_PATH ,
262
+ environment_yml_path ,
156
263
)
157
264
158
- if not base_image_name :
159
- base_image_name = "ubuntu:latest"
265
+ if not base_image :
266
+ base_image = "ubuntu:latest"
267
+ elif ".dkr.ecr." in base_image :
268
+ # If the base image is on AWS ECR, need to authenticate first
269
+ _docker_ecr_login (boto_session , region )
160
270
dockerfile_contents = CONDA_DOCKERFILE_TEMPLATE .format (
161
- base_image_name = base_image_name ,
271
+ base_image = base_image ,
162
272
env_name = env_name ,
163
273
)
164
274
elif package_manager == "pip" :
165
- subprocess .run (f"pip list --format=freeze > { REQUIREMENT_TXT_PATH } " , shell = True , check = True )
275
+ subprocess .run (f"pip list --format=freeze > { requirement_txt_path } " , shell = True , check = True )
166
276
167
277
if additional_dependencies :
168
278
if not additional_dependencies .endswith (".txt" ):
@@ -171,15 +281,15 @@ def capture_local_environment(
171
281
)
172
282
with open (additional_dependencies , "r" ) as f :
173
283
additional_requirements = f .read ()
174
- with open (REQUIREMENT_TXT_PATH , "a" ) as f :
284
+ with open (requirement_txt_path , "a" ) as f :
175
285
f .write (additional_requirements )
176
- logger .info ("Merged requirements file saved to %s" , REQUIREMENT_TXT_PATH )
286
+ logger .info ("Merged requirements file saved to %s" , requirement_txt_path )
177
287
178
- if not base_image_name :
288
+ if not base_image :
179
289
version = sys .version_info
180
- base_image_name = f"python:{ version .major } .{ version .minor } .{ version .micro } "
290
+ base_image = f"python:{ version .major } .{ version .minor } .{ version .micro } "
181
291
dockerfile_contents = PIP_DOCKERFILE_TEMPLATE .format (
182
- base_image_name = base_image_name ,
292
+ base_image = base_image ,
183
293
env_name = env_name ,
184
294
)
185
295
@@ -189,25 +299,11 @@ def capture_local_environment(
189
299
"Use conda or pip as the package manager."
190
300
)
191
301
192
- # Create the Dockerfile
193
- with open (DOCKERFILE_PATH , "w" ) as f :
194
- f .write (dockerfile_contents )
195
-
196
- client = docker .from_env ()
197
- _ , logs = client .images .build (
198
- path = "/tmp" ,
199
- dockerfile = DOCKERFILE_PATH ,
200
- rm = True ,
201
- tag = image_name ,
202
- )
203
- for log in logs :
204
- logger .info (log .get ("stream" , "" ).strip ())
205
- logger .info ("Docker image %s built successfully" , image_name )
302
+ _build_docker_image (image_name , dockerfile_contents )
206
303
207
304
if deploy_to_ecr :
208
- if boto_session is None :
209
- boto_session = boto3 .Session ()
210
- _push_image_to_ecr (image_name , ecr_repo_name , boto_session )
305
+ return _push_image_to_ecr (image_name , ecr_repo_name , boto_session , region )
306
+ return None
211
307
212
308
213
309
def _merge_environment_ymls (env_name : str , env_file1 : str , env_file2 : str , output_file : str ):
@@ -300,16 +396,68 @@ def _merge_environment_yml_with_requirement_txt(
300
396
logger .info ("Merged environment file saved to '%s'" , output_file )
301
397
302
398
303
- def _push_image_to_ecr (image_name : str , ecr_repo_name : str , boto_session : Optional [boto3 .Session ]):
399
def _build_docker_image(image_name: str, dockerfile_contents: str):
    """Build the Docker image locally.

    The Dockerfile is written inside the per-image build context
    ``/tmp/<image_name>`` rather than to one shared path, so builds of
    different images cannot overwrite each other's Dockerfile.

    Args:
        image_name (str): The name of the docker image. Used both as the image tag and
            as the name of the build context directory under ``/tmp``.
        dockerfile_contents (str): The content of Dockerfile.
    """
    build_context = f"/tmp/{image_name}"
    # Callers normally create the context directory; make sure it exists so
    # this helper also works when called on its own.
    os.makedirs(build_context, exist_ok=True)

    # Create the Dockerfile
    with open(os.path.join(build_context, "Dockerfile"), "w") as f:
        f.write(dockerfile_contents)

    client = docker.from_env()
    _, logs = client.images.build(
        path=build_context,
        # Relative name: resolved against the build context.
        dockerfile="Dockerfile",
        rm=True,
        tag=image_name,
    )
    for log in logs:
        message = log.get("stream", "").strip()
        if message:
            # Entries without a "stream" key would otherwise log empty lines.
            logger.info(message)
    logger.info("Docker image %s built successfully", image_name)
420
+
421
+
422
def _docker_ecr_login(boto_session: Optional[boto3.Session], region: Optional[str]):
    """Authenticate Docker with AWS ECR credentials

    The ECR authorization token is requested through the given boto3 session,
    so the login uses that session's credentials and does not need the AWS CLI.
    The password is passed to ``docker login`` on stdin, without a shell.

    Args:
        boto_session (Optional[boto3.Session]): The boto3 session with AWS account info. If not
            provided, a new boto session will be created.
        region (Optional[str]): The AWS region. Only used when a new session is created;
            otherwise the session's region is used, falling back to "us-west-2".

    Exceptions:
        subprocess.CalledProcessError: ``docker login`` exited with a non-zero status.
    """
    import base64

    if boto_session is None:
        boto_session = boto3.Session(region_name=region)
    region = boto_session.region_name or "us-west-2"

    # NOTE(review): this logs in to the caller's default registry in `region`.
    # A base image hosted in another account or region would need its own
    # registry login - confirm whether that case must be supported.
    ecr_client = boto_session.client("ecr", region_name=region)
    auth_data = ecr_client.get_authorization_token()["authorizationData"][0]

    # The token is base64("<username>:<password>").
    decoded_token = base64.b64decode(auth_data["authorizationToken"]).decode("utf-8")
    username, _, password = decoded_token.partition(":")

    # proxyEndpoint looks like "https://<account>.dkr.ecr.<region>.<domain>";
    # drop the scheme so docker gets the bare registry host.
    registry = auth_data["proxyEndpoint"].split("://", 1)[-1]

    subprocess.run(
        ["docker", "login", "--username", username, "--password-stdin", registry],
        input=password.encode("utf-8"),
        check=True,
    )
439
+
440
+
441
+ def _push_image_to_ecr (
442
+ image_name : str ,
443
+ ecr_repo_name : str ,
444
+ boto_session : Optional [boto3 .Session ],
445
+ region : Optional [str ],
446
+ ):
304
447
"""Push the docker image to AWS ECR.
305
448
306
449
Args:
307
450
image_name (str): The name of the docker image.
308
451
ecr_repo_name (str): The AWS ECR repo to push the docker image.
452
+ boto_session (Optional[boto3.Session]): The boto3 session with AWS account info. If not
453
+ provided, a new boto session will be created.
454
+ region (Optional[str]): The AWS region.
309
455
"""
310
- region = boto_session .region_name
456
+ if boto_session is None :
457
+ boto_session = boto3 .Session (region_name = region )
458
+ region = boto_session .region_name or "us-west-2"
311
459
aws_account_id = boto_session .client ("sts" , region_name = region ).get_caller_identity ()["Account" ]
312
- ecr_client = boto3 .client ("ecr" )
460
+ ecr_client = boto_session .client ("ecr" , region_name = region )
313
461
314
462
# Authenticate Docker with ECR
315
463
registry_url = f"{ aws_account_id } .dkr.ecr.{ region } .amazonaws.com"
@@ -336,3 +484,5 @@ def _push_image_to_ecr(image_name: str, ecr_repo_name: str, boto_session: Option
336
484
subprocess .run (docker_push_cmd , shell = True , check = True )
337
485
338
486
logger .info ("Image %s pushed to %s" , image_name , ecr_image_uri )
487
+
488
+ return ecr_image_uri
0 commit comments