Skip to content

Commit 46efb3a

Browse files
authored
Merge branch 'master' into fix/sagemaker-session-region-not-being-used
2 parents c038acd + 07e1b92 commit 46efb3a

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

41 files changed

+1546
-127
lines changed

.github/workflows/codebuild-ci.yml

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
name: PR Checks
2+
on:
3+
pull_request_target:
4+
5+
concurrency:
6+
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.head_ref }}
7+
cancel-in-progress: true
8+
9+
permissions:
10+
id-token: write # This is required for requesting the JWT
11+
12+
jobs:
13+
codestyle-doc-tests:
14+
runs-on: ubuntu-latest
15+
steps:
16+
- name: Configure AWS Credentials
17+
uses: aws-actions/configure-aws-credentials@v4
18+
with:
19+
role-to-assume: ${{ secrets.CI_AWS_ROLE_ARN }}
20+
aws-region: us-west-2
21+
role-duration-seconds: 10800
22+
- name: Run Codestyle & Doc Tests
23+
uses: aws-actions/aws-codebuild-run-build@v1
24+
with:
25+
project-name: sagemaker-python-sdk-ci-codestyle-doc-tests
26+
source-version-override: 'pr/${{ github.event.pull_request.number }}'
27+
unit-tests:
28+
runs-on: ubuntu-latest
29+
strategy:
30+
fail-fast: false
31+
matrix:
32+
python-version: ["py38", "py39", "py310"]
33+
steps:
34+
- name: Configure AWS Credentials
35+
uses: aws-actions/configure-aws-credentials@v4
36+
with:
37+
role-to-assume: ${{ secrets.CI_AWS_ROLE_ARN }}
38+
aws-region: us-west-2
39+
role-duration-seconds: 10800
40+
- name: Run Unit Tests
41+
uses: aws-actions/aws-codebuild-run-build@v1
42+
with:
43+
project-name: sagemaker-python-sdk-ci-unit-tests
44+
source-version-override: 'pr/${{ github.event.pull_request.number }}'
45+
env-vars-for-codebuild: |
46+
PY_VERSION
47+
env:
48+
PY_VERSION: ${{ matrix.python-version }}

CHANGELOG.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,17 @@
11
# Changelog
22

3+
## v2.212.0 (2024-03-06)
4+
5+
### Features
6+
7+
* Update SM Python SDK for PT 2.2.0 SM DLC
8+
9+
### Bug Fixes and Other Changes
10+
11+
* Create custom tarfile extractall util to fix backward compatibility issue
12+
* Upgrade smp to version 2.2
13+
* Enhance model builder selection logic to include model size
14+
315
## v2.211.0 (2024-03-05)
416

517
### Features

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.211.1.dev0
1+
2.212.1.dev0

doc/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@ docutils==0.15.2
44
packaging==20.9
55
jinja2==3.1.3
66
schema==0.7.5
7+
accelerate>=0.24.1,<=0.27.0

requirements/extras/huggingface_requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
accelerate>=0.24.1,<=0.27.0

requirements/extras/test_requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,4 @@ tritonclient[http]<2.37.0
3939
onnx==1.14.1
4040
# tf2onnx==1.15.1
4141
nbformat>=5.9,<6
42+
accelerate>=0.24.1,<=0.27.0

setup.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def read_requirements(filename):
6363
"PyYAML~=6.0",
6464
"jsonschema",
6565
"platformdirs",
66-
"tblib>=1.7.0,<3",
66+
"tblib>=1.7.0,<4",
6767
"urllib3>=1.26.8,<3.0.0",
6868
"requests",
6969
"docker",
@@ -79,6 +79,7 @@ def read_requirements(filename):
7979
"feature-processor": read_requirements(
8080
"requirements/extras/feature-processor_requirements.txt"
8181
),
82+
"huggingface": read_requirements("requirements/extras/huggingface_requirements.txt"),
8283
}
8384
# Meta dependency groups
8485
extras["all"] = [item for group in extras.values() for item in group]

src/sagemaker/fw_utils.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@
141141
"2.0.1",
142142
"2.1.0",
143143
"2.1.2",
144+
"2.2.0",
144145
],
145146
}
146147

@@ -157,10 +158,18 @@
157158
"2.0.0",
158159
"2.0.1",
159160
"2.1.0",
161+
"2.2.0",
160162
]
161163

162164

163-
TORCH_DISTRIBUTED_GPU_SUPPORTED_FRAMEWORK_VERSIONS = ["1.13.1", "2.0.0", "2.0.1", "2.1.0", "2.1.2"]
165+
TORCH_DISTRIBUTED_GPU_SUPPORTED_FRAMEWORK_VERSIONS = [
166+
"1.13.1",
167+
"2.0.0",
168+
"2.0.1",
169+
"2.1.0",
170+
"2.1.2",
171+
"2.2.0",
172+
]
164173

165174
TRAINIUM_SUPPORTED_DISTRIBUTION_STRATEGIES = ["torch_distributed"]
166175
TRAINIUM_SUPPORTED_TORCH_DISTRIBUTED_FRAMEWORK_VERSIONS = [

src/sagemaker/huggingface/llm_utils.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,8 @@ def get_huggingface_model_metadata(model_id: str, hf_hub_token: Optional[str] =
8181
Returns:
8282
dict: The model metadata retrieved with the HuggingFace API
8383
"""
84-
84+
if not model_id:
85+
raise ValueError("Model ID is empty. Please provide a valid Model ID.")
8586
hf_model_metadata_url = f"https://huggingface.co/api/models/{model_id}"
8687
hf_model_metadata_json = None
8788
try:

src/sagemaker/image_uri_config/pytorch-smp.json

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
],
66
"version_aliases": {
77
"2.0": "2.0.1",
8-
"2.1": "2.1.2"
8+
"2.1": "2.1.2",
9+
"2.2": "2.2.0"
910
},
1011
"versions": {
1112
"2.0.1": {
@@ -57,6 +58,31 @@
5758
"us-west-2": "658645717510"
5859
},
5960
"repository": "smdistributed-modelparallel"
61+
},
62+
"2.2.0": {
63+
"py_versions": [
64+
"py310"
65+
],
66+
"registries": {
67+
"ap-northeast-1": "658645717510",
68+
"ap-northeast-2": "658645717510",
69+
"ap-northeast-3": "658645717510",
70+
"ap-south-1": "658645717510",
71+
"ap-southeast-1": "658645717510",
72+
"ap-southeast-2": "658645717510",
73+
"ca-central-1": "658645717510",
74+
"eu-central-1": "658645717510",
75+
"eu-north-1": "658645717510",
76+
"eu-west-1": "658645717510",
77+
"eu-west-2": "658645717510",
78+
"eu-west-3": "658645717510",
79+
"sa-east-1": "658645717510",
80+
"us-east-1": "658645717510",
81+
"us-east-2": "658645717510",
82+
"us-west-1": "658645717510",
83+
"us-west-2": "658645717510"
84+
},
85+
"repository": "smdistributed-modelparallel"
6086
}
6187
}
6288
}

src/sagemaker/image_uri_config/pytorch.json

Lines changed: 99 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -848,6 +848,7 @@
848848
"ap-northeast-2": "763104351884",
849849
"ap-northeast-3": "364406365360",
850850
"ap-south-1": "763104351884",
851+
"ap-south-2": "772153158452",
851852
"ap-southeast-1": "763104351884",
852853
"ap-southeast-2": "763104351884",
853854
"ap-southeast-3": "907027046896",
@@ -856,11 +857,13 @@
856857
"cn-north-1": "727897471807",
857858
"cn-northwest-1": "727897471807",
858859
"eu-central-1": "763104351884",
860+
"eu-central-2": "380420809688",
859861
"eu-north-1": "763104351884",
860862
"eu-west-1": "763104351884",
861863
"eu-west-2": "763104351884",
862864
"eu-west-3": "763104351884",
863865
"eu-south-1": "692866216735",
866+
"eu-south-2": "503227376785",
864867
"me-south-1": "217643126080",
865868
"sa-east-1": "763104351884",
866869
"us-east-1": "763104351884",
@@ -887,6 +890,7 @@
887890
"ap-northeast-2": "763104351884",
888891
"ap-northeast-3": "364406365360",
889892
"ap-south-1": "763104351884",
893+
"ap-south-2": "772153158452",
890894
"ap-southeast-1": "763104351884",
891895
"ap-southeast-2": "763104351884",
892896
"ap-southeast-3": "907027046896",
@@ -895,11 +899,13 @@
895899
"cn-north-1": "727897471807",
896900
"cn-northwest-1": "727897471807",
897901
"eu-central-1": "763104351884",
902+
"eu-central-2": "380420809688",
898903
"eu-north-1": "763104351884",
899904
"eu-west-1": "763104351884",
900905
"eu-west-2": "763104351884",
901906
"eu-west-3": "763104351884",
902907
"eu-south-1": "692866216735",
908+
"eu-south-2": "503227376785",
903909
"me-south-1": "217643126080",
904910
"sa-east-1": "763104351884",
905911
"us-east-1": "763104351884",
@@ -926,6 +932,7 @@
926932
"ap-northeast-2": "763104351884",
927933
"ap-northeast-3": "364406365360",
928934
"ap-south-1": "763104351884",
935+
"ap-south-2": "772153158452",
929936
"ap-southeast-1": "763104351884",
930937
"ap-southeast-2": "763104351884",
931938
"ap-southeast-3": "907027046896",
@@ -934,11 +941,13 @@
934941
"cn-north-1": "727897471807",
935942
"cn-northwest-1": "727897471807",
936943
"eu-central-1": "763104351884",
944+
"eu-central-2": "380420809688",
937945
"eu-north-1": "763104351884",
938946
"eu-west-1": "763104351884",
939947
"eu-west-2": "763104351884",
940948
"eu-west-3": "763104351884",
941949
"eu-south-1": "692866216735",
950+
"eu-south-2": "503227376785",
942951
"me-south-1": "217643126080",
943952
"sa-east-1": "763104351884",
944953
"us-east-1": "763104351884",
@@ -965,6 +974,7 @@
965974
"ap-northeast-2": "763104351884",
966975
"ap-northeast-3": "364406365360",
967976
"ap-south-1": "763104351884",
977+
"ap-south-2": "772153158452",
968978
"ap-southeast-1": "763104351884",
969979
"ap-southeast-2": "763104351884",
970980
"ap-southeast-3": "907027046896",
@@ -973,11 +983,55 @@
973983
"cn-north-1": "727897471807",
974984
"cn-northwest-1": "727897471807",
975985
"eu-central-1": "763104351884",
986+
"eu-central-2": "380420809688",
976987
"eu-north-1": "763104351884",
977988
"eu-west-1": "763104351884",
978989
"eu-west-2": "763104351884",
979990
"eu-west-3": "763104351884",
980991
"eu-south-1": "692866216735",
992+
"eu-south-2": "503227376785",
993+
"me-south-1": "217643126080",
994+
"sa-east-1": "763104351884",
995+
"us-east-1": "763104351884",
996+
"us-east-2": "763104351884",
997+
"us-gov-east-1": "446045086412",
998+
"us-gov-west-1": "442386744353",
999+
"us-iso-east-1": "886529160074",
1000+
"us-isob-east-1": "094389454867",
1001+
"us-west-1": "763104351884",
1002+
"us-west-2": "763104351884",
1003+
"ca-west-1": "204538143572"
1004+
},
1005+
"repository": "pytorch-inference"
1006+
},
1007+
"2.2.0": {
1008+
"py_versions": [
1009+
"py310"
1010+
],
1011+
"registries": {
1012+
"af-south-1": "626614931356",
1013+
"il-central-1": "780543022126",
1014+
"ap-east-1": "871362719292",
1015+
"ap-northeast-1": "763104351884",
1016+
"ap-northeast-2": "763104351884",
1017+
"ap-northeast-3": "364406365360",
1018+
"ap-south-1": "763104351884",
1019+
"ap-south-2": "772153158452",
1020+
"ap-southeast-1": "763104351884",
1021+
"ap-southeast-2": "763104351884",
1022+
"ap-southeast-3": "907027046896",
1023+
"ap-southeast-4": "457447274322",
1024+
"ca-central-1": "763104351884",
1025+
"cn-north-1": "727897471807",
1026+
"cn-northwest-1": "727897471807",
1027+
"eu-central-1": "763104351884",
1028+
"eu-central-2": "380420809688",
1029+
"eu-north-1": "763104351884",
1030+
"eu-west-1": "763104351884",
1031+
"eu-west-2": "763104351884",
1032+
"eu-west-3": "763104351884",
1033+
"eu-south-1": "692866216735",
1034+
"eu-south-2": "503227376785",
9811035
"me-south-1": "217643126080",
9821036
"sa-east-1": "763104351884",
9831037
"us-east-1": "763104351884",
@@ -1190,7 +1244,8 @@
11901244
"1.12": "1.12.1",
11911245
"1.13": "1.13.1",
11921246
"2.0": "2.0.1",
1193-
"2.1": "2.1.0"
1247+
"2.1": "2.1.0",
1248+
"2.2": "2.2.0"
11941249
},
11951250
"versions": {
11961251
"0.4.0": {
@@ -2113,7 +2168,49 @@
21132168
"ca-west-1": "204538143572"
21142169
},
21152170
"repository": "pytorch-training"
2171+
},
2172+
"2.2.0": {
2173+
"py_versions": [
2174+
"py310"
2175+
],
2176+
"registries": {
2177+
"af-south-1": "626614931356",
2178+
"il-central-1": "780543022126",
2179+
"ap-east-1": "871362719292",
2180+
"ap-northeast-1": "763104351884",
2181+
"ap-northeast-2": "763104351884",
2182+
"ap-northeast-3": "364406365360",
2183+
"ap-south-1": "763104351884",
2184+
"ap-south-2": "772153158452",
2185+
"ap-southeast-1": "763104351884",
2186+
"ap-southeast-2": "763104351884",
2187+
"ap-southeast-3": "907027046896",
2188+
"ap-southeast-4": "457447274322",
2189+
"ca-central-1": "763104351884",
2190+
"cn-north-1": "727897471807",
2191+
"cn-northwest-1": "727897471807",
2192+
"eu-central-1": "763104351884",
2193+
"eu-central-2": "380420809688",
2194+
"eu-north-1": "763104351884",
2195+
"eu-west-1": "763104351884",
2196+
"eu-west-2": "763104351884",
2197+
"eu-west-3": "763104351884",
2198+
"eu-south-1": "692866216735",
2199+
"eu-south-2": "503227376785",
2200+
"me-south-1": "217643126080",
2201+
"sa-east-1": "763104351884",
2202+
"us-east-1": "763104351884",
2203+
"us-east-2": "763104351884",
2204+
"us-gov-east-1": "446045086412",
2205+
"us-gov-west-1": "442386744353",
2206+
"us-iso-east-1": "886529160074",
2207+
"us-isob-east-1": "094389454867",
2208+
"us-west-1": "763104351884",
2209+
"us-west-2": "763104351884",
2210+
"ca-west-1": "204538143572"
2211+
},
2212+
"repository": "pytorch-training"
21162213
}
21172214
}
21182215
}
2119-
}
2216+
}

src/sagemaker/image_uris.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -678,7 +678,11 @@ def get_training_image_uri(
678678
if "modelparallel" in distribution["smdistributed"]:
679679
if distribution["smdistributed"]["modelparallel"].get("enabled", True):
680680
framework = "pytorch-smp"
681-
if "p5" in instance_type or "2.1" in framework_version:
681+
if (
682+
"p5" in instance_type
683+
or "2.1" in framework_version
684+
or "2.2" in framework_version
685+
):
682686
container_version = "cu121"
683687
else:
684688
container_version = "cu118"

0 commit comments

Comments
 (0)