Skip to content

Commit 828cdc3

Browse files
authored
Add tei cpu image (#4695)
* Add tei cpu image
* fix format issue
* fix unit tests
* fix typo
* fix typo
1 parent 4e83cce commit 828cdc3

File tree

4 files changed: 79 additions (+79) and 5 deletions (-5)

src/sagemaker/huggingface/llm_utils.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,13 @@ def get_huggingface_llm_image_uri(
7272
version=version,
7373
image_scope="inference",
7474
)
75+
if backend == "huggingface-tei-cpu":
76+
return image_uris.retrieve(
77+
"huggingface-tei-cpu",
78+
region=region,
79+
version=version,
80+
image_scope="inference",
81+
)
7582
if backend == "lmi":
7683
version = version or "0.24.0"
7784
return image_uris.retrieve(framework="djl-deepspeed", region=region, version=version)
src/sagemaker/image_uri_config/huggingface-tei-cpu.json

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
{
2+
"inference": {
3+
"processors": [
4+
"cpu"
5+
],
6+
"version_aliases": {
7+
"1.2": "1.2.3"
8+
},
9+
"versions": {
10+
"1.2.3": {
11+
"py_versions": [
12+
"py310"
13+
],
14+
"registries": {
15+
"af-south-1": "510948584623",
16+
"ap-east-1": "651117190479",
17+
"ap-northeast-1": "354813040037",
18+
"ap-northeast-2": "366743142698",
19+
"ap-northeast-3": "867004704886",
20+
"ap-south-1": "720646828776",
21+
"ap-south-2": "628508329040",
22+
"ap-southeast-1": "121021644041",
23+
"ap-southeast-2": "783357654285",
24+
"ap-southeast-3": "951798379941",
25+
"ap-southeast-4": "106583098589",
26+
"ca-central-1": "341280168497",
27+
"ca-west-1": "190319476487",
28+
"cn-north-1": "450853457545",
29+
"cn-northwest-1": "451049120500",
30+
"eu-central-1": "492215442770",
31+
"eu-central-2": "680994064768",
32+
"eu-north-1": "662702820516",
33+
"eu-south-1": "978288397137",
34+
"eu-south-2": "104374241257",
35+
"eu-west-1": "141502667606",
36+
"eu-west-2": "764974769150",
37+
"eu-west-3": "659782779980",
38+
"il-central-1": "898809789911",
39+
"me-central-1": "272398656194",
40+
"me-south-1": "801668240914",
41+
"sa-east-1": "737474898029",
42+
"us-east-1": "683313688378",
43+
"us-east-2": "257758044811",
44+
"us-gov-east-1": "237065988967",
45+
"us-gov-west-1": "414596584902",
46+
"us-iso-east-1": "833128469047",
47+
"us-isob-east-1": "281123927165",
48+
"us-west-1": "746614075791",
49+
"us-west-2": "246618743249"
50+
},
51+
"tag_prefix": "2.0.1-tei1.2.3",
52+
"repository": "tei-cpu",
53+
"container_version": {
54+
"cpu": "ubuntu22.04"
55+
}
56+
}
57+
}
58+
}
59+
}

src/sagemaker/image_uris.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@
3737
ECR_URI_TEMPLATE = "{registry}.dkr.{hostname}/{repository}"
3838
HUGGING_FACE_FRAMEWORK = "huggingface"
3939
HUGGING_FACE_LLM_FRAMEWORK = "huggingface-llm"
40-
HUGGING_FACE_TEI_FRAMEWORK = "huggingface-tei"
40+
HUGGING_FACE_TEI_GPU_FRAMEWORK = "huggingface-tei"
41+
HUGGING_FACE_TEI_CPU_FRAMEWORK = "huggingface-tei-cpu"
4142
HUGGING_FACE_LLM_NEURONX_FRAMEWORK = "huggingface-llm-neuronx"
4243
XGBOOST_FRAMEWORK = "xgboost"
4344
SKLEARN_FRAMEWORK = "sklearn"
@@ -478,7 +479,8 @@ def _validate_version_and_set_if_needed(version, config, framework):
478479
if version is None and framework in [
479480
DATA_WRANGLER_FRAMEWORK,
480481
HUGGING_FACE_LLM_FRAMEWORK,
481-
HUGGING_FACE_TEI_FRAMEWORK,
482+
HUGGING_FACE_TEI_GPU_FRAMEWORK,
483+
HUGGING_FACE_TEI_CPU_FRAMEWORK,
482484
HUGGING_FACE_LLM_NEURONX_FRAMEWORK,
483485
STABILITYAI_FRAMEWORK,
484486
]:

tests/unit/sagemaker/image_uris/test_huggingface_llm.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@
2222
"gpu": {
2323
"1.2.3": "2.0.1-tei1.2.3-gpu-py310-cu122-ubuntu22.04",
2424
},
25+
"cpu": {
26+
"1.2.3": "2.0.1-tei1.2.3-cpu-py310-ubuntu22.04",
27+
},
2528
}
2629
HF_VERSIONS_MAPPING = {
2730
"gpu": {
@@ -73,17 +76,20 @@ def test_huggingface_uris(load_config):
7376
assert expected == uri
7477

7578

76-
@pytest.mark.parametrize("load_config", ["huggingface-tei.json"], indirect=True)
79+
@pytest.mark.parametrize(
80+
"load_config", ["huggingface-tei.json", "huggingface-tei-cpu.json"], indirect=True
81+
)
7782
def test_huggingface_tei_uris(load_config):
7883
VERSIONS = load_config["inference"]["versions"]
7984
device = load_config["inference"]["processors"][0]
80-
backend = "huggingface-tei"
85+
backend = "huggingface-tei" if device == "gpu" else "huggingface-tei-cpu"
86+
repo = "tei" if device == "gpu" else "tei-cpu"
8187
for version in VERSIONS:
8288
ACCOUNTS = load_config["inference"]["versions"][version]["registries"]
8389
for region in ACCOUNTS.keys():
8490
uri = get_huggingface_llm_image_uri(backend, region=region, version=version)
8591
expected = expected_uris.huggingface_llm_framework_uri(
86-
"tei",
92+
repo,
8793
ACCOUNTS[region],
8894
version,
8995
TEI_VERSIONS_MAPPING[device][version],

0 commit comments

Comments
 (0)