
Commit 7727235

Refine CLIP embedding microservice by leveraging the third-party CLIP (opea-project#1298)
* Refine CLIP embedding microservice using dependency

Signed-off-by: lvliang-intel <[email protected]>
1 parent a353f99 commit 7727235

File tree: 5 files changed, +57 / -94 lines

comps/embeddings/deployment/docker_compose/compose.yaml

Lines changed: 5 additions & 0 deletions
@@ -4,6 +4,7 @@
 include:
   - ../../../third_parties/tei/deployment/docker_compose/compose.yaml
   - ../../../third_parties/bridgetower/deployment/docker_compose/compose.yaml
+  - ../../../third_parties/clip/deployment/docker_compose/compose_intel_cpu.yaml
 
 x-multimodal-bridgetower-embedding-config: &multimodal-bridgetower-embedding-config
   image: ${REGISTRY:-opea}/embedding:${TAG:-latest}
@@ -64,7 +65,11 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
+      CLIP_EMBEDDING_ENDPOINT: ${CLIP_EMBEDDING_ENDPOINT}
       EMBEDDING_COMPONENT_NAME: "OPEA_CLIP_EMBEDDING"
+    depends_on:
+      multimodal-clip-embedding:
+        condition: service_healthy
     restart: unless-stopped
 
   multimodal-bridgetower-embedding-server:
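The compose change points the wrapper at the standalone CLIP server through `CLIP_EMBEDDING_ENDPOINT` and gates its startup on the `multimodal-clip-embedding` health check. Below is a minimal Python sketch of the same readiness wait, useful when running the services outside of compose; the default endpoint and the bare `{"input": ...}` probe payload mirror the integration code further down, while the helper name and polling interval are assumptions of this sketch.

```python
import os
import time

import requests

CLIP_EMBEDDING_ENDPOINT = os.getenv("CLIP_EMBEDDING_ENDPOINT", "http://localhost:6990")


def wait_for_clip_backend(timeout_s: float = 60.0) -> bool:
    """Poll the CLIP server until it answers an embedding request (hypothetical helper)."""
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        try:
            resp = requests.post(
                f"{CLIP_EMBEDDING_ENDPOINT}/v1/embeddings",
                headers={"Content-Type": "application/json"},
                json={"input": "health check"},
                timeout=5,
            )
            if resp.ok:
                return True
        except requests.RequestException:
            pass  # backend not up yet; retry until the deadline
        time.sleep(2)
    return False


if __name__ == "__main__":
    print("CLIP backend ready:", wait_for_clip_backend())
```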
Lines changed: 29 additions & 85 deletions
@@ -1,13 +1,10 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
+import asyncio
 import os
-from typing import List, Union
 
-import torch
-import torch.nn as nn
-from einops import rearrange
-from transformers import AutoProcessor, AutoTokenizer, CLIPModel
+import requests
 
 from comps import CustomLogger, OpeaComponent, OpeaComponentRegistry, ServiceType
 from comps.cores.proto.api_protocol import EmbeddingRequest, EmbeddingResponse, EmbeddingResponseData
@@ -16,51 +13,6 @@
 logflag = os.getenv("LOGFLAG", False)
 
 
-model_name = "openai/clip-vit-base-patch32"
-
-clip = CLIPModel.from_pretrained(model_name)
-processor = AutoProcessor.from_pretrained(model_name)
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-
-
-class vCLIP(nn.Module):
-    def __init__(self, cfg):
-        super().__init__()
-
-        self.num_frm = cfg["num_frm"]
-        self.model_name = cfg["model_name"]
-
-    def embed_query(self, texts):
-        """Input is list of texts."""
-        text_inputs = tokenizer(texts, padding=True, return_tensors="pt")
-        text_features = clip.get_text_features(**text_inputs)
-        return text_features
-
-    def get_embedding_length(self):
-        text_features = self.embed_query("sample_text")
-        return text_features.shape[1]
-
-    def get_image_embeddings(self, images):
-        """Input is list of images."""
-        image_inputs = processor(images=images, return_tensors="pt")
-        image_features = clip.get_image_features(**image_inputs)
-        return image_features
-
-    def get_video_embeddings(self, frames_batch):
-        """Input is list of list of frames in video."""
-        self.batch_size = len(frames_batch)
-        vid_embs = []
-        for frames in frames_batch:
-            frame_embeddings = self.get_image_embeddings(frames)
-            frame_embeddings = rearrange(frame_embeddings, "(b n) d -> b n d", b=len(frames_batch))
-            # Normalize, mean aggregate and return normalized video_embeddings
-            frame_embeddings = frame_embeddings / frame_embeddings.norm(dim=-1, keepdim=True)
-            video_embeddings = frame_embeddings.mean(dim=1)
-            video_embeddings = video_embeddings / video_embeddings.norm(dim=-1, keepdim=True)
-            vid_embs.append(video_embeddings)
-        return torch.cat(vid_embs, dim=0)
-
-
 @OpeaComponentRegistry.register("OPEA_CLIP_EMBEDDING")
 class OpeaClipEmbedding(OpeaComponent):
     """A specialized embedding component derived from OpeaComponent for CLIP embedding services.
@@ -74,7 +26,7 @@ class OpeaClipEmbedding(OpeaComponent):
 
     def __init__(self, name: str, description: str, config: dict = None):
         super().__init__(name, ServiceType.EMBEDDING.name.lower(), description, config)
-        self.embeddings = vCLIP({"model_name": "openai/clip-vit-base-patch32", "num_frm": 4})
+        self.base_url = os.getenv("CLIP_EMBEDDING_ENDPOINT", "http://localhost:6990")
 
         health_status = self.check_health()
         if not health_status:
@@ -89,46 +41,38 @@ async def invoke(self, input: EmbeddingRequest) -> EmbeddingResponse:
         Returns:
             EmbeddingResponse: The response in OpenAI embedding format, including embeddings, model, and usage information.
         """
-        # Parse input according to the EmbeddingRequest format
-        if isinstance(input.input, str):
-            texts = [input.input.replace("\n", " ")]
-        elif isinstance(input.input, list):
-            if all(isinstance(item, str) for item in input.input):
-                texts = [text.replace("\n", " ") for text in input.input]
-            else:
-                raise ValueError("Invalid input format: Only string or list of strings are supported.")
-        else:
-            raise TypeError("Unsupported input type: input must be a string or list of strings.")
-        embed_vector = self.get_embeddings(texts)
-        if input.dimensions is not None:
-            embed_vector = [embed_vector[i][: input.dimensions] for i in range(len(embed_vector))]
-
-        # for standard openai embedding format
-        res = EmbeddingResponse(
-            data=[EmbeddingResponseData(index=i, embedding=embed_vector[i]) for i in range(len(embed_vector))]
-        )
-        return res
+        json_payload = input.model_dump()
+        try:
+            response = await asyncio.to_thread(
+                requests.post,
+                f"{self.base_url}/v1/embeddings",
+                headers={"Content-Type": "application/json"},
+                json=json_payload,
+            )
+            response.raise_for_status()
+            response_json = response.json()
+
+            return EmbeddingResponse(
+                data=[EmbeddingResponseData(**item) for item in response_json.get("data", [])],
+                model=response_json.get("model", input.model),
+                usage=response_json.get("usage", {}),
+            )
+        except requests.RequestException as e:
+            raise RuntimeError(f"Failed to invoke embedding service: {str(e)}")
 
     def check_health(self) -> bool:
         """Checks if the embedding model is healthy.
 
         Returns:
             bool: True if the embedding model is initialized, False otherwise.
         """
-        if self.embeddings:
+        try:
+            _ = requests.post(
+                f"{self.base_url}/v1/embeddings",
+                headers={"Content-Type": "application/json"},
+                json={"input": "health check"},
+            )
+
             return True
-        else:
+        except requests.RequestException as e:
             return False
-
-    def get_embeddings(self, text: Union[str, List[str]]) -> List[List[float]]:
-        """Generates embeddings for input text.
-
-        Args:
-            text (Union[str, List[str]]): Input text or list of texts.
-
-        Returns:
-            List[List[float]]: List of embedding vectors.
-        """
-        texts = [text] if isinstance(text, str) else text
-        embed_vector = self.embeddings.embed_query(texts).tolist()
-        return embed_vector
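With the in-process vCLIP model removed, `OpeaClipEmbedding.invoke` now forwards the OpenAI-style request to the remote CLIP service and re-wraps its JSON reply. The sketch below shows one way the refactored component might be driven; the import path is hypothetical (the module's location is not shown in this view), and the `EmbeddingRequest` keyword arguments are assumed from the fields the diff reads (`input.input`, `input.model`, `input.dimensions`).

```python
import asyncio

from comps.cores.proto.api_protocol import EmbeddingRequest

# OpeaClipEmbedding is the class from the integration diff above; its exact
# module path is not shown here, so adjust this import to your tree.
from integrations.clip import OpeaClipEmbedding  # hypothetical import path


async def main() -> None:
    # The constructor runs check_health(), so the CLIP backend must already be
    # reachable at CLIP_EMBEDDING_ENDPOINT before this is instantiated.
    component = OpeaClipEmbedding(
        name="clip_embedding",
        description="CLIP embedding microservice client",
    )

    # Field names assumed from the diff: invoke() calls input.model_dump() and
    # reads input.model / input.dimensions.
    request = EmbeddingRequest(input="A photo of a cat sitting on a laptop")
    response = await component.invoke(request)

    print(len(response.data), "embedding(s) returned")
    print("vector length:", len(response.data[0].embedding))


if __name__ == "__main__":
    asyncio.run(main())
```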

comps/embeddings/src/requirements.txt

Lines changed: 0 additions & 3 deletions
@@ -1,9 +1,7 @@
 aiohttp
 docarray
-einops
 fastapi
 huggingface_hub
-open-clip-torch
 openai
 opentelemetry-api
 opentelemetry-exporter-otlp
@@ -13,5 +11,4 @@ predictionguard==2.2.1
 prometheus-fastapi-instrumentator
 PyYAML
 shortuuid
-transformers
 uvicorn

comps/third_parties/clip/deployment/docker_compose/compose_intel_cpu.yaml

Lines changed: 8 additions & 5 deletions
@@ -1,19 +1,22 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-version: "3.8"
-
 services:
-  embedding:
+  multimodal-clip-embedding:
     image: opea/embedding-multimodal-clip:latest
-    container_name: embedding-multimodal-server
+    container_name: multimodal-clip-embedding-server
     ports:
-      - "6000:6000"
+      - "${MULTIMODAL_CLIP_EMBEDDER_PORT:-6990}:6990"
     ipc: host
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
+    healthcheck:
+      test: ["CMD-SHELL", "sleep 30 && exit 0"]
+      interval: 1s
+      timeout: 35s
+      retries: 1
     restart: unless-stopped
 
 networks:
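The standalone CLIP server is renamed to `multimodal-clip-embedding` and published on host port `${MULTIMODAL_CLIP_EMBEDDER_PORT:-6990}` (container port 6990). A quick Python probe against it, assuming the backend serves the same OpenAI-style `/v1/embeddings` route the wrapper calls and accepts a bare `{"input": ...}` payload as in the wrapper's health check:

```python
import os

import requests

port = os.getenv("MULTIMODAL_CLIP_EMBEDDER_PORT", "6990")
url = f"http://localhost:{port}/v1/embeddings"

# Bare-bones request mirroring the wrapper's health-check payload.
resp = requests.post(
    url,
    headers={"Content-Type": "application/json"},
    json={"input": "Sample text about machine learning"},
    timeout=30,
)
resp.raise_for_status()

body = resp.json()
first = body["data"][0]["embedding"]
print(f"model: {body.get('model')}, vector length: {len(first)}")
```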

tests/embeddings/test_embeddings_clip.sh

Lines changed: 15 additions & 1 deletion
@@ -17,12 +17,25 @@ function build_docker_images() {
     else
         echo "opea/embedding built successful"
     fi
+
+    cd $WORKPATH
+    echo $(pwd)
+    docker build --no-cache -t opea/embedding-multimodal-clip:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/third_parties/clip/src/Dockerfile .
+
+    if [ $? -ne 0 ]; then
+        echo "opea/embedding-multimodal-clip built fail"
+        exit 1
+    else
+        echo "opea/embedding-multimodal-clip built successful"
+    fi
 }
 
 function start_service() {
     export TAG=comps
     export host_ip=${ip_address}
     export EMBEDDER_PORT=10203
+    export MULTIMODAL_CLIP_EMBEDDER_PORT=10204
+    export CLIP_EMBEDDING_ENDPOINT=http://${host_ip}:${MULTIMODAL_CLIP_EMBEDDER_PORT}
     service_name="clip-embedding-server"
     cd $WORKPATH
     cd comps/embeddings/deployment/docker_compose/
@@ -42,6 +55,7 @@ function validate_service() {
     else
         echo "Result wrong. Received was $result"
         docker logs clip-embedding-server
+        docker logs multimodal-clip-embedding-server
         exit 1
     fi
 }
@@ -57,7 +71,7 @@ function validate_microservice() {
 }
 
 function stop_docker() {
-    cid=$(docker ps -aq --filter "name=clip-embedding-server*")
+    cid=$(docker ps -aq --filter "name=clip-embedding-server*" --filter "name=multimodal-clip-embedding-server*")
    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
 }
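The test now also builds the third-party CLIP image, exposes the backend on port 10204, and points the wrapper (port 10203) at it via `CLIP_EMBEDDING_ENDPOINT`. The exact request `validate_service` sends is not shown in these hunks; below is a hedged Python equivalent of that end-to-end check, assuming an OpenAI-style embeddings payload against the wrapper service.

```python
import requests

host_ip = "localhost"     # the test script uses the runner's IP address
embedder_port = 10203     # EMBEDDER_PORT exported by start_service

resp = requests.post(
    f"http://{host_ip}:{embedder_port}/v1/embeddings",
    headers={"Content-Type": "application/json"},
    json={"input": "What is deep learning?"},
    timeout=60,
)
resp.raise_for_status()
result = resp.json()

# The shell test greps the response text; here we assert on the parsed JSON instead.
assert result.get("data") and result["data"][0].get("embedding"), f"unexpected response: {result}"
print("clip-embedding-server returned a", len(result["data"][0]["embedding"]), "dim vector")
```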
