
Commit 191061b

Prediction Guard embeddings component (#675)
* added files for PG embeddings component
* added package
* fixed dockerfile link
* Fix pre-commit issues: end-of-file, requirements.txt, trailing whitespace, imports, and formatting
* added package
* added package
* fixed embedoc call
* file structure updated to latest
* Fix pre-commit issues: end-of-file, requirements.txt, trailing whitespace, imports, and formatting
* added package

Signed-off-by: sharanshirodkar7 <[email protected]>
1 parent b4a7f26 commit 191061b

File tree

8 files changed (+218, −0 lines)


.github/workflows/docker/compose/embeddings-compose-cd.yaml

Lines changed: 4 additions & 0 deletions

```diff
@@ -26,3 +26,7 @@ services:
     build:
       dockerfile: comps/embeddings/multimodal/multimodal_langchain/Dockerfile
     image: ${REGISTRY:-opea}/embedding-multimodal:${TAG:-latest}
+  embedding-predictionguard:
+    build:
+      dockerfile: comps/embeddings/predictionguard/Dockerfile
+    image: ${REGISTRY:-opea}/embedding-predictionguard:${TAG:-latest}
```

comps/embeddings/README.md

Lines changed: 4 additions & 0 deletions

```diff
@@ -31,3 +31,7 @@ For details, please refer to this [readme](multimodal/README.md).
 ## Embeddings Microservice with Multimodal Clip
 
 For details, please refer to this [readme](multimodal_clip/README.md).
+
+## Embeddings Microservice with Prediction Guard
+
+For details, please refer to this [readme](predictionguard/README.md).
```
comps/embeddings/predictionguard/Dockerfile

Lines changed: 16 additions & 0 deletions

```diff
@@ -0,0 +1,16 @@
+# Copyright (C) 2024 Prediction Guard, Inc
+# SPDX-License-Identifier: Apache-2.0
+
+FROM python:3.11-slim
+
+COPY comps /home/comps
+
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r /home/comps/embeddings/predictionguard/requirements.txt
+
+ENV PYTHONPATH=$PYTHONPATH:/home
+
+WORKDIR /home/comps/embeddings/predictionguard
+
+ENTRYPOINT ["python", "embedding_predictionguard.py"]
```
comps/embeddings/predictionguard/README.md

Lines changed: 39 additions & 0 deletions

````diff
@@ -0,0 +1,39 @@
+# Embedding Generation Prediction Guard Microservice
+
+[Prediction Guard](https://docs.predictionguard.com) allows you to utilize hosted open access LLMs, LVMs, and embedding functionality with seamlessly integrated safeguards. In addition to providing scalable access to open models, Prediction Guard allows you to configure factual consistency checks, toxicity filters, PII filters, and prompt injection blocking. Join the [Prediction Guard Discord channel](https://discord.gg/TFHgnhAFKd) and request an API key to get started.
+
+This embedding microservice is designed to efficiently convert text into vectorized embeddings using the [BridgeTower model](https://huggingface.co/BridgeTower/bridgetower-large-itm-mlm-itc), making it ideal for both RAG and semantic search applications.
+
+**Note** - The BridgeTower model implemented in Prediction Guard can embed text, images, or text + images (jointly). For now this service only embeds text; a follow-on contribution will enable the multimodal functionality.
+
+# 🚀 Start Microservice with Docker
+
+## Setup Environment Variables
+
+Set the following environment variable first:
+
+```bash
+export PREDICTIONGUARD_API_KEY=${your_predictionguard_api_key}
+```
+
+## Build Docker Images
+
+```bash
+cd ../../..
+docker build -t opea/embedding-predictionguard:latest -f comps/embeddings/predictionguard/Dockerfile .
+```
+
+## Start Service
+
+```bash
+docker run -d --name="embedding-predictionguard" -p 6000:6000 -e PREDICTIONGUARD_API_KEY=$PREDICTIONGUARD_API_KEY opea/embedding-predictionguard:latest
+```
+
+# 🚀 Consume Embeddings Service
+
+```bash
+curl localhost:6000/v1/embeddings \
+  -X POST \
+  -d '{"text":"Hello, world!"}' \
+  -H 'Content-Type: application/json'
+```
````
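For programmatic access, the curl call in the README above translates to a small Python client. This is a sketch, not part of the commit: `build_payload` and `embed_text` are hypothetical helper names, the payload shape follows the curl example, and the response field names follow the `EmbedDoc(text=..., embedding=...)` result the service returns.

```python
import json
import urllib.request

# Port 6000 is published by the container per the docker run command above
SERVICE_URL = "http://localhost:6000/v1/embeddings"


def build_payload(text: str) -> bytes:
    # The service expects a JSON body with a single "text" field (see the curl example)
    return json.dumps({"text": text}).encode("utf-8")


def embed_text(text: str, url: str = SERVICE_URL) -> list:
    # POST the payload and pull the "embedding" field out of the JSON response
    req = urllib.request.Request(
        url,
        data=build_payload(text),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(req) as resp:
        return json.loads(resp.read())["embedding"]
```

Calling `embed_text("Hello, world!")` against a running container should return the (truncated) BridgeTower embedding as a plain list of floats.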
Lines changed: 21 additions & 0 deletions

```diff
@@ -0,0 +1,21 @@
+# Copyright (C) 2024 Prediction Guard, Inc
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  embedding:
+    image: opea/embedding-predictionguard:latest
+    container_name: embedding-predictionguard
+    ports:
+      - "6000:6000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      PG_EMBEDDING_MODEL_NAME: ${PG_EMBEDDING_MODEL_NAME}
+      PREDICTIONGUARD_API_KEY: ${PREDICTIONGUARD_API_KEY}
+    restart: unless-stopped
+
+networks:
+  default:
+    driver: bridge
```
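The compose file forwards `PG_EMBEDDING_MODEL_NAME` and `PREDICTIONGUARD_API_KEY` from the host environment into the container; the service then reads the model name with `os.getenv`, falling back to the BridgeTower model when the variable is unset. A minimal sketch of that lookup (`resolve_model_name` is a hypothetical name; the default string matches the service code):

```python
import os


def resolve_model_name() -> str:
    # Mirrors the service: use PG_EMBEDDING_MODEL_NAME if set, else the BridgeTower default
    return os.getenv("PG_EMBEDDING_MODEL_NAME", "bridgetower-large-itm-mlm-itc")
```

Because the compose file passes the variable through verbatim, leaving it unset on the host means the container also sees it unset and the default applies.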
comps/embeddings/predictionguard/embedding_predictionguard.py

Lines changed: 47 additions & 0 deletions

```diff
@@ -0,0 +1,47 @@
+# Copyright (C) 2024 Prediction Guard, Inc.
+# SPDX-License-Identifier: Apache-2.0
+
+
+import os
+import time
+
+from predictionguard import PredictionGuard
+
+from comps import (
+    EmbedDoc,
+    ServiceType,
+    TextDoc,
+    opea_microservices,
+    register_microservice,
+    register_statistics,
+    statistics_dict,
+)
+
+# Initialize Prediction Guard client
+client = PredictionGuard()
+pg_embedding_model_name = os.getenv("PG_EMBEDDING_MODEL_NAME", "bridgetower-large-itm-mlm-itc")
+
+
+@register_microservice(
+    name="opea_service@embedding_predictionguard",
+    service_type=ServiceType.EMBEDDING,
+    endpoint="/v1/embeddings",
+    host="0.0.0.0",
+    port=6000,
+    input_datatype=TextDoc,
+    output_datatype=EmbedDoc,
+)
+@register_statistics(names=["opea_service@embedding_predictionguard"])
+def embedding(input: TextDoc) -> EmbedDoc:
+    start = time.time()
+    response = client.embeddings.create(model=pg_embedding_model_name, input=[{"text": input.text}])
+    embed_vector = response["data"][0]["embedding"]
+    embed_vector = embed_vector[:512]  # Keep only the first 512 elements
+    res = EmbedDoc(text=input.text, embedding=embed_vector)
+    statistics_dict["opea_service@embedding_predictionguard"].append_latency(time.time() - start, None)
+    return res
+
+
+if __name__ == "__main__":
+    print("Prediction Guard Embedding initialized.")
+    opea_microservices["opea_service@embedding_predictionguard"].start()
```
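The handler above pulls the first embedding out of the Prediction Guard response and truncates it to 512 elements. That post-processing can be exercised in isolation with a mocked response of the same shape (a sketch; `extract_embedding` is a hypothetical name, and the dict layout follows the `response["data"][0]["embedding"]` access in the handler):

```python
def extract_embedding(response: dict, max_dim: int = 512) -> list:
    # Same post-processing as the handler: first result, truncated to max_dim elements
    embed_vector = response["data"][0]["embedding"]
    return embed_vector[:max_dim]


# Mocked response shaped like the client.embeddings.create() result used above
mock_response = {"data": [{"embedding": [0.1] * 1024}]}
vec = extract_embedding(mock_response)
```

Note that Python slicing makes the truncation a no-op for vectors already at or below 512 dimensions, so shorter embeddings pass through unchanged.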
comps/embeddings/predictionguard/requirements.txt

Lines changed: 12 additions & 0 deletions

```diff
@@ -0,0 +1,12 @@
+aiohttp
+docarray
+fastapi
+opentelemetry-api
+opentelemetry-exporter-otlp
+opentelemetry-sdk
+Pillow
+predictionguard==2.2.1
+prometheus-fastapi-instrumentator
+PyYAML
+shortuuid
+uvicorn
```
Lines changed: 75 additions & 0 deletions

```diff
@@ -0,0 +1,75 @@
+#!/bin/bash
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+set -x
+
+WORKPATH=$(dirname "$PWD")
+ip_address=$(hostname -I | awk '{print $1}')  # Adjust to a more reliable command if needed
+if [ -z "$ip_address" ]; then
+    ip_address="localhost"  # Default to localhost if IP address is empty
+fi
+
+function build_docker_images() {
+    cd $WORKPATH
+    echo $(pwd)
+    docker build --no-cache -t opea/embedding-pg:comps -f comps/embeddings/predictionguard/Dockerfile .
+    if [ $? -ne 0 ]; then
+        echo "opea/embedding-pg build failed"
+        exit 1
+    else
+        echo "opea/embedding-pg built successfully"
+    fi
+}
+
+function start_service() {
+    tei_service_port=6000
+    unset http_proxy
+    docker run -d --name=test-comps-embedding-pg-server \
+        -e http_proxy= -e https_proxy= \
+        -e PREDICTIONGUARD_API_KEY=${PREDICTIONGUARD_API_KEY} \
+        -p 6000:6000 --ipc=host opea/embedding-pg:comps
+    sleep 60  # Sleep for 1 minute to allow the service to start
+}
+
+function validate_microservice() {
+    tei_service_port=6000
+    result=$(http_proxy="" curl http://${ip_address}:${tei_service_port}/v1/embeddings \
+        -X POST \
+        -d '{"text":"What is Deep Learning?"}' \
+        -H 'Content-Type: application/json')
+
+    # Check for a proper response format
+    if [[ $result == *"embedding"* ]]; then
+        echo "Result correct."
+    elif [[ $result == *"error"* || $result == *"detail"* ]]; then
+        echo "Result wrong. Error received was: $result"
+        docker logs test-comps-embedding-pg-server
+        exit 1
+    else
+        echo "Unexpected result format received was: $result"
+        docker logs test-comps-embedding-pg-server
+        exit 1
+    fi
+}
+
+function stop_docker() {
+    cid=$(docker ps -aq --filter "name=test-comps-embedding-pg-*")
+    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
+}
+
+function main() {
+
+    stop_docker
+
+    build_docker_images
+    start_service
+
+    validate_microservice
+
+    stop_docker
+    echo y | docker system prune
+
+}
+
+main
```
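The test script's validation step is a substring match on the curl output. The same triage logic can be written as a small Python helper (a sketch; `classify_response` is a hypothetical name, and the strings checked mirror the script: success on `"embedding"`, failure on `"error"` or `"detail"`, otherwise an unexpected format):

```python
def classify_response(result: str) -> str:
    # Mirrors the bash checks, in the same order: success is checked first
    if "embedding" in result:
        return "correct"
    if "error" in result or "detail" in result:
        return "error"
    return "unexpected"
```

As in the script, the success check runs first, so a response containing both `"embedding"` and `"error"` is still treated as correct.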
