
Commit 391c4a5

Prediction Guard LLM component (#674)
Signed-off-by: sharanshirodkar7 <[email protected]>
1 parent 191061b commit 391c4a5

9 files changed: +269 −0 lines changed


.github/workflows/docker/compose/llms-compose-cd.yaml

Lines changed: 4 additions & 0 deletions
@@ -23,3 +23,7 @@ services:
     build:
       dockerfile: comps/llms/text-generation/vllm/llama_index/dependency/Dockerfile.intel_hpu
     image: ${REGISTRY:-opea}/llm-vllm-llamaindex-hpu:${TAG:-latest}
+  llm-predictionguard:
+    build:
+      dockerfile: comps/llms/text-generation/predictionguard/Dockerfile
+    image: ${REGISTRY:-opea}/llm-textgen-predictionguard:${TAG:-latest}
Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
# Copyright (C) 2024 Prediction Guard, Inc.
# SPDX-License-Identifier: Apache-2.0

FROM python:3.11-slim

COPY comps /home/comps

RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r /home/comps/llms/text-generation/predictionguard/requirements.txt

ENV PYTHONPATH=$PYTHONPATH:/home

WORKDIR /home/comps/llms/text-generation/predictionguard

ENTRYPOINT ["bash", "entrypoint.sh"]
Lines changed: 54 additions & 0 deletions
@@ -0,0 +1,54 @@
# Introduction

[Prediction Guard](https://docs.predictionguard.com) allows you to use hosted open-access LLMs, LVMs, and embedding functionality with seamlessly integrated safeguards. In addition to providing scalable access to open models, Prediction Guard lets you configure factual consistency checks, toxicity filters, PII filters, and prompt injection blocking. Join the [Prediction Guard Discord channel](https://discord.gg/TFHgnhAFKd) and request an API key to get started.

# Get Started

## Build Docker Image

```bash
cd ../../..
docker build -t opea/llm-textgen-predictionguard:latest -f comps/llms/text-generation/predictionguard/Dockerfile .
```

## Run the Prediction Guard Microservice

```bash
docker run -d -p 9000:9000 -e PREDICTIONGUARD_API_KEY=$PREDICTIONGUARD_API_KEY --name llm-textgen-predictionguard opea/llm-textgen-predictionguard:latest
```

# Consume the Prediction Guard Microservice

See the [Prediction Guard docs](https://docs.predictionguard.com/) for available model options.

## Without streaming

```bash
curl -X POST http://localhost:9000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "Hermes-2-Pro-Llama-3-8B",
    "query": "Tell me a joke.",
    "max_new_tokens": 100,
    "temperature": 0.7,
    "top_p": 0.9,
    "top_k": 50,
    "streaming": false
  }'
```

## With streaming

```bash
curl -N -X POST http://localhost:9000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "Hermes-2-Pro-Llama-3-8B",
    "query": "Tell me a joke.",
    "max_new_tokens": 100,
    "temperature": 0.7,
    "top_p": 0.9,
    "top_k": 50,
    "streaming": true
  }'
```
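
The streaming endpoint replies with server-sent-event style lines of the form `data: <token>`, terminated by a `data: [DONE]` marker. The following is a minimal Python sketch for consuming that stream programmatically; it assumes the service is running on `localhost:9000` as above and that the `requests` package is installed.

```python
# Minimal streaming-client sketch (assumes the microservice above is on
# localhost:9000 and that the `requests` package is available).
import requests

payload = {
    "model": "Hermes-2-Pro-Llama-3-8B",
    "query": "Tell me a joke.",
    "max_new_tokens": 100,
    "temperature": 0.7,
    "top_p": 0.9,
    "top_k": 50,
    "streaming": True,
}

with requests.post("http://localhost:9000/v1/chat/completions", json=payload, stream=True) as resp:
    resp.raise_for_status()
    for line in resp.iter_lines(decode_unicode=True):
        # The service emits "data: <token>" lines and a final "data: [DONE]" marker.
        if not line or not line.startswith("data: "):
            continue
        token = line[len("data: "):]
        if token == "[DONE]":
            break
        print(token, end="", flush=True)
print()
```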
Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
# Copyright (C) 2024 Prediction Guard, Inc.
# SPDX-License-Identifier: Apache-2.0
Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
# Copyright (C) 2024 Prediction Guard, Inc
# SPDX-License-Identifier: Apache-2.0

services:
  llm:
    image: opea/llm-textgen-predictionguard:latest
    container_name: llm-textgen-predictionguard
    ports:
      - "9000:9000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      PREDICTIONGUARD_API_KEY: ${PREDICTIONGUARD_API_KEY}
    restart: unless-stopped

networks:
  default:
    driver: bridge
Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
#!/usr/bin/env bash

# Copyright (C) 2024 Prediction Guard, Inc.
# SPDX-License-Identifier: Apache-2.0

#pip --no-cache-dir install -r requirements-runtime.txt

python llm_predictionguard.py
Lines changed: 86 additions & 0 deletions
@@ -0,0 +1,86 @@
# Copyright (C) 2024 Prediction Guard, Inc.
# SPDX-License-Identifier: Apache-2.0


import time

from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from predictionguard import PredictionGuard
from pydantic import BaseModel

from comps import (
    GeneratedDoc,
    LLMParamsDoc,
    ServiceType,
    opea_microservices,
    register_microservice,
    register_statistics,
    statistics_dict,
)

client = PredictionGuard()
app = FastAPI()


@register_microservice(
    name="opea_service@llm_predictionguard",
    service_type=ServiceType.LLM,
    endpoint="/v1/chat/completions",
    host="0.0.0.0",
    port=9000,
)
@register_statistics(names=["opea_service@llm_predictionguard"])
def llm_generate(input: LLMParamsDoc):
    start = time.time()

    messages = [
        {
            "role": "system",
            "content": "You are a helpful assistant. Your goal is to provide accurate, detailed, and safe responses to the user's queries.",
        },
        {"role": "user", "content": input.query},
    ]

    if input.streaming:

        async def stream_generator():
            chat_response = ""
            for res in client.chat.completions.create(
                model=input.model,
                messages=messages,
                max_tokens=input.max_new_tokens,
                temperature=input.temperature,
                top_p=input.top_p,
                top_k=input.top_k,
                stream=True,
            ):
                if "choices" in res["data"] and "delta" in res["data"]["choices"][0]:
                    delta_content = res["data"]["choices"][0]["delta"]["content"]
                    chat_response += delta_content
                    yield f"data: {delta_content}\n\n"
                else:
                    yield "data: [DONE]\n\n"

        statistics_dict["opea_service@llm_predictionguard"].append_latency(time.time() - start, None)
        return StreamingResponse(stream_generator(), media_type="text/event-stream")
    else:
        try:
            response = client.chat.completions.create(
                model=input.model,
                messages=messages,
                max_tokens=input.max_new_tokens,
                temperature=input.temperature,
                top_p=input.top_p,
                top_k=input.top_k,
            )
            response_text = response["choices"][0]["message"]["content"]
        except Exception as e:
            raise HTTPException(status_code=500, detail=str(e))

        statistics_dict["opea_service@llm_predictionguard"].append_latency(time.time() - start, None)
        return GeneratedDoc(text=response_text, prompt=input.query)


if __name__ == "__main__":
    opea_microservices["opea_service@llm_predictionguard"].start()
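
In the non-streaming branch above, the handler wraps the completion in a `GeneratedDoc`, so the JSON response exposes the generated `text` together with the originating `prompt`. Below is a minimal non-streaming client sketch; it assumes the microservice is reachable on `localhost:9000` and that the `requests` package is installed.

```python
# Minimal non-streaming client sketch (assumes the microservice is on
# localhost:9000 and that the `requests` package is available).
import requests

payload = {
    "model": "Hermes-2-Pro-Llama-3-8B",
    "query": "What is AI?",
    "max_new_tokens": 100,
    "temperature": 0.7,
    "top_p": 0.9,
    "top_k": 50,
    "streaming": False,
}

resp = requests.post("http://localhost:9000/v1/chat/completions", json=payload, timeout=120)
resp.raise_for_status()
doc = resp.json()

# GeneratedDoc carries at least the generated text and the prompt that produced it.
print(doc["text"])
print(doc["prompt"])
```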
Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
aiohttp
docarray
fastapi
opentelemetry-api
opentelemetry-exporter-otlp
opentelemetry-sdk
Pillow
predictionguard
prometheus-fastapi-instrumentator
shortuuid
transformers
uvicorn
Lines changed: 68 additions & 0 deletions
@@ -0,0 +1,68 @@
#!/bin/bash
# Copyright (C) 2024 Prediction Guard, Inc.
# SPDX-License-Identifier: Apache-2.0

set -x  # Print commands and their arguments as they are executed

WORKPATH=$(dirname "$PWD")
ip_address=$(hostname -I | awk '{print $1}')  # Adjust to a more reliable command
if [ -z "$ip_address" ]; then
    ip_address="localhost"  # Default to localhost if IP address is empty
fi

function build_docker_images() {
    cd $WORKPATH
    echo $(pwd)
    docker build --no-cache -t opea/llm-pg:comps -f comps/llms/text-generation/predictionguard/Dockerfile .
    if [ $? -ne 0 ]; then
        echo "opea/llm-pg build failed"
        exit 1
    else
        echo "opea/llm-pg built successfully"
    fi
}

function start_service() {
    llm_service_port=9000
    unset http_proxy
    docker run -d --name=test-comps-llm-pg-server \
        -e http_proxy= -e https_proxy= \
        -e PREDICTIONGUARD_API_KEY=${PREDICTIONGUARD_API_KEY} \
        -p 9000:9000 --ipc=host opea/llm-pg:comps
    sleep 60  # Sleep for 1 minute to allow the service to start
}

function validate_microservice() {
    llm_service_port=9000
    result=$(http_proxy="" curl http://${ip_address}:${llm_service_port}/v1/chat/completions \
        -X POST \
        -d '{"model": "Hermes-2-Pro-Llama-3-8B", "query": "What is AI?", "streaming": false, "max_new_tokens": 100, "temperature": 0.7, "top_p": 1.0, "top_k": 50}' \
        -H 'Content-Type: application/json')

    if [[ $result == *"text"* ]]; then
        echo "Service response is correct."
    else
        echo "Result wrong. Received was $result"
        docker logs test-comps-llm-pg-server
        exit 1
    fi
}

function stop_docker() {
    cid=$(docker ps -aq --filter "name=test-comps-llm-pg-*")
    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
}

function main() {
    stop_docker

    build_docker_images
    start_service

    validate_microservice

    stop_docker
    echo y | docker system prune
}

main
