
Commit 391c4a5

Prediction Guard LLM component (#674)
Signed-off-by: sharanshirodkar7 <[email protected]>
1 parent 191061b commit 391c4a5

9 files changed: +269 −0 lines changed


.github/workflows/docker/compose/llms-compose-cd.yaml

Lines changed: 4 additions & 0 deletions
@@ -23,3 +23,7 @@ services:
     build:
       dockerfile: comps/llms/text-generation/vllm/llama_index/dependency/Dockerfile.intel_hpu
     image: ${REGISTRY:-opea}/llm-vllm-llamaindex-hpu:${TAG:-latest}
+  llm-predictionguard:
+    build:
+      dockerfile: comps/llms/text-generation/predictionguard/Dockerfile
+    image: ${REGISTRY:-opea}/llm-textgen-predictionguard:${TAG:-latest}
Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
# Copyright (C) 2024 Prediction Guard, Inc.
# SPDX-License-Identifier: Apache-2.0

FROM python:3.11-slim

COPY comps /home/comps

RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r /home/comps/llms/text-generation/predictionguard/requirements.txt

ENV PYTHONPATH=$PYTHONPATH:/home

WORKDIR /home/comps/llms/text-generation/predictionguard

ENTRYPOINT ["bash", "entrypoint.sh"]
Lines changed: 54 additions & 0 deletions
@@ -0,0 +1,54 @@
# Introduction

[Prediction Guard](https://docs.predictionguard.com) allows you to use hosted open-access LLMs, LVMs, and embedding functionality with seamlessly integrated safeguards. In addition to providing scalable access to open models, Prediction Guard lets you configure factual consistency checks, toxicity filters, PII filters, and prompt injection blocking. Join the [Prediction Guard Discord channel](https://discord.gg/TFHgnhAFKd) and request an API key to get started.

# Get Started

## Build Docker Image

```bash
cd ../../..
docker build -t opea/llm-textgen-predictionguard:latest -f comps/llms/text-generation/predictionguard/Dockerfile .
```

## Run the Prediction Guard Microservice

```bash
docker run -d -p 9000:9000 -e PREDICTIONGUARD_API_KEY=$PREDICTIONGUARD_API_KEY --name llm-textgen-predictionguard opea/llm-textgen-predictionguard:latest
```

# Consume the Prediction Guard Microservice

See the [Prediction Guard docs](https://docs.predictionguard.com/) for available model options.

## Without streaming

```bash
curl -X POST http://localhost:9000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "Hermes-2-Pro-Llama-3-8B",
    "query": "Tell me a joke.",
    "max_new_tokens": 100,
    "temperature": 0.7,
    "top_p": 0.9,
    "top_k": 50,
    "streaming": false
  }'
```

## With streaming

```bash
curl -N -X POST http://localhost:9000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "Hermes-2-Pro-Llama-3-8B",
    "query": "Tell me a joke.",
    "max_new_tokens": 100,
    "temperature": 0.7,
    "top_p": 0.9,
    "top_k": 50,
    "streaming": true
  }'
```
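
The streaming endpoint replies with server-sent-event style lines of the form `data: <token>`, terminated by a `data: [DONE]` marker. The following is a minimal Python sketch for consuming that stream programmatically; it assumes the service is running on `localhost:9000` as above and that the `requests` package is installed.

```python
# Minimal streaming-client sketch (assumes the microservice above is on
# localhost:9000 and that the `requests` package is available).
import requests

payload = {
    "model": "Hermes-2-Pro-Llama-3-8B",
    "query": "Tell me a joke.",
    "max_new_tokens": 100,
    "temperature": 0.7,
    "top_p": 0.9,
    "top_k": 50,
    "streaming": True,
}

with requests.post("http://localhost:9000/v1/chat/completions", json=payload, stream=True) as resp:
    resp.raise_for_status()
    for line in resp.iter_lines(decode_unicode=True):
        # The service emits "data: <token>" lines and a final "data: [DONE]" marker.
        if not line or not line.startswith("data: "):
            continue
        token = line[len("data: "):]
        if token == "[DONE]":
            break
        print(token, end="", flush=True)
print()
```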
Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
# Copyright (C) 2024 Prediction Guard, Inc.
# SPDX-License-Identifier: Apache-2.0
Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
# Copyright (C) 2024 Prediction Guard, Inc
# SPDX-License-Identifier: Apache-2.0

services:
  llm:
    image: opea/llm-textgen-predictionguard:latest
    container_name: llm-textgen-predictionguard
    ports:
      - "9000:9000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      PREDICTIONGUARD_API_KEY: ${PREDICTIONGUARD_API_KEY}
    restart: unless-stopped

networks:
  default:
    driver: bridge
Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
#!/usr/bin/env bash

# Copyright (C) 2024 Prediction Guard, Inc.
# SPDX-License-Identifier: Apache-2.0

#pip --no-cache-dir install -r requirements-runtime.txt

python llm_predictionguard.py
Lines changed: 86 additions & 0 deletions
@@ -0,0 +1,86 @@
# Copyright (C) 2024 Prediction Guard, Inc.
# SPDX-License-Identifier: Apache-2.0


import time

from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from predictionguard import PredictionGuard
from pydantic import BaseModel

from comps import (
    GeneratedDoc,
    LLMParamsDoc,
    ServiceType,
    opea_microservices,
    register_microservice,
    register_statistics,
    statistics_dict,
)

client = PredictionGuard()
app = FastAPI()


@register_microservice(
    name="opea_service@llm_predictionguard",
    service_type=ServiceType.LLM,
    endpoint="/v1/chat/completions",
    host="0.0.0.0",
    port=9000,
)
@register_statistics(names=["opea_service@llm_predictionguard"])
def llm_generate(input: LLMParamsDoc):
    start = time.time()

    messages = [
        {
            "role": "system",
            "content": "You are a helpful assistant. Your goal is to provide accurate, detailed, and safe responses to the user's queries.",
        },
        {"role": "user", "content": input.query},
    ]

    if input.streaming:

        async def stream_generator():
            chat_response = ""
            for res in client.chat.completions.create(
                model=input.model,
                messages=messages,
                max_tokens=input.max_new_tokens,
                temperature=input.temperature,
                top_p=input.top_p,
                top_k=input.top_k,
                stream=True,
            ):
                if "choices" in res["data"] and "delta" in res["data"]["choices"][0]:
                    delta_content = res["data"]["choices"][0]["delta"]["content"]
                    chat_response += delta_content
                    yield f"data: {delta_content}\n\n"
                else:
                    yield "data: [DONE]\n\n"

        statistics_dict["opea_service@llm_predictionguard"].append_latency(time.time() - start, None)
        return StreamingResponse(stream_generator(), media_type="text/event-stream")
    else:
        try:
            response = client.chat.completions.create(
                model=input.model,
                messages=messages,
                max_tokens=input.max_new_tokens,
                temperature=input.temperature,
                top_p=input.top_p,
                top_k=input.top_k,
            )
            response_text = response["choices"][0]["message"]["content"]
        except Exception as e:
            raise HTTPException(status_code=500, detail=str(e))

        statistics_dict["opea_service@llm_predictionguard"].append_latency(time.time() - start, None)
        return GeneratedDoc(text=response_text, prompt=input.query)


if __name__ == "__main__":
    opea_microservices["opea_service@llm_predictionguard"].start()
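
In the non-streaming branch above, the handler wraps the completion in a `GeneratedDoc`, so the JSON response exposes the generated `text` together with the originating `prompt`. Below is a minimal non-streaming client sketch; it assumes the microservice is reachable on `localhost:9000` and that the `requests` package is installed.

```python
# Minimal non-streaming client sketch (assumes the microservice is on
# localhost:9000 and that the `requests` package is available).
import requests

payload = {
    "model": "Hermes-2-Pro-Llama-3-8B",
    "query": "What is AI?",
    "max_new_tokens": 100,
    "temperature": 0.7,
    "top_p": 0.9,
    "top_k": 50,
    "streaming": False,
}

resp = requests.post("http://localhost:9000/v1/chat/completions", json=payload, timeout=120)
resp.raise_for_status()
doc = resp.json()

# GeneratedDoc carries at least the generated text and the prompt that produced it.
print(doc["text"])
print(doc["prompt"])
```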
Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
aiohttp
docarray
fastapi
opentelemetry-api
opentelemetry-exporter-otlp
opentelemetry-sdk
Pillow
predictionguard
prometheus-fastapi-instrumentator
shortuuid
transformers
uvicorn
Lines changed: 68 additions & 0 deletions
@@ -0,0 +1,68 @@
#!/bin/bash
# Copyright (C) 2024 Prediction Guard, Inc.
# SPDX-License-Identifier: Apache-2.0

set -x  # Print commands and their arguments as they are executed

WORKPATH=$(dirname "$PWD")
ip_address=$(hostname -I | awk '{print $1}')  # Adjust to a more reliable command
if [ -z "$ip_address" ]; then
    ip_address="localhost"  # Default to localhost if IP address is empty
fi

function build_docker_images() {
    cd $WORKPATH
    echo $(pwd)
    docker build --no-cache -t opea/llm-pg:comps -f comps/llms/text-generation/predictionguard/Dockerfile .
    if [ $? -ne 0 ]; then
        echo "opea/llm-pg build failed"
        exit 1
    else
        echo "opea/llm-pg built successfully"
    fi
}

function start_service() {
    llm_service_port=9000
    unset http_proxy
    docker run -d --name=test-comps-llm-pg-server \
        -e http_proxy= -e https_proxy= \
        -e PREDICTIONGUARD_API_KEY=${PREDICTIONGUARD_API_KEY} \
        -p 9000:9000 --ipc=host opea/llm-pg:comps
    sleep 60  # Sleep for 1 minute to allow the service to start
}

function validate_microservice() {
    llm_service_port=9000
    result=$(http_proxy="" curl http://${ip_address}:${llm_service_port}/v1/chat/completions \
        -X POST \
        -d '{"model": "Hermes-2-Pro-Llama-3-8B", "query": "What is AI?", "streaming": false, "max_new_tokens": 100, "temperature": 0.7, "top_p": 1.0, "top_k": 50}' \
        -H 'Content-Type: application/json')

    if [[ $result == *"text"* ]]; then
        echo "Service response is correct."
    else
        echo "Result wrong. Received was $result"
        docker logs test-comps-llm-pg-server
        exit 1
    fi
}

function stop_docker() {
    cid=$(docker ps -aq --filter "name=test-comps-llm-pg-*")
    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
}

function main() {
    stop_docker

    build_docker_images
    start_service

    validate_microservice

    stop_docker
    echo y | docker system prune
}

main
