
Commit 1513998

Authored by tileintel, siddhivelankar23, sjagtap1803, and pre-commit-ci[bot]
Retriever and lvm update for multimodal rag on videos (#606)
* updates (Signed-off-by: Tiep Le <[email protected]>)
* cosmetic (Signed-off-by: siddhivelankar23 <[email protected]>)
* update redis schema (Signed-off-by: siddhivelankar23 <[email protected]>)
* update multimodal config and docker compose retriever (Signed-off-by: siddhivelankar23 <[email protected]>)
* update requirements (Signed-off-by: siddhivelankar23 <[email protected]>)
* update retriever redis (Signed-off-by: siddhivelankar23 <[email protected]>)
* multimodal retriever implementation (Signed-off-by: siddhivelankar23 <[email protected]>)
* test for multimodal retriever (Signed-off-by: siddhivelankar23 <[email protected]>)
* include prompt preparation for multimodal rag on videos application (Signed-off-by: sjagtap1803 <[email protected]>)
* fix template (Signed-off-by: sjagtap1803 <[email protected]>)
* add test for llava for mm_rag_on_videos (Signed-off-by: sjagtap1803 <[email protected]>)
* update test (Signed-off-by: sjagtap1803 <[email protected]>)
* [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci
* fix index not found (Signed-off-by: sjagtap1803 <[email protected]>)
* add LVMSearchedMultimodalDoc (Signed-off-by: sjagtap1803 <[email protected]>)
* [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci
* remove INDEX_SCHEMA (Signed-off-by: siddhivelankar23 <[email protected]>)
* revise folder structure to comps/retrievers/langchain/redis_multimodal (Signed-off-by: siddhivelankar23 <[email protected]>)
* update test (Signed-off-by: siddhivelankar23 <[email protected]>)
* change port of redis to resolve CI test (Signed-off-by: siddhivelankar23 <[email protected]>)
* update test (Signed-off-by: siddhivelankar23 <[email protected]>)
* update lvms test (Signed-off-by: siddhivelankar23 <[email protected]>)

Signed-off-by: Tiep Le <[email protected]>
Signed-off-by: siddhivelankar23 <[email protected]>
Signed-off-by: sjagtap1803 <[email protected]>
Co-authored-by: siddhivelankar23 <[email protected]>
Co-authored-by: sjagtap1803 <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 90cc44f · commit 1513998

15 files changed: +525 −8 lines

comps/__init__.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -13,6 +13,7 @@
     LLMParamsDoc,
     SearchedDoc,
     SearchedMultimodalDoc,
+    LVMSearchedMultimodalDoc,
     RerankedDoc,
     TextDoc,
     RAGASParams,
```
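Since the new document type is re-exported at the package root, a quick sanity check is possible; a minimal sketch, assuming an environment with the comps package installed:

```python
# Verify the new export and its inheritance chain (names per this commit's diffs).
from comps import LVMSearchedMultimodalDoc, SearchedMultimodalDoc

assert issubclass(LVMSearchedMultimodalDoc, SearchedMultimodalDoc)
```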

comps/cores/proto/docarray.py

Lines changed: 18 additions & 0 deletions
```diff
@@ -107,6 +107,24 @@ class SearchedMultimodalDoc(SearchedDoc):
     metadata: List[Dict[str, Any]]


+class LVMSearchedMultimodalDoc(SearchedMultimodalDoc):
+    max_new_tokens: conint(ge=0, le=1024) = 512
+    top_k: int = 10
+    top_p: float = 0.95
+    typical_p: float = 0.95
+    temperature: float = 0.01
+    streaming: bool = False
+    repetition_penalty: float = 1.03
+    chat_template: Optional[str] = Field(
+        default=None,
+        description=(
+            "A template to use for this conversion. "
+            "If this is not passed, the model's default chat template will be "
+            "used instead. We recommend that the template contains {context} and {question} for multimodal-rag on videos."
+        ),
+    )
+
+
 class GeneratedDoc(BaseDoc):
     text: str
     prompt: str
```
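For orientation, a hedged sketch of constructing the new document type directly. The generation fields, metadata, and chat_template come from the diff above; initial_query and retrieved_docs are inherited from SearchedDoc, whose exact field types are not shown in this commit, so the DocList[TextDoc] shape below is an assumption and all values are illustrative.

```python
from docarray import DocList

from comps import LVMSearchedMultimodalDoc, TextDoc

doc = LVMSearchedMultimodalDoc(
    initial_query="What does the speaker say about revenue?",  # read by lvm.py below
    retrieved_docs=DocList[TextDoc]([TextDoc(text="Revenue grew 12% year over year.")]),  # assumed base-class field
    metadata=[
        {
            "b64_img_str": "<base64-encoded video frame>",
            "transcript_for_inference": "Revenue grew 12% year over year.",
        }
    ],
    chat_template="The transcript associated with the image is '{context}'. {question}",
)
print(doc.max_new_tokens, doc.temperature)  # defaults: 512 0.01
```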

comps/lvms/lvm.py

Lines changed: 32 additions & 7 deletions
```diff
@@ -5,12 +5,16 @@
 import json
 import os
 import time
+from typing import Union

 import requests
+from langchain_core.prompts import PromptTemplate
+from template import ChatTemplate

 from comps import (
     CustomLogger,
     LVMDoc,
+    LVMSearchedMultimodalDoc,
     ServiceType,
     TextDoc,
     opea_microservices,
@@ -29,20 +33,41 @@
     endpoint="/v1/lvm",
     host="0.0.0.0",
     port=9399,
-    input_datatype=LVMDoc,
-    output_datatype=TextDoc,
 )
 @register_statistics(names=["opea_service@lvm"])
-async def lvm(request: LVMDoc):
+async def lvm(request: Union[LVMDoc, LVMSearchedMultimodalDoc]) -> TextDoc:
     if logflag:
         logger.info(request)
     start = time.time()
-    img_b64_str = request.image
-    prompt = request.prompt
-    max_new_tokens = request.max_new_tokens
+    if isinstance(request, LVMSearchedMultimodalDoc):
+        if logflag:
+            logger.info("[LVMSearchedMultimodalDoc ] input from retriever microservice")
+        retrieved_metadatas = request.metadata
+        img_b64_str = retrieved_metadatas[0]["b64_img_str"]
+        initial_query = request.initial_query
+        context = retrieved_metadatas[0]["transcript_for_inference"]
+        prompt = initial_query
+        if request.chat_template is None:
+            prompt = ChatTemplate.generate_multimodal_rag_on_videos_prompt(initial_query, context)
+        else:
+            prompt_template = PromptTemplate.from_template(request.chat_template)
+            input_variables = prompt_template.input_variables
+            if sorted(input_variables) == ["context", "question"]:
+                prompt = prompt_template.format(question=initial_query, context=context)
+            else:
+                logger.info(
+                    f"[ LVMSearchedMultimodalDoc ] {prompt_template} not used, we only support 2 input variables ['question', 'context']"
+                )
+        max_new_tokens = request.max_new_tokens
+        if logflag:
+            logger.info(f"prompt generated for [LVMSearchedMultimodalDoc ] input from retriever microservice: {prompt}")

-    inputs = {"img_b64_str": img_b64_str, "prompt": prompt, "max_new_tokens": max_new_tokens}
+    else:
+        img_b64_str = request.image
+        prompt = request.prompt
+        max_new_tokens = request.max_new_tokens

+    inputs = {"img_b64_str": img_b64_str, "prompt": prompt, "max_new_tokens": max_new_tokens}
     # forward to the LLaVA server
     response = requests.post(url=f"{lvm_endpoint}/generate", data=json.dumps(inputs), proxies={"http": None})
```
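The original LVMDoc path is kept unchanged, so the service can still be called directly. A hedged client sketch follows: the port (9399) and endpoint (/v1/lvm) come from the register_microservice call above, and the image/prompt/max_new_tokens fields are the ones lvm.py reads; the host, file name, and use of the requests library are illustrative assumptions.

```python
import base64
import json

import requests

# Encode a single frame extracted from a video (illustrative file name).
with open("frame.png", "rb") as f:
    img_b64_str = base64.b64encode(f.read()).decode("utf-8")

# Plain LVMDoc-style payload: lvm.py reads image, prompt, and max_new_tokens.
payload = {
    "image": img_b64_str,
    "prompt": "What is happening in this frame?",
    "max_new_tokens": 128,
}

response = requests.post(
    "http://localhost:9399/v1/lvm",
    data=json.dumps(payload),
    headers={"Content-Type": "application/json"},
)
print(response.json())
```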

comps/lvms/requirements.txt

Lines changed: 1 addition & 0 deletions
```diff
@@ -2,6 +2,7 @@ datasets
 docarray[full]
 fastapi
 huggingface_hub
+langchain-core
 opentelemetry-api
 opentelemetry-exporter-otlp
 opentelemetry-sdk
```

comps/lvms/template.py

Lines changed: 10 additions & 0 deletions
New file (all lines added):

```python
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0


class ChatTemplate:

    @staticmethod
    def generate_multimodal_rag_on_videos_prompt(question: str, context: str):
        template = """The transcript associated with the image is '{context}'. {question}"""
        return template.format(context=context, question=question)
```
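A quick check of the new helper (the question and context values are illustrative):

```python
from template import ChatTemplate

prompt = ChatTemplate.generate_multimodal_rag_on_videos_prompt(
    question="What is the speaker wearing?",
    context="The speaker stands on stage holding a red sneaker.",
)
print(prompt)
# The transcript associated with the image is 'The speaker stands on stage holding a red sneaker.'. What is the speaker wearing?
```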
Lines changed: 123 additions & 0 deletions
New file (all lines added):

# Retriever Microservice

This retriever microservice is a highly efficient search service designed for handling and retrieving embedding vectors from multimodal data. It receives an embedding vector as input and performs a similarity search against vectors stored in a VectorDB database. Users must specify the VectorDB's URL and the index name, and the service searches within that index for the documents with the highest similarity to the input vector.

The service primarily relies on similarity measures in vector space to rapidly retrieve contextually similar documents. This vector-based retrieval approach is particularly well suited to large datasets, offering fast and accurate results that significantly enhance the efficiency and quality of information retrieval.

Overall, this microservice provides robust backend support for applications that require efficient similarity search, playing a vital role in scenarios such as recommendation systems, information retrieval, or any other context where precise measurement of document similarity is crucial.

## 🚀1. Start Microservice with Python (Option 1)

To start the retriever microservice, you must first install the required Python packages.

### 1.1 Install Requirements

```bash
pip install -r requirements.txt
```

### 1.2 Setup VectorDB Service

You need to set up your own VectorDB service (Redis in this example) and ingest your knowledge documents into the vector database.

For Redis, you can start a Docker container using the command below. Remember to ingest data into it manually.

```bash
docker run -d --name="redis-vector-db" -p 6379:6379 -p 8001:8001 redis/redis-stack:7.2.0-v9
```

### 1.3 Ingest images or video

Upload a video or images using the dataprep microservice; instructions can be found [here](https://github.com/opea-project/GenAIComps/tree/main/comps/dataprep/redis/multimodal_langchain/README.md).

### 1.4 Start Retriever Service

```bash
python retriever_redis.py
```

## 🚀2. Start Microservice with Docker (Option 2)
41+
42+
### 2.1 Setup Environment Variables
43+
44+
```bash
45+
export your_ip=$(hostname -I | awk '{print $1}')
46+
export REDIS_URL="redis://${your_ip}:6379"
47+
export INDEX_NAME=${your_index_name}
48+
```
49+
50+
### 2.2 Build Docker Image
51+
52+
```bash
53+
cd ../../../../
54+
docker build -t opea/multimodal-retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/langchain/redis_multimodal/docker/Dockerfile .
55+
```
56+
57+
To start a docker container, you have two options:
58+
59+
- A. Run Docker with CLI
60+
- B. Run Docker with Docker Compose
61+
62+
You can choose one as needed.
63+
64+
### 2.3 Run Docker with CLI (Option A)
65+
66+
```bash
67+
docker run -d --name="multimodal-retriever-redis-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME opea/multimodal-retriever-redis:latest
68+
```
69+
70+
### 2.4 Run Docker with Docker Compose (Option B)
71+
72+
```bash
73+
cd docker
74+
docker compose -f docker_compose_retriever.yaml up -d
75+
```
76+
77+
## 🚀3. Consume Retriever Service

### 3.1 Consume Retriever Service

To consume the Retriever Microservice, you can generate a mock embedding vector of length 512 with Python.

```bash
your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)")
curl http://${your_ip}:7000/v1/multimodal_retrieval \
  -X POST \
  -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding}}" \
  -H 'Content-Type: application/json'
```

You can also set search parameters for the retriever. The examples below use the similarity, similarity_distance_threshold, similarity_score_threshold, and mmr search types.

```bash
your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)")
curl http://localhost:7000/v1/multimodal_retrieval \
  -X POST \
  -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding},\"search_type\":\"similarity\", \"k\":4}" \
  -H 'Content-Type: application/json'
```

```bash
your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)")
curl http://localhost:7000/v1/multimodal_retrieval \
  -X POST \
  -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding},\"search_type\":\"similarity_distance_threshold\", \"k\":4, \"distance_threshold\":1.0}" \
  -H 'Content-Type: application/json'
```

```bash
your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)")
curl http://localhost:7000/v1/multimodal_retrieval \
  -X POST \
  -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding},\"search_type\":\"similarity_score_threshold\", \"k\":4, \"score_threshold\":0.2}" \
  -H 'Content-Type: application/json'
```

```bash
your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)")
curl http://localhost:7000/v1/multimodal_retrieval \
  -X POST \
  -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding},\"search_type\":\"mmr\", \"k\":4, \"fetch_k\":20, \"lambda_mult\":0.5}" \
  -H 'Content-Type: application/json'
```
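
The same request can also be issued from Python instead of curl; a minimal sketch, not part of this commit, assuming the requests library is available and the service is reachable on localhost:7000:

```python
import random

import requests

# Mock embedding of length 512, matching the curl examples above.
embedding = [random.uniform(-1, 1) for _ in range(512)]

payload = {
    "text": "What is the revenue of Nike in 2023?",
    "embedding": embedding,
    "search_type": "similarity",
    "k": 4,
}

response = requests.post("http://localhost:7000/v1/multimodal_retrieval", json=payload)
print(response.json())
```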
Lines changed: 2 additions & 0 deletions
New file (all lines added):

```
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
```
Lines changed: 29 additions & 0 deletions
New file (all lines added):

```dockerfile
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

FROM langchain/langchain:latest

ARG ARCH="cpu"

RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
    libgl1-mesa-glx \
    libjemalloc-dev \
    vim

RUN useradd -m -s /bin/bash user && \
    mkdir -p /home/user && \
    chown -R user /home/user/

COPY comps /home/user/comps

USER user

RUN pip install --no-cache-dir --upgrade pip && \
    if [ ${ARCH} = "cpu" ]; then pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \
    pip install --no-cache-dir -r /home/user/comps/retrievers/langchain/redis_multimodal/requirements.txt

ENV PYTHONPATH=$PYTHONPATH:/home/user

WORKDIR /home/user/comps/retrievers/langchain/redis_multimodal

ENTRYPOINT ["python", "retriever_redis.py"]
```
Lines changed: 23 additions & 0 deletions
New file (all lines added):

```yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

version: "1.0"

services:
  retriever:
    image: opea/multimodal-retriever-redis:latest
    container_name: multimodal-retriever-redis-server
    ports:
      - "7000:7000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}
    restart: unless-stopped

networks:
  default:
    driver: bridge
```
Lines changed: 77 additions & 0 deletions
New file (all lines added):

```python
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import os

current_file_path = os.path.abspath(__file__)
parent_dir = os.path.dirname(current_file_path)


def get_boolean_env_var(var_name, default_value=False):
    """Retrieve the boolean value of an environment variable.

    Args:
        var_name (str): The name of the environment variable to retrieve.
        default_value (bool): The default value to return if the variable
        is not found.
    Returns:
        bool: The value of the environment variable, interpreted as a boolean.
    """
    true_values = {"true", "1", "t", "y", "yes"}
    false_values = {"false", "0", "f", "n", "no"}

    # Retrieve the environment variable's value
    value = os.getenv(var_name, "").lower()

    # Decide the boolean value based on the content of the string
    if value in true_values:
        return True
    elif value in false_values:
        return False
    else:
        return default_value


# Check for openai API key
# if "OPENAI_API_KEY" not in os.environ:
#     raise Exception("Must provide an OPENAI_API_KEY as an env var.")


# Whether or not to enable langchain debugging
DEBUG = get_boolean_env_var("DEBUG", False)
# Set DEBUG env var to "true" if you wish to enable LC debugging module
if DEBUG:
    import langchain

    langchain.debug = True


# Embedding model
EMBED_MODEL = os.getenv("EMBED_MODEL", "BridgeTower/bridgetower-large-itm-mlm-itc")

# Redis Connection Information
REDIS_HOST = os.getenv("REDIS_HOST", "localhost")
REDIS_PORT = int(os.getenv("REDIS_PORT", 6379))


def format_redis_conn_from_env():
    redis_url = os.getenv("REDIS_URL", None)
    if redis_url:
        return redis_url
    else:
        using_ssl = get_boolean_env_var("REDIS_SSL", False)
        start = "rediss://" if using_ssl else "redis://"

        # if using RBAC
        password = os.getenv("REDIS_PASSWORD", None)
        username = os.getenv("REDIS_USERNAME", "default")
        if password is not None:
            start += f"{username}:{password}@"

        return start + f"{REDIS_HOST}:{REDIS_PORT}"


REDIS_URL = format_redis_conn_from_env()

# Vector Index Configuration
INDEX_NAME = os.getenv("INDEX_NAME", "test-index")
```
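A hedged illustration of how the connection string is composed from environment variables. The variable names follow the file above; the concrete values are examples, and the module name `config` is an assumption since the file name is not shown in this view. Because REDIS_HOST and REDIS_PORT are read at import time, the variables must be set before the module is imported.

```python
import os

os.environ.pop("REDIS_URL", None)        # no explicit URL, so one is built
os.environ["REDIS_HOST"] = "my-redis"
os.environ["REDIS_PORT"] = "6379"
os.environ["REDIS_SSL"] = "true"         # switches the scheme to rediss://
os.environ["REDIS_PASSWORD"] = "secret"  # RBAC credentials
os.environ["REDIS_USERNAME"] = "default"

import config  # the module shown above (assumed importable as `config`)

print(config.REDIS_URL)  # rediss://default:secret@my-redis:6379
```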
