
Commit 2c48bc8

Authored by BaoHuiling, with pre-commit-ci[bot] and kevinintel
Add Megaservice support for MMRAG VideoRAGQnA usecase (#603)
* add videoragqna gateway
* add test script for gateway
* rm ip
* fix existing bug
* [pre-commit.ci] auto fixes from pre-commit.com hooks (for more information, see https://pre-commit.ci)

Signed-off-by: BaoHuiling <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: kevinintel <[email protected]>
1 parent: 23cc3ea. Commit: 2c48bc8

File tree

7 files changed (+135, -3 lines)


comps/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -45,6 +45,7 @@
     AudioQnAGateway,
     RetrievalToolGateway,
     FaqGenGateway,
+    VideoRAGQnAGateway,
     VisualQnAGateway,
     MultimodalRAGWithVideosGateway,
 )

comps/cores/mega/constants.py

Lines changed: 1 addition & 0 deletions
@@ -38,6 +38,7 @@ class MegaServiceEndpoint(Enum):
     CHAT_QNA = "/v1/chatqna"
     AUDIO_QNA = "/v1/audioqna"
     VISUAL_QNA = "/v1/visualqna"
+    VIDEO_RAG_QNA = "/v1/videoragqna"
     CODE_GEN = "/v1/codegen"
     CODE_TRANS = "/v1/codetrans"
     DOC_SUMMARY = "/v1/docsum"
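Once a megaservice is bound to this endpoint, it can be exercised with a plain HTTP request. A minimal smoke-test sketch, assuming the gateway runs locally on its default port 8888; the payload mirrors ChatCompletionRequest, whose messages field the gateway handler accepts as a plain string:

    import requests

    # Hypothetical smoke test for the new endpoint; the host and port are
    # assumptions (the gateway added in this commit defaults to port 8888).
    url = "http://localhost:8888/v1/videoragqna"
    payload = {
        "messages": "What is happening in the video?",
        "stream": False,
        "max_tokens": 128,
    }

    resp = requests.post(url, json=payload, timeout=60)
    print(resp.json())  # ChatCompletionResponse with model="videoragqna"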

comps/cores/mega/gateway.py

Lines changed: 49 additions & 0 deletions
@@ -548,6 +548,55 @@ async def handle_request(self, request: Request):
         return ChatCompletionResponse(model="visualqna", choices=choices, usage=usage)
 
 
+class VideoRAGQnAGateway(Gateway):
+    def __init__(self, megaservice, host="0.0.0.0", port=8888):
+        super().__init__(
+            megaservice,
+            host,
+            port,
+            str(MegaServiceEndpoint.VIDEO_RAG_QNA),
+            ChatCompletionRequest,
+            ChatCompletionResponse,
+        )
+
+    async def handle_request(self, request: Request):
+        data = await request.json()
+        stream_opt = data.get("stream", False)
+        chat_request = ChatCompletionRequest.parse_obj(data)
+        prompt = self._handle_message(chat_request.messages)
+        parameters = LLMParams(
+            max_new_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024,
+            top_k=chat_request.top_k if chat_request.top_k else 10,
+            top_p=chat_request.top_p if chat_request.top_p else 0.95,
+            temperature=chat_request.temperature if chat_request.temperature else 0.01,
+            repetition_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 1.03,
+            streaming=stream_opt,
+        )
+        result_dict, runtime_graph = await self.megaservice.schedule(
+            initial_inputs={"text": prompt}, llm_parameters=parameters
+        )
+        for node, response in result_dict.items():
+            # Here it is assumed that the last microservice in the megaservice is the LVM.
+            if (
+                isinstance(response, StreamingResponse)
+                and node == list(self.megaservice.services.keys())[-1]
+                and self.megaservice.services[node].service_type == ServiceType.LVM
+            ):
+                return response
+        last_node = runtime_graph.all_leaves()[-1]
+        response = result_dict[last_node]["text"]
+        choices = []
+        usage = UsageInfo()
+        choices.append(
+            ChatCompletionResponseChoice(
+                index=0,
+                message=ChatMessage(role="assistant", content=response),
+                finish_reason="stop",
+            )
+        )
+        return ChatCompletionResponse(model="videoragqna", choices=choices, usage=usage)
+
+
 class RetrievalToolGateway(Gateway):
     """embed+retrieve+rerank."""
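For reference, the new gateway is constructed the same way as the other megaservice gateways. The sketch below is distilled from the test script added in this commit; "s1" and "s2" are placeholder microservices (registered elsewhere via register_microservice), standing in for a real retrieval/LVM pipeline:

    from comps import ServiceOrchestrator, VideoRAGQnAGateway, opea_microservices

    # Build an orchestrator over two already-registered microservices and
    # expose the resulting pipeline at /v1/videoragqna on port 9898.
    service_builder = ServiceOrchestrator()
    service_builder.add(opea_microservices["s1"]).add(opea_microservices["s2"])
    service_builder.flow_to(opea_microservices["s1"], opea_microservices["s2"])

    gateway = VideoRAGQnAGateway(service_builder, port=9898)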

comps/embeddings/multimodal_clip/embeddings_clip.py

Lines changed: 2 additions & 1 deletion
@@ -27,7 +27,8 @@ def embed_query(self, texts):
         return text_features
 
     def get_embedding_length(self):
-        return len(self.embed_query("sample_text"))
+        text_features = self.embed_query("sample_text")
+        return text_features.shape[1]
 
     def get_image_embeddings(self, images):
         """Input is list of images."""

comps/retrievers/langchain/vdms/retriever_vdms.py

Lines changed: 1 addition & 1 deletion
@@ -89,7 +89,7 @@ def retrieve(input: EmbedDoc) -> SearchedMultimodalDoc:
     # Create vectorstore
 
     if use_clip:
-        embeddings = vCLIP({"model_name": "openai/clip-vit-base-patch32", "num_frm": 4})
+        embeddings = vCLIP({"model_name": "openai/clip-vit-base-patch32", "num_frm": 64})
         dimensions = embeddings.get_embedding_length()
     elif tei_embedding_endpoint:
         embeddings = HuggingFaceEndpointEmbeddings(model=tei_embedding_endpoint, huggingfacehub_api_token=hf_token)

comps/retrievers/langchain/vdms/vdms_config.py

Lines changed: 1 addition & 1 deletion
@@ -77,4 +77,4 @@ def get_boolean_env_var(var_name, default_value=False):
 # VDMS_SCHEMA = os.getenv("VDMS_SCHEMA", "vdms_schema.yml")
 # INDEX_SCHEMA = os.path.join(parent_dir, VDMS_SCHEMA)
 SEARCH_ENGINE = "FaissFlat"
-DISTANCE_STRATEGY = "L2"
+DISTANCE_STRATEGY = "IP"
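Switching the distance strategy from "L2" to "IP" (inner product) yields the same ranking as cosine similarity when the embeddings are L2-normalized, as CLIP-style embeddings typically are, because ||a - b||^2 = 2 - 2(a . b) for unit vectors. A quick check of that identity:

    import numpy as np

    # For unit vectors, L2 distance and inner product induce the same ranking:
    # ||a - b||^2 = 2 - 2 * (a . b)
    rng = np.random.default_rng(0)
    a = rng.normal(size=512)
    b = rng.normal(size=512)
    a /= np.linalg.norm(a)
    b /= np.linalg.norm(b)

    print(np.isclose(np.sum((a - b) ** 2), 2 - 2 * (a @ b)))  # True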
New file: test script for the VideoRAGQnA gateway

Lines changed: 80 additions & 0 deletions
@@ -0,0 +1,80 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import json
+import unittest
+
+from fastapi.responses import StreamingResponse
+
+from comps import (
+    ServiceOrchestrator,
+    ServiceType,
+    TextDoc,
+    VideoRAGQnAGateway,
+    opea_microservices,
+    register_microservice,
+)
+from comps.cores.proto.docarray import LLMParams
+
+
+@register_microservice(name="s1", host="0.0.0.0", port=8083, endpoint="/v1/add")
+async def s1_add(request: TextDoc) -> TextDoc:
+    req = request.model_dump_json()
+    req_dict = json.loads(req)
+    text = req_dict["text"]
+    text += "opea "
+    return {"text": text}
+
+
+@register_microservice(name="s2", host="0.0.0.0", port=8084, endpoint="/v1/add", service_type=ServiceType.LVM)
+async def s2_add(request: TextDoc) -> TextDoc:
+    req = request.model_dump_json()
+    req_dict = json.loads(req)
+    text = req_dict["text"]
+
+    def streamer(text):
+        yield f"{text}".encode("utf-8")
+        for i in range(3):
+            yield "project!".encode("utf-8")
+
+    return StreamingResponse(streamer(text), media_type="text/event-stream")
+
+
+class TestServiceOrchestrator(unittest.IsolatedAsyncioTestCase):
+    def setUp(self):
+        self.s1 = opea_microservices["s1"]
+        self.s2 = opea_microservices["s2"]
+        self.s1.start()
+        self.s2.start()
+
+        self.service_builder = ServiceOrchestrator()
+
+        self.service_builder.add(opea_microservices["s1"]).add(opea_microservices["s2"])
+        self.service_builder.flow_to(self.s1, self.s2)
+        self.gateway = VideoRAGQnAGateway(self.service_builder, port=9898)
+
+    def tearDown(self):
+        self.s1.stop()
+        self.s2.stop()
+        self.gateway.stop()
+
+    async def test_schedule(self):
+        result_dict, _ = await self.service_builder.schedule(
+            initial_inputs={"text": "hello, "}, llm_parameters=LLMParams(streaming=True)
+        )
+        streaming_response = result_dict[self.s2.name]
+
+        if isinstance(streaming_response, StreamingResponse):
+            content = b""
+            async for chunk in streaming_response.body_iterator:
+                content += chunk
+            final_text = content.decode("utf-8")
+
+        print("Streamed content from s2: ", final_text)
+
+        expected_result = "hello, opea project!project!project!"
+        self.assertEqual(final_text, expected_result)
+
+
+if __name__ == "__main__":
+    unittest.main()
