
Add new UI/new features for EC-RAG #1665

Merged
merged 34 commits on Mar 20, 2025
Commits
bf3b5d6
Update stale issue and PR settings to 30 days for inactivity (#1661)
XuehaoSun Mar 14, 2025
c947ce4
Add final README.md and set_env.sh script for quickstart review. Prev…
jedwards-habana Mar 14, 2025
5cccfef
add new UI and new feature with EC-RAG_v1.3
Yongbozzz Mar 17, 2025
d2aca2d
fix bug
Yongbozzz Mar 17, 2025
d623176
add new UI and new feature with EC-RAG_v1.3
Yongbozzz Mar 17, 2025
e9be9ce
fix opea check error
Yongbozzz Mar 17, 2025
6a4c506
Fix input issue for manual-image-build.yml (#1666)
chensuyue Mar 17, 2025
c9935a2
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 17, 2025
d841923
update readme link
Yongbozzz Mar 17, 2025
6e00741
add spell ignore rule
Yongbozzz Mar 18, 2025
09c63d8
bug fix
Yongbozzz Mar 18, 2025
a13f096
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 17, 2025
0f25b80
minor fix
Yongbozzz Mar 18, 2025
9a821d4
minor fix
Yongbozzz Mar 18, 2025
ca4f2ca
Merge branch 'main' into ecragv1.3
Yongbozzz Mar 18, 2025
06a3438
fix issue
Yongbozzz Mar 18, 2025
257c55b
Merge branch 'main' into ecragv1.3
Yongbozzz Mar 18, 2025
9a87770
[ChatQnA] Enable Prometheus and Grafana with telemetry docker compos…
louie-tsai Mar 14, 2025
767ddca
Update stale issue and PR settings to 30 days for inactivity (#1661)
XuehaoSun Mar 14, 2025
ce82aff
Add final README.md and set_env.sh script for quickstart review. Prev…
jedwards-habana Mar 14, 2025
67434a1
add new UI and new feature with EC-RAG_v1.3
Yongbozzz Mar 17, 2025
02a936a
fix bug
Yongbozzz Mar 17, 2025
d16369b
add new UI and new feature with EC-RAG_v1.3
Yongbozzz Mar 17, 2025
e4fc5a2
fix opea check error
Yongbozzz Mar 17, 2025
66ee606
Fix input issue for manual-image-build.yml (#1666)
chensuyue Mar 17, 2025
3f35e86
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 17, 2025
1ccd892
update readme link
Yongbozzz Mar 17, 2025
f183702
add spell ignore rule
Yongbozzz Mar 18, 2025
ec8cd6c
bug fix
Yongbozzz Mar 18, 2025
fb7a1e0
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 17, 2025
1a45021
minor fix
Yongbozzz Mar 18, 2025
6cdbae2
minor fix
Yongbozzz Mar 18, 2025
3de837c
fix issue
Yongbozzz Mar 18, 2025
28e32c0
Merge branch 'ecragv1.3' of https://github.com/Yongbozzz/GenAIExample…
Yongbozzz Mar 18, 2025
1 change: 1 addition & 0 deletions .github/code_spell_ignore.txt
@@ -1,2 +1,3 @@
ModelIn
modelin
pressEnter
2 changes: 1 addition & 1 deletion EdgeCraftRAG/Dockerfile
@@ -6,4 +6,4 @@ FROM opea/comps-base:$BASE_TAG

COPY ./chatqna.py $HOME/chatqna.py

ENTRYPOINT ["python", "chatqna.py"]
ENTRYPOINT ["python", "chatqna.py"]
10 changes: 6 additions & 4 deletions EdgeCraftRAG/Dockerfile.server
@@ -10,7 +10,7 @@ RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missin
poppler-utils \
tesseract-ocr

RUN apt-get update && apt-get install -y gnupg wget
RUN apt-get update && apt-get install -y gnupg wget git
RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg
RUN echo "deb [arch=amd64,i386 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy client" | \
@@ -33,15 +33,17 @@ RUN chown -R user /templates/default_prompt.txt

COPY ./edgecraftrag /home/user/edgecraftrag

RUN mkdir -p /home/user/gradio_cache
ENV GRADIO_TEMP_DIR=/home/user/gradio_cache
RUN mkdir -p /home/user/ui_cache
ENV UI_UPLOAD_PATH=/home/user/ui_cache

WORKDIR /home/user/edgecraftrag
RUN pip install --no-cache-dir --upgrade pip setuptools==70.0.0 && \
pip install --no-cache-dir -r requirements.txt

WORKDIR /home/user/
RUN git clone https://github.com/openvinotoolkit/openvino.genai.git genai
ENV PYTHONPATH="$PYTHONPATH:/home/user/genai/tools/llm_bench"

USER user

ENTRYPOINT ["python", "-m", "edgecraftrag.server"]
ENTRYPOINT ["python", "-m", "edgecraftrag.server"]
46 changes: 39 additions & 7 deletions EdgeCraftRAG/README.md
100644 → 100755
@@ -7,10 +7,10 @@ quality and performance.

## What's New in this release?

- Support image/url data retrieval and display in EC-RAG
- Support display of document source used by LLM in UI
- Support pipeline remove operation in RESTful API and UI
- Support RAG pipeline performance benchmark and display in UI
- A sleek new UI with an enhanced user experience, built on Vue and Ant Design
- Support concurrent multi-request handling on the vLLM inference backend
- Support pipeline configuration through a JSON file
- Support system prompt modification through the API
- Fixed known issues in the EC-RAG UI and server

## Quick Start Guide
@@ -36,7 +36,7 @@ You can select "local" type in generation field which is the default approach to
#### vLLM with OpenVINO for Intel Arc GPU

You can also select "vLLM" as the generation type. To enable it, you'll need to build the vLLM image for Intel Arc GPU before service bootstrap.
Please follow this link [vLLM with OpenVINO](https://github.com/opea-project/GenAIComps/tree/main/comps/llms/text-generation/vllm/langchain#build-docker-image) to build the vLLM image.
Please follow this link [vLLM with OpenVINO](https://github.com/opea-project/GenAIComps/tree/main/comps/third_parties/vllm#23-vllm-with-openvino-on-intel-gpu-and-cpu) to build the vLLM image.
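
As a rough sketch (assuming the vLLM image has been built and tagged as `compose_vllm.yaml` expects), the vLLM-backed stack can then be started with the same Docker Compose flow described below:

```bash
# Sketch only: start the vLLM-backed deployment after completing the
# environment setup in the next section (exports, folder permissions, etc.)
cd GenAIExamples/EdgeCraftRAG/docker_compose/intel/gpu/arc
docker compose -f compose_vllm.yaml up -d
```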

### Start Edge Craft RAG Services with Docker Compose

@@ -45,12 +45,12 @@ cd GenAIExamples/EdgeCraftRAG/docker_compose/intel/gpu/arc

export MODEL_PATH="your model path for all your models"
export DOC_PATH="your doc path for uploading a dir of files"
export GRADIO_PATH="your gradio cache path for transferring files"
export UI_TMPFILE_PATH="your UI cache path for transferring files"
# If you have a specific prompt template, please uncomment the following line
# export PROMPT_PATH="your prompt path for prompt templates"

# Make sure all 3 folders have 1000:1000 permission, otherwise
# chown 1000:1000 ${MODEL_PATH} ${DOC_PATH} ${GRADIO_PATH}
# chown 1000:1000 ${MODEL_PATH} ${DOC_PATH} ${UI_TMPFILE_PATH}
# In addition, also make sure the .cache folder has 1000:1000 permission, otherwise
# chown 1000:1000 $HOME/.cache

@@ -189,6 +189,12 @@ After the pipeline creation, you can upload your data in the `Chatbot` page.
Then, you can submit messages in the chat box.
![chat_with_rag](assets/img/chat_with_rag.png)

If you want to try the Gradio UI, launch the services through compose_gradio.yaml, then access http://${HOST_IP}:8082 in your browser:

```bash
docker compose -f compose_gradio.yaml up -d
```

## Advanced User Guide

### Pipeline Management
@@ -226,8 +232,26 @@ curl -X PATCH http://${HOST_IP}:16010/v1/settings/pipelines/rag_test_local_llm -
curl -X DELETE http://${HOST_IP}:16010/v1/settings/pipelines/rag_test_local_llm -H "Content-Type: application/json" | jq '.'
```

#### Get pipeline JSON

```bash
curl -X GET http://${HOST_IP}:16010/v1/settings/pipelines/{name}/json -H "Content-Type: application/json" | jq '.'
```

#### Import pipeline from a JSON file

```bash
curl -X POST http://${HOST_IP}:16010/v1/settings/pipelines/import -H "Content-Type: multipart/form-data" -F "file=@your_test_pipeline_json_file.txt"| jq '.'
```
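
For example, a round-trip sketch combining the two endpoints above (the pipeline name and file paths are placeholders; this assumes the exported JSON is accepted unchanged by the import endpoint):

```bash
# Export an existing pipeline's configuration to a local file
curl -X GET http://${HOST_IP}:16010/v1/settings/pipelines/rag_test_local_llm/json | jq '.' > exported_pipeline.json
# Re-import it, e.g. on another host running EC-RAG
curl -X POST http://${HOST_IP}:16010/v1/settings/pipelines/import -H "Content-Type: multipart/form-data" -F "file=@exported_pipeline.json" | jq '.'
```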

#### Enable and check benchmark for pipelines

##### ⚠️ NOTICE ⚠️

Benchmarking activities may significantly reduce system performance.

**DO NOT** perform benchmarking in a production environment.

```bash
# Set ENABLE_BENCHMARK to true before launching services
export ENABLE_BENCHMARK="true"
@@ -308,3 +332,11 @@ curl -X DELETE http://${HOST_IP}:16010/v1/data/files/test2.docx -H "Content-Type
```bash
curl -X PATCH http://${HOST_IP}:16010/v1/data/files/test.pdf -H "Content-Type: application/json" -d '{"local_path":"docs/#REPLACE WITH YOUR FILE WITHIN MOUNTED DOC PATH#"}' | jq '.'
```

### System Prompt Management

#### Use custom system prompt

```bash
curl -X POST http://${HOST_IP}:16010/v1/chatqna/prompt -H "Content-Type: multipart/form-data" -F "file=@your_prompt_file.txt"
```
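
#### Reset system prompt

A matching reset endpoint is also exposed by the server in this change (see `chatqna.py` in this diff); a minimal sketch to revert to the default prompt:

```bash
curl -X POST http://${HOST_IP}:16010/v1/chatqna/prompt/reset
```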
Binary file modified EdgeCraftRAG/assets/img/chat_with_rag.png
100644 → 100755
Binary file modified EdgeCraftRAG/assets/img/create_pipeline.png
100644 → 100755
Binary file modified EdgeCraftRAG/assets/img/upload_data.png
100644 → 100755
4 changes: 2 additions & 2 deletions EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml
@@ -15,7 +15,7 @@ services:
volumes:
- ${MODEL_PATH:-${PWD}}:/home/user/models
- ${DOC_PATH:-${PWD}}:/home/user/docs
- ${GRADIO_PATH:-${PWD}}:/home/user/gradio_cache
- ${UI_TMPFILE_PATH:-${PWD}}:/home/user/ui_cache
- ${HF_CACHE:-${HOME}/.cache}:/home/user/.cache
- ${PROMPT_PATH:-${PWD}}:/templates/custom
ports:
@@ -54,7 +54,7 @@ services:
UI_SERVICE_PORT: ${UI_SERVICE_PORT:-8082}
UI_SERVICE_HOST_IP: ${UI_SERVICE_HOST_IP:-0.0.0.0}
volumes:
- ${GRADIO_PATH:-${PWD}}:/home/user/gradio_cache
- ${UI_TMPFILE_PATH:-${PWD}}:/home/user/ui_cache
ports:
- ${UI_SERVICE_PORT:-8082}:${UI_SERVICE_PORT:-8082}
restart: always
94 changes: 94 additions & 0 deletions EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_gradio.yaml
@@ -0,0 +1,94 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

services:
server:
image: ${REGISTRY:-opea}/edgecraftrag-server:${TAG:-latest}
container_name: edgecraftrag-server
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_ENDPOINT: ${HF_ENDPOINT}
vLLM_ENDPOINT: ${vLLM_ENDPOINT}
ENABLE_BENCHMARK: ${ENABLE_BENCHMARK:-false}
volumes:
- ${MODEL_PATH:-${PWD}}:/home/user/models
- ${DOC_PATH:-${PWD}}:/home/user/docs
- ${UI_TMPFILE_PATH:-${PWD}}:/home/user/ui_cache
- ${HF_CACHE:-${HOME}/.cache}:/home/user/.cache
- ${PROMPT_PATH:-${PWD}}:/templates/custom
ports:
- ${PIPELINE_SERVICE_PORT:-16010}:${PIPELINE_SERVICE_PORT:-16010}
devices:
- /dev/dri:/dev/dri
group_add:
- ${VIDEOGROUPID:-44}
- ${RENDERGROUPID:-109}
ecrag:
image: ${REGISTRY:-opea}/edgecraftrag:${TAG:-latest}
container_name: edgecraftrag
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
MEGA_SERVICE_PORT: ${MEGA_SERVICE_PORT:-16011}
MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-${HOST_IP}}
PIPELINE_SERVICE_PORT: ${PIPELINE_SERVICE_PORT:-16010}
PIPELINE_SERVICE_HOST_IP: ${PIPELINE_SERVICE_HOST_IP:-${HOST_IP}}
ports:
- ${MEGA_SERVICE_PORT:-16011}:${MEGA_SERVICE_PORT:-16011}
depends_on:
- server
ui:
image: ${REGISTRY:-opea}/edgecraftrag-ui-gradio:${TAG:-latest}
container_name: edgecraftrag-ui
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
MEGA_SERVICE_PORT: ${MEGA_SERVICE_PORT:-16011}
MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-${HOST_IP}}
PIPELINE_SERVICE_PORT: ${PIPELINE_SERVICE_PORT:-16010}
PIPELINE_SERVICE_HOST_IP: ${PIPELINE_SERVICE_HOST_IP:-${HOST_IP}}
UI_SERVICE_PORT: ${UI_SERVICE_PORT:-8082}
UI_SERVICE_HOST_IP: ${UI_SERVICE_HOST_IP:-0.0.0.0}
volumes:
- ${UI_TMPFILE_PATH:-${PWD}}:/home/user/ui_cache
ports:
- ${UI_SERVICE_PORT:-8082}:${UI_SERVICE_PORT:-8082}
restart: always
depends_on:
- server
- ecrag
# vllm-openvino-server:
# container_name: vllm-openvino-server
# image: opea/vllm-arc:latest
# ports:
# - ${VLLM_SERVICE_PORT:-8008}:80
# environment:
# HTTPS_PROXY: ${https_proxy}
# HTTP_PROXY: ${https_proxy}
# VLLM_OPENVINO_DEVICE: GPU
# HF_ENDPOINT: ${HF_ENDPOINT}
# HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
# volumes:
# - /dev/dri/by-path:/dev/dri/by-path
# - $HOME/.cache/huggingface:/root/.cache/huggingface
# devices:
# - /dev/dri
# group_add:
# - ${VIDEOGROUPID:-44}
# - ${RENDERGROUPID:-109}
# entrypoint: /bin/bash -c "\
# cd / && \
# export VLLM_CPU_KVCACHE_SPACE=50 && \
# export VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=ON && \
# python3 -m vllm.entrypoints.openai.api_server \
# --model '${LLM_MODEL}' \
# --max_model_len=1024 \
# --host 0.0.0.0 \
# --port 80"
networks:
default:
driver: bridge
4 changes: 2 additions & 2 deletions EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm.yaml
@@ -15,7 +15,7 @@ services:
volumes:
- ${MODEL_PATH:-${PWD}}:/home/user/models
- ${DOC_PATH:-${PWD}}:/home/user/docs
- ${GRADIO_PATH:-${PWD}}:/home/user/gradio_cache
- ${UI_TMPFILE_PATH:-${PWD}}:/home/user/ui_cache
- ${HF_CACHE:-${HOME}/.cache}:/home/user/.cache
- ${PROMPT_PATH:-${PWD}}:/templates/custom
ports:
@@ -54,7 +54,7 @@ services:
UI_SERVICE_PORT: ${UI_SERVICE_PORT:-8082}
UI_SERVICE_HOST_IP: ${UI_SERVICE_HOST_IP:-0.0.0.0}
volumes:
- ${GRADIO_PATH:-${PWD}}:/home/user/gradio_cache
- ${UI_TMPFILE_PATH:-${PWD}}:/home/user/ui_cache
ports:
- ${UI_SERVICE_PORT:-8082}:${UI_SERVICE_PORT:-8082}
restart: always
8 changes: 8 additions & 0 deletions EdgeCraftRAG/docker_image_build/build.yaml
@@ -18,6 +18,14 @@ services:
https_proxy: ${https_proxy}
dockerfile: ./ui/docker/Dockerfile.ui
image: ${REGISTRY:-opea}/edgecraftrag-ui:${TAG:-latest}
edgecraftrag-ui-gradio:
build:
context: ..
args:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
dockerfile: ./ui/docker/Dockerfile.gradio
image: ${REGISTRY:-opea}/edgecraftrag-ui-gradio:${TAG:-latest}
edgecraftrag:
build:
context: ..
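
For reference, a sketch of building the new Gradio UI image through this compose file (assuming the commands are run from the repository's `docker_image_build` directory):

```bash
cd GenAIExamples/EdgeCraftRAG/docker_image_build
# Build only the edgecraftrag-ui-gradio service defined above
docker compose -f build.yaml build edgecraftrag-ui-gradio
```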
82 changes: 57 additions & 25 deletions EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py
@@ -5,7 +5,7 @@
from comps.cores.proto.api_protocol import ChatCompletionRequest
from edgecraftrag.api_schema import RagOut
from edgecraftrag.context import ctx
from fastapi import FastAPI
from fastapi import FastAPI, File, HTTPException, UploadFile, status
from fastapi.responses import StreamingResponse

chatqna_app = FastAPI()
@@ -22,37 +22,69 @@ async def retrieval(request: ChatCompletionRequest):
ret.append((n.node.node_id, n.node.text, n.score))
return ret

return "Not found"
return None


# ChatQnA
@chatqna_app.post(path="/v1/chatqna")
async def chatqna(request: ChatCompletionRequest):
generator = ctx.get_pipeline_mgr().get_active_pipeline().generator
if generator:
request.model = generator.model_id
if request.stream:
ret, retri_res = ctx.get_pipeline_mgr().run_pipeline(chat_request=request)
return ret
else:
ret, retri_res = ctx.get_pipeline_mgr().run_pipeline(chat_request=request)
return str(ret)
try:
generator = ctx.get_pipeline_mgr().get_active_pipeline().generator
if generator:
request.model = generator.model_id
if request.stream:
ret, retri_res = ctx.get_pipeline_mgr().run_pipeline(chat_request=request)
return ret
else:
ret, retri_res = ctx.get_pipeline_mgr().run_pipeline(chat_request=request)
return str(ret)
except Exception as e:
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))


# RAGQnA
@chatqna_app.post(path="/v1/ragqna")
async def ragqna(request: ChatCompletionRequest):
res, retri_res = ctx.get_pipeline_mgr().run_pipeline(chat_request=request)
if isinstance(res, GeneratedDoc):
res = res.text
elif isinstance(res, StreamingResponse):
collected_data = []
async for chunk in res.body_iterator:
collected_data.append(chunk)
res = "".join(collected_data)

ragout = RagOut(query=request.messages, contexts=[], response=str(res))
for n in retri_res:
origin_text = n.node.get_text()
ragout.contexts.append(origin_text.strip())
return ragout
try:
res, retri_res = ctx.get_pipeline_mgr().run_pipeline(chat_request=request)
if isinstance(res, GeneratedDoc):
res = res.text
elif isinstance(res, StreamingResponse):
collected_data = []
async for chunk in res.body_iterator:
collected_data.append(chunk)
res = "".join(collected_data)

ragout = RagOut(query=request.messages, contexts=[], response=str(res))
for n in retri_res:
origin_text = n.node.get_text()
ragout.contexts.append(origin_text.strip())
return ragout
except Exception as e:
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))


# Upload prompt file for LLM ChatQnA
@chatqna_app.post(path="/v1/chatqna/prompt")
async def load_prompt(file: UploadFile = File(...)):
try:
generator = ctx.get_pipeline_mgr().get_active_pipeline().generator
if generator:
content = await file.read()
prompt_str = content.decode("utf-8")
generator.set_prompt(prompt_str)
return "Set LLM Prompt Successfully"
except Exception as e:
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))


# Reset prompt for LLM ChatQnA
@chatqna_app.post(path="/v1/chatqna/prompt/reset")
async def reset_prompt():
try:
generator = ctx.get_pipeline_mgr().get_active_pipeline().generator
if generator:
generator.reset_prompt()
return "Reset LLM Prompt Successfully"
except Exception as e:
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))