
Commit 84a7e57

add microservice for intent detection (#131)
* add microservice for intent detection
* [pre-commit.ci] auto fixes from pre-commit.com hooks (for more information, see https://pre-commit.ci)
* update license copyright
* add ut
* refine
* [pre-commit.ci] auto fixes from pre-commit.com hooks
* update folder
* [pre-commit.ci] auto fixes from pre-commit.com hooks
* fix test

Signed-off-by: Liangyx2 <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent b541fd8 commit 84a7e57

File tree: 7 files changed, +277 -0 lines changed


comps/intent_detection/README.md

Lines changed: 99 additions & 0 deletions
@@ -0,0 +1,99 @@
# Intent Detection Microservice by TGI

# 🚀1. Start Microservice with Python (Option 1)

## 1.1 Install Requirements

```bash
pip install -r requirements.txt
```
## 1.2 Start TGI Service

```bash
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export LANGCHAIN_TRACING_V2=true
export LANGCHAIN_API_KEY=${your_langchain_api_key}
export LANGCHAIN_PROJECT="opea/gen-ai-comps:llms"
docker run -p 8008:80 -v ./data:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:1.4 --model-id ${your_hf_llm_model}
```
## 1.3 Verify the TGI Service

```bash
curl http://${your_ip}:8008/generate \
  -X POST \
  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
  -H 'Content-Type: application/json'
```
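If the TGI service is healthy, this call returns a JSON body with a `generated_text` field containing the model's completion.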
## 1.4 Setup Environment Variables

```bash
export TGI_LLM_ENDPOINT="http://${your_ip}:8008"
export LANGCHAIN_TRACING_V2=true
export LANGCHAIN_API_KEY=${your_langchain_api_key}
export LANGCHAIN_PROJECT="opea/intent"
```
## 1.5 Start Intent Detection Microservice with Python Script

Start the intent detection microservice with the command below.

```bash
cd /your_project_path/GenAIComps/
cp comps/intent_detection/langchain/intent_detection.py .
python intent_detection.py
```
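The script serves on port 9000 (set by the `@register_microservice` decorator in `intent_detection.py`), which is the port the consumption example in section 3 assumes.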
# 🚀2. Start Microservice with Docker (Option 2)

## 2.1 Start TGI Service

Please refer to 1.2.

## 2.2 Setup Environment Variables

```bash
export TGI_LLM_ENDPOINT="http://${your_ip}:8008"
export LANGCHAIN_TRACING_V2=true
export LANGCHAIN_API_KEY=${your_langchain_api_key}
export LANGCHAIN_PROJECT="opea/intent"
```
## 2.3 Build Docker Image

```bash
cd /your_project_path/GenAIComps
docker build --no-cache -t opea/llm-tgi:latest -f comps/intent_detection/langchain/Dockerfile .
```
## 2.4 Run Docker with CLI (Option A)

```bash
docker run -it --name="intent-tgi-server" --net=host --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/llm-tgi:latest
```
## 2.5 Run with Docker Compose (Option B)

```bash
cd /your_project_path/GenAIComps/comps/intent_detection/langchain
export LLM_MODEL_ID=${your_hf_llm_model}
export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export TGI_LLM_ENDPOINT="http://tgi-service:80"
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export LANGCHAIN_API_KEY=${your_langchain_api_key}
docker compose -f docker_compose_intent.yaml up -d
```
# 🚀3. Consume Microservice

Once the intent detection microservice is started, users can invoke it with the command below.

```bash
curl http://${your_ip}:9000/v1/chat/intent \
  -X POST \
  -d '{"query":"What is Deep Learning?","max_new_tokens":10,"top_k":1,"temperature":0.001,"streaming":false}' \
  -H 'Content-Type: application/json'
```
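For programmatic access, the sketch below mirrors the curl call in Python. It is a minimal illustration, not part of this commit: the `requests` dependency and the `localhost` address are assumptions, while the endpoint, payload fields, and response fields (`text`, `prompt`) come from `intent_detection.py` and its `GeneratedDoc` return type.

```python
# Minimal client sketch for the intent detection endpoint (not part of
# this commit). Assumes `pip install requests` and a service on localhost.
import requests

payload = {
    "query": "What is Deep Learning?",
    "max_new_tokens": 10,
    "top_k": 1,
    "temperature": 0.001,
    "streaming": False,
}
resp = requests.post("http://localhost:9000/v1/chat/intent", json=payload, timeout=60)
resp.raise_for_status()
doc = resp.json()
# Per the GeneratedDoc model, "text" holds the detected intent
# ("chitchat" or "QA") and "prompt" echoes the original query.
print(doc["text"], doc["prompt"])
```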
comps/intent_detection/langchain/Dockerfile

Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

FROM langchain/langchain:latest

RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
    libgl1-mesa-glx \
    libjemalloc-dev \
    vim

RUN useradd -m -s /bin/bash user && \
    mkdir -p /home/user && \
    chown -R user /home/user/

USER user

COPY comps /home/user/comps

RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r /home/user/comps/intent_detection/langchain/requirements.txt

ENV PYTHONPATH=$PYTHONPATH:/home/user

WORKDIR /home/user/comps/intent_detection/langchain

ENTRYPOINT ["python", "intent_detection.py"]
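Note that `COPY comps /home/user/comps` assumes the Docker build context is the GenAIComps repository root, which is why section 2.3 of the README changes into the repository root and passes this Dockerfile via `-f`.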
comps/intent_detection/langchain/docker_compose_intent.yaml

Lines changed: 32 additions & 0 deletions
@@ -0,0 +1,32 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

version: "3.8"

services:
  tgi_service:
    image: ghcr.io/huggingface/text-generation-inference:1.4
    container_name: tgi-service
    ports:
      - "8008:80"
    volumes:
      - "./data:/data"
    shm_size: 1g
    command: --model-id ${LLM_MODEL_ID}
  llm:
    image: opea/llm-tgi:latest
    container_name: intent-tgi-server
    ports:
      - "9000:9000"
    ipc: host
    environment:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
    restart: unless-stopped

networks:
  default:
    driver: bridge
comps/intent_detection/langchain/intent_detection.py

Lines changed: 47 additions & 0 deletions
@@ -0,0 +1,47 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import os

from langchain import LLMChain, PromptTemplate
from langchain_community.llms import HuggingFaceEndpoint
from langsmith import traceable

from comps import GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice
from comps.intent_detection.langchain.template import IntentTemplate


@register_microservice(
    name="opea_service@llm_intent",
    service_type=ServiceType.LLM,
    endpoint="/v1/chat/intent",
    host="0.0.0.0",
    port=9000,
)
@traceable(run_type="llm")
def llm_generate(input: LLMParamsDoc):
    llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080")
    llm = HuggingFaceEndpoint(
        endpoint_url=llm_endpoint,
        max_new_tokens=input.max_new_tokens,
        top_k=input.top_k,
        top_p=input.top_p,
        typical_p=input.typical_p,
        temperature=input.temperature,
        repetition_penalty=input.repetition_penalty,
        streaming=input.streaming,
        timeout=600,
    )

    prompt = PromptTemplate(template=IntentTemplate.generate_intent_template, input_variables=["query"])

    llm_chain = LLMChain(prompt=prompt, llm=llm)

    response = llm_chain.invoke(input.query)
    response = response["text"]
    print("response", response)
    return GeneratedDoc(text=response, prompt=input.query)


if __name__ == "__main__":
    opea_microservices["opea_service@llm_intent"].start()
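The handler takes an `LLMParamsDoc` (the query plus the generation parameters shown in the curl examples) and returns a `GeneratedDoc` pairing the detected intent with the original query.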
comps/intent_detection/langchain/requirements.txt

Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
docarray[full]
fastapi
huggingface_hub
langchain==0.1.16
opentelemetry-api
opentelemetry-exporter-otlp
opentelemetry-sdk
prometheus-fastapi-instrumentator
shortuuid
comps/intent_detection/langchain/template.py

Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0


class IntentTemplate:
    def generate_intent_template(query):
        return f"""Please identify the intent of the user query. You may only respond with "chitchat" or "QA" without explanations or engaging in conversation.
### User Query: {query}, ### Response: """
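To see what the LLM actually receives, the snippet below renders the template for a sample query (an illustration, not part of this commit; `generate_intent_template` is called on the class directly since it takes the query as its only argument):

```python
# Render the intent prompt for a sample query (illustrative only).
prompt = IntentTemplate.generate_intent_template("What is Deep Learning?")
print(prompt)
# Please identify the intent of the user query. You may only respond with
# "chitchat" or "QA" without explanations or engaging in conversation.
# ### User Query: What is Deep Learning?, ### Response:
```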
Lines changed: 57 additions & 0 deletions
@@ -0,0 +1,57 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

set -xe

WORKPATH=$(dirname "$PWD")
ip_address=$(hostname -I | awk '{print $1}')

function build_docker_images() {
    cd $WORKPATH
    docker build --no-cache -t opea/llm-tgi:latest -f comps/intent_detection/langchain/Dockerfile .
}

function start_service() {
    tgi_endpoint=5004
    # Remember to set HF_TOKEN before invoking this test!
    export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
    model=Intel/neural-chat-7b-v3-3
    docker run -d --name="test-comps-intent-tgi-endpoint" -p $tgi_endpoint:80 -v ./data:/data --shm-size 1g ghcr.io/huggingface/text-generation-inference:1.4 --model-id $model

    export TGI_LLM_ENDPOINT="http://${ip_address}:${tgi_endpoint}"
    tei_service_port=5005
    unset http_proxy
    docker run -d --name="test-comps-intent-tei-server" -p ${tei_service_port}:9000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/llm-tgi:latest
    sleep 3m
}

function validate_microservice() {
    tei_service_port=5005
    http_proxy="" curl http://${ip_address}:${tei_service_port}/v1/chat/intent \
        -X POST \
        -d '{"query":"What is Deep Learning?","max_new_tokens":10,"top_k":1,"temperature":0.001,"streaming":false}' \
        -H 'Content-Type: application/json'
    docker logs test-comps-intent-tei-server
    docker logs test-comps-intent-tgi-endpoint
}

function stop_docker() {
    cid=$(docker ps -aq --filter "name=test-comps-intent*")
    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
}

function main() {
    stop_docker

    build_docker_images
    start_service

    validate_microservice

    stop_docker
    echo y | docker system prune
}

main
