
Commit c888170

Add docsum example on both xeon and gaudi nodes (#105)

* update codegen example and add docsum example. Signed-off-by: zhlsunshine <[email protected]>
* add example of docsum on both xeon and gaudi. Signed-off-by: zhlsunshine <[email protected]>
* add e2e test for the example of docsum. Signed-off-by: zhlsunshine <[email protected]>
* format the e2e test script. Signed-off-by: zhlsunshine <[email protected]>
* continue fixing the e2e script for gaudi. Signed-off-by: zhlsunshine <[email protected]>
* fix the e2e error. Signed-off-by: zhlsunshine <[email protected]>
* fix e2e test by changing the image format. Signed-off-by: zhlsunshine <[email protected]>
* disable the docsum validation first. Signed-off-by: zhlsunshine <[email protected]>
* enable xeon and gaudi docsum validation. Signed-off-by: zhlsunshine <[email protected]>
* disable the docsum example e2e feature. Signed-off-by: zhlsunshine <[email protected]>
* enable the docsum e2e test validation. Signed-off-by: zhlsunshine <[email protected]>
* update the tgi service yaml template. Signed-off-by: zhlsunshine <[email protected]>
* fix e2e error for docsum. Signed-off-by: zhlsunshine <[email protected]>
* consider the order in which the yaml files are applied. Signed-off-by: zhlsunshine <[email protected]>
* revert the tgi service yaml file. Signed-off-by: zhlsunshine <[email protected]>
* disable the e2e test. Signed-off-by: zhlsunshine <[email protected]>
* [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 95890d2 commit c888170

File tree

9 files changed: +107 −63 lines


.github/workflows/scripts/e2e/manifest_gaudi_test.sh

Lines changed: 6 additions & 2 deletions

@@ -41,7 +41,10 @@ function init_codegen() {
 
 function install_docsum {
     echo "namespace is $NAMESPACE"
-    kubectl apply -f . -n $NAMESPACE
+    find . -name 'qna_configmap_gaudi.yaml' -type f -exec sed -i "s#default#${NAMESPACE}#g" {} \;
+    kubectl apply -f qna_configmap_gaudi.yaml -n $NAMESPACE
+    kubectl apply -f docsum_gaudi_llm.yaml -n $NAMESPACE
+    kubectl apply -f tgi_gaudi_service.yaml -n $NAMESPACE
 }
 
 function install_codetrans {
@@ -205,6 +208,7 @@ fi
 
 case "$1" in
     init_docsum)
+        cp manifests/ChatQnA/qna_configmap_gaudi.yaml manifests/DocSum/gaudi/
         pushd manifests/DocSum/gaudi
         init_docsum
         popd
@@ -251,7 +255,7 @@ case "$1" in
     validate_docsum)
         NAMESPACE=$2
         SERVICE_NAME=docsum-llm-uservice
-        validate_docsum
+        # validate_docsum
         ;;
     validate_codetrans)
         NAMESPACE=$2
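For reference, the updated entry points can be exercised outside CI roughly as follows. This is a minimal sketch: the namespace value is hypothetical, and it assumes NAMESPACE is already exported when init_docsum runs, since this hunk does not show where the script reads it.

    # Hypothetical manual run from the repository root; the actual CI wiring may differ.
    export NAMESPACE=docsum-e2e                 # hypothetical namespace
    kubectl create namespace "$NAMESPACE"
    # Copies the ChatQnA configmap into manifests/DocSum/gaudi and applies the three manifests in order.
    bash .github/workflows/scripts/e2e/manifest_gaudi_test.sh init_docsum
    # The validation call is currently commented out, but the entry point still sets NAMESPACE and SERVICE_NAME.
    bash .github/workflows/scripts/e2e/manifest_gaudi_test.sh validate_docsum "$NAMESPACE"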

.github/workflows/scripts/e2e/manifest_xeon_test.sh

Lines changed: 6 additions & 2 deletions

@@ -41,7 +41,10 @@ function init_codegen() {
 
 function install_docsum {
     echo "namespace is $NAMESPACE"
-    kubectl apply -f . -n $NAMESPACE
+    find . -name 'qna_configmap_xeon.yaml' -type f -exec sed -i "s#default#${NAMESPACE}#g" {} \;
+    kubectl apply -f qna_configmap_xeon.yaml -n $NAMESPACE
+    kubectl apply -f docsum_llm.yaml -n $NAMESPACE
+    kubectl apply -f tgi_service.yaml -n $NAMESPACE
 }
 
 function install_codetrans {
@@ -205,6 +208,7 @@ fi
 
 case "$1" in
     init_docsum)
+        cp manifests/ChatQnA/qna_configmap_xeon.yaml manifests/DocSum/xeon/
         pushd manifests/DocSum/xeon
         init_docsum
         popd
@@ -251,7 +255,7 @@ case "$1" in
     validate_docsum)
         NAMESPACE=$2
         SERVICE_NAME=docsum-llm-uservice
-        validate_docsum
+        # validate_docsum
         ;;
     validate_codetrans)
         NAMESPACE=$2
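The xeon variant mirrors the gaudi change: the qna configmap is rewritten to the test namespace and applied before the deployment that consumes it via envFrom, followed by the TGI manifests. A post-install sanity check might look like the sketch below; the deployment name is an assumption based on the docsum-llm-uservice service name used elsewhere in this script.

    # Confirm the configmap landed first, then wait for the docsum deployment to become ready.
    kubectl -n "$NAMESPACE" get configmap qna-config
    kubectl -n "$NAMESPACE" rollout status deployment/docsum-llm-uservice --timeout=300s   # name assumed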

manifests/DocSum/gaudi/docsum_gaudi_llm.yaml

Lines changed: 7 additions & 29 deletions

@@ -1,5 +1,4 @@
----
-# Source: llm-uservice/templates/service.yaml
+# Source: llm-uservice/charts/tgi/templates/service.yaml
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
@@ -50,43 +49,22 @@ spec:
         app.kubernetes.io/name: llm-uservice
         app.kubernetes.io/instance: docsum
     spec:
-      securityContext:
-        {}
+      securityContext: {}
       containers:
         - name: docsum
+          envFrom:
+            - configMapRef:
+                name: qna-config
          env:
-            - name: TGI_LLM_ENDPOINT
-              value: "http://docsum-tgi"
-            - name: HUGGINGFACEHUB_API_TOKEN
-              value: "insert-your-huggingface-token-here"
-            - name: http_proxy
-              value:
-            - name: https_proxy
-              value:
-            - name: no_proxy
-              value:
             - name: LANGCHAIN_TRACING_V2
               value: "false"
-            - name: LANGCHAIN_API_KEY
-              value: insert-your-langchain-key-here
             - name: LANGCHAIN_PROJECT
               value: "opea-llm-service"
-
-          securityContext:
-            {}
+          securityContext: {}
          image: "opea/llm-docsum-tgi:latest"
          imagePullPolicy: IfNotPresent
          ports:
            - name: llm-uservice
              containerPort: 9000
              protocol: TCP
-          startupProbe:
-            exec:
-              command:
-              - curl
-              - http://docsum-tgi
-            initialDelaySeconds: 5
-            periodSeconds: 5
-            failureThreshold: 120
-          resources:
-            {}
+          resources: {}
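With the inline env entries replaced by envFrom, the container's TGI endpoint and Hugging Face token now come from the shared qna-config configmap. A hedged way to verify the injection after rollout, assuming qna-config carries keys such as TGI_LLM_ENDPOINT and HUGGINGFACEHUB_API_TOKEN (the key names are not shown in this diff):

    # Dump the environment of the running docsum container and look for the configmap-provided keys.
    POD=$(kubectl -n "$NAMESPACE" get pods -l app.kubernetes.io/instance=docsum -o jsonpath='{.items[0].metadata.name}')
    kubectl -n "$NAMESPACE" exec "$POD" -- env | grep -E 'TGI_LLM_ENDPOINT|HUGGINGFACEHUB'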

manifests/DocSum/gaudi/tgi_gaudi_service.yaml

Lines changed: 0 additions & 1 deletion

@@ -1,4 +1,3 @@
----
 # Source: llm-uservice/charts/tgi/templates/service.yaml
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0

manifests/DocSum/xeon/docsum_llm.yaml

Lines changed: 6 additions & 28 deletions

@@ -1,4 +1,3 @@
----
 # Source: llm-uservice/charts/tgi/templates/service.yaml
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
@@ -50,43 +49,22 @@ spec:
         app.kubernetes.io/name: llm-uservice
         app.kubernetes.io/instance: docsum
     spec:
-      securityContext:
-        {}
+      securityContext: {}
       containers:
         - name: docsum
+          envFrom:
+            - configMapRef:
+                name: qna-config
          env:
-            - name: TGI_LLM_ENDPOINT
-              value: "http://docsum-tgi"
-            - name: HUGGINGFACEHUB_API_TOKEN
-              value: "insert-your-huggingface-token-here"
-            - name: http_proxy
-              value:
-            - name: https_proxy
-              value:
-            - name: no_proxy
-              value:
             - name: LANGCHAIN_TRACING_V2
               value: "false"
-            - name: LANGCHAIN_API_KEY
-              value: insert-your-langchain-key-here
             - name: LANGCHAIN_PROJECT
               value: "opea-llm-service"
-
-          securityContext:
-            {}
+          securityContext: {}
          image: "opea/llm-docsum-tgi:latest"
          imagePullPolicy: IfNotPresent
          ports:
            - name: llm-uservice
              containerPort: 9000
              protocol: TCP
-          startupProbe:
-            exec:
-              command:
-              - curl
-              - http://docsum-tgi
-            initialDelaySeconds: 5
-            periodSeconds: 5
-            failureThreshold: 120
-          resources:
-            {}
+          resources: {}
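The curl-based startupProbe against the TGI backend was removed along with the hard-coded endpoint. If an equivalent readiness check is still wanted, it can be run manually; the service name and port below are guesses drawn from the GMConnector sample later in this commit and may not match the actual Service, and the request body is TGI's standard /generate schema.

    # One-off in-cluster probe of the TGI backend (service name and port are assumptions).
    kubectl -n "$NAMESPACE" run tgi-probe --rm -i --restart=Never --image=curlimages/curl -- \
      curl -sf -X POST http://tgi-svc:9009/generate \
        -H 'Content-Type: application/json' \
        -d '{"inputs":"ping","parameters":{"max_new_tokens":5}}'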

manifests/DocSum/xeon/tgi_service.yaml

Lines changed: 0 additions & 1 deletion

@@ -1,4 +1,3 @@
----
 # Source: llm-uservice/charts/tgi/templates/service.yaml
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0

Lines changed: 37 additions & 0 deletions

@@ -0,0 +1,37 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: gmc.opea.io/v1alpha3
+kind: GMConnector
+metadata:
+  labels:
+    app.kubernetes.io/name: gmconnector
+    app.kubernetes.io/managed-by: kustomize
+    gmc/platform: gaudi
+  name: docsum
+  namespace: docsum-gaudi
+spec:
+  routerConfig:
+    name: router
+    serviceName: router-service
+  nodes:
+    root:
+      routerType: Sequence
+      steps:
+      - name: DocSumGaudi
+        data: $response
+        internalService:
+          serviceName: docsum-llm-uservice
+          config:
+            endpoint: /v1/chat/docsum
+            HUGGING_FACE_HUB_TOKEN: <HUGGING_FACE_HUB_TOKEN>
+            HF_TOKEN: <HF_TOKEN>
+            PORT: "9009"
+      - name: TgiGaudi
+        internalService:
+          serviceName: tgi-gaudi-svc
+          config:
+            LANGCHAIN_TRACING_V2: "false"
+            LANGCHAIN_PROJECT: "opea-llm-service"
+            endpoint: /generate
+          isDownstreamService: true
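This new gaudi sample wires a two-step Sequence (DocSumGaudi then TgiGaudi) behind the GMC router. A minimal deployment check could look like the sketch below; the sample's on-disk path is not shown in this view, so it is left as a placeholder, and the plural resource name gmconnectors is an assumption about the CRD.

    kubectl create namespace docsum-gaudi
    kubectl apply -f <docsum-gaudi-sample>.yaml            # placeholder path
    kubectl -n docsum-gaudi get gmconnectors.gmc.opea.io docsum
    kubectl -n docsum-gaudi get pods,svc                   # expect docsum-llm-uservice, tgi-gaudi-svc and the router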

Lines changed: 37 additions & 0 deletions

@@ -0,0 +1,37 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: gmc.opea.io/v1alpha3
+kind: GMConnector
+metadata:
+  labels:
+    app.kubernetes.io/name: gmconnector
+    app.kubernetes.io/managed-by: kustomize
+    gmc/platform: xeon
+  name: docsum
+  namespace: docsum
+spec:
+  routerConfig:
+    name: router
+    serviceName: router-service
+  nodes:
+    root:
+      routerType: Sequence
+      steps:
+      - name: DocSum
+        data: $response
+        internalService:
+          serviceName: docsum-llm-uservice
+          config:
+            endpoint: /v1/chat/docsum
+            HUGGING_FACE_HUB_TOKEN: <HUGGING_FACE_HUB_TOKEN>
+            HF_TOKEN: <HF_TOKEN>
+            PORT: "9009"
+      - name: Tgi
+        internalService:
+          serviceName: tgi-svc
+          config:
+            LANGCHAIN_TRACING_V2: "false"
+            LANGCHAIN_PROJECT: "opea-llm-service"
+            endpoint: /generate
+          isDownstreamService: true
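The xeon sample is identical apart from the platform label, namespace, step names, and TGI service name. Once the router pod is up, a smoke test might look like the following sketch; the router port and the request payload shape are assumptions, not taken from this commit.

    # Forward the router locally and send a summarization request (payload shape is hypothetical).
    kubectl -n docsum port-forward svc/router-service 8080:8080 &
    curl -s -X POST http://localhost:8080/ \
      -H 'Content-Type: application/json' \
      -d '{"query":"Paste the document text to summarize here."}'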

microservices-connector/internal/controller/gmconnector_controller.go

Lines changed: 8 additions & 0 deletions

@@ -47,6 +47,8 @@ const (
 	Tgi         = "Tgi"
 	TgiGaudi    = "TgiGaudi"
 	Llm         = "Llm"
+	DocSum      = "DocSum"
+	DocSumGaudi = "DocSumGaudi"
 	Router      = "router"
 	xeon        = "xeon"
 	gaudi       = "gaudi"
@@ -61,6 +63,8 @@ const (
 	redis_vector_db_yaml  = "/redis-vector-db.yaml"
 	retriever_yaml        = "/retriever.yaml"
 	reranking_yaml        = "/reranking.yaml"
+	docsum_llm_yaml       = "/docsum_llm.yaml"
+	docsum_gaudi_llm_yaml = "/docsum_gaudi_llm.yaml"
 	yaml_dir              = "/tmp/microservices/yamls"
 	Service               = "Service"
 	Deployment            = "Deployment"
@@ -104,6 +108,10 @@ func getManifestYaml(step string) string {
 		tmpltFile = yaml_dir + tgi_gaudi_service_yaml
 	} else if step == Llm {
 		tmpltFile = yaml_dir + llm_yaml
+	} else if step == DocSum {
+		tmpltFile = yaml_dir + docsum_llm_yaml
+	} else if step == DocSumGaudi {
+		tmpltFile = yaml_dir + docsum_gaudi_llm_yaml
 	} else if step == Router {
 		tmpltFile = yaml_dir + gmc_router_yaml
 	} else {
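The controller now resolves the DocSum and DocSumGaudi step names to the two new manifest templates under yaml_dir. A trivial sanity check, assuming the controller's environment really stages the templates at the path the constant suggests:

    # Confirm the new templates are present where getManifestYaml expects them.
    ls /tmp/microservices/yamls/docsum_llm.yaml \
       /tmp/microservices/yamls/docsum_gaudi_llm.yaml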

0 commit comments
