Skip to content

Commit a888b8b

Browse files
authored
Add image build job in docker compose e2e gaudi test in CI (opea-project#305)
Signed-off-by: Yingchun Guo <[email protected]>
1 parent 62a34fa commit a888b8b

File tree

8 files changed

+244
-50
lines changed

8 files changed

+244
-50
lines changed

.github/workflows/manifest-e2e.yml

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ jobs:
2323
uses: ./.github/workflows/reuse-get-test-matrix.yml
2424
with:
2525
diff_excluded_files: '.github|deprecated|docker|assets|*.md|*.txt'
26-
xeon_server_label: 'k8s'
27-
gaudi_server_label: ""
26+
xeon_server_label: 'xeon'
27+
gaudi_server_label: 'gaudi'
2828

2929
mega-image-build:
3030
needs: job1
@@ -34,12 +34,13 @@ jobs:
3434
with:
3535
image_tag: ${{ github.event.pull_request.head.sha }}
3636
mega_service: "${{ matrix.example }}"
37+
runner_label: "docker-build-${{ matrix.hardware }}"
3738

3839
manifest-test:
3940
needs: [job1, mega-image-build]
4041
strategy:
4142
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
42-
runs-on: ${{ matrix.hardware }}
43+
runs-on: "k8s-${{ matrix.hardware }}"
4344
continue-on-error: true
4445
steps:
4546
- name: E2e test manifest
@@ -62,37 +63,40 @@ jobs:
6263
echo "NAMESPACE=$lower_example-$(date +%Y%m%d%H%M%S)" >> $GITHUB_ENV
6364
echo "ROLLOUT_TIMEOUT_SECONDS=1800s" >> $GITHUB_ENV
6465
echo "KUBECTL_TIMEOUT_SECONDS=60s" >> $GITHUB_ENV
66+
echo "continue_test=true" >> $GITHUB_ENV
6567
echo "should_cleanup=false" >> $GITHUB_ENV
6668
echo "skip_validate=true" >> $GITHUB_ENV
6769
echo "NAMESPACE=$NAMESPACE"
6870
69-
- name: Initialize manifest testing
70-
run: |
71-
${{ github.workspace }}/${{ matrix.example }}/tests/test_manifest_on_xeon.sh init_${{ matrix.example }}
72-
7371
- name: Kubectl install
7472
id: install
7573
run: |
76-
echo "should_cleanup=true" >> $GITHUB_ENV
77-
kubectl create ns $NAMESPACE
78-
${{ github.workspace }}/${{ matrix.example }}/tests/test_manifest_on_xeon.sh install_${{ matrix.example }} $NAMESPACE
79-
echo "Testing ${{ matrix.example }}, waiting for pod ready..."
80-
if kubectl rollout status deployment --namespace "$NAMESPACE" --timeout "$ROLLOUT_TIMEOUT_SECONDS"; then
81-
echo "Testing manifests ${{ matrix.example }}, waiting for pod ready done!"
82-
echo "skip_validate=false" >> $GITHUB_ENV
74+
if [[ ! -f ${{ github.workspace }}/${{ matrix.example }}/tests/test_manifest_on_${{ matrix.hardware }}.sh ]]; then
75+
echo "No test script found, exist test!"
76+
exit 0
8377
else
84-
echo "Timeout waiting for pods in namespace $NAMESPACE to be ready!"
85-
exit 1
78+
${{ github.workspace }}/${{ matrix.example }}/tests/test_manifest_on_${{ matrix.hardware }}.sh init_${{ matrix.example }}
79+
echo "should_cleanup=true" >> $GITHUB_ENV
80+
kubectl create ns $NAMESPACE
81+
${{ github.workspace }}/${{ matrix.example }}/tests/test_manifest_on_${{ matrix.hardware }}.sh install_${{ matrix.example }} $NAMESPACE
82+
echo "Testing ${{ matrix.example }}, waiting for pod ready..."
83+
if kubectl rollout status deployment --namespace "$NAMESPACE" --timeout "$ROLLOUT_TIMEOUT_SECONDS"; then
84+
echo "Testing manifests ${{ matrix.example }}, waiting for pod ready done!"
85+
echo "skip_validate=false" >> $GITHUB_ENV
86+
else
87+
echo "Timeout waiting for pods in namespace $NAMESPACE to be ready!"
88+
exit 1
89+
fi
90+
sleep 60
8691
fi
87-
sleep 60
8892
8993
- name: Validate e2e test
9094
if: always()
9195
run: |
9296
if $skip_validate; then
9397
echo "Skip validate"
9498
else
95-
${{ github.workspace }}/${{ matrix.example }}/tests/test_manifest_on_xeon.sh validate_${{ matrix.example }} $NAMESPACE
99+
${{ github.workspace }}/${{ matrix.example }}/tests/test_manifest_on_${{ matrix.hardware }}.sh validate_${{ matrix.example }} $NAMESPACE
96100
fi
97101
98102
- name: Kubectl uninstall

.github/workflows/scripts/build_push.sh

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,7 @@ function docker_build() {
3535
echo "Building ${IMAGE_REPO}opea/$1:$IMAGE_TAG using Dockerfile $DOCKERFILE_PATH"
3636
# if https_proxy and http_proxy are set, pass them to docker build
3737
if [ -z "$https_proxy" ]; then
38-
#docker build --no-cache -t ${IMAGE_REPO}opea/$1:$IMAGE_TAG -f $DOCKERFILE_PATH .
39-
docker build -t ${IMAGE_REPO}opea/$1:$IMAGE_TAG -f $DOCKERFILE_PATH .
38+
docker build --no-cache -t ${IMAGE_REPO}opea/$1:$IMAGE_TAG -f $DOCKERFILE_PATH .
4039
else
4140
docker build --no-cache -t ${IMAGE_REPO}opea/$1:$IMAGE_TAG --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f $DOCKERFILE_PATH .
4241
fi

ChatQnA/tests/test_chatqna_on_gaudi.sh

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,12 @@ function start_services() {
5858

5959
sed -i "s/backend_address/$ip_address/g" $WORKPATH/docker/ui/svelte/.env
6060

61+
# Point the compose file at the CI-built images: use the test-specific image tag and prepend the image repository
62+
echo "using image repository $IMAGE_REPO and image tag $IMAGE_TAG"
63+
sed -i "s#image: opea/chatqna:latest#image: opea/chatqna:${IMAGE_TAG}#g" docker_compose.yaml
64+
sed -i "s#image: opea/chatqna-ui:latest#image: opea/chatqna-ui:${IMAGE_TAG}#g" docker_compose.yaml
65+
sed -i "s#image: opea/*#image: ${IMAGE_REPO}opea/#g" docker_compose.yaml
6166
# Start Docker Containers
62-
# TODO: Replace the container name with a test-specific name
63-
6467
docker compose -f docker_compose.yaml up -d
6568
n=0
6669
until [[ "$n" -ge 200 ]]; do
@@ -213,13 +216,13 @@ function main() {
213216

214217
stop_docker
215218
begin_time=$(date +%s)
216-
build_docker_images
217-
start_time=$(date +%s)
219+
# build_docker_images
220+
# start_time=$(date +%s)
218221
start_services
219222
end_time=$(date +%s)
220-
minimal_duration=$((end_time-start_time))
223+
# minimal_duration=$((end_time-start_time))
221224
maximal_duration=$((end_time-begin_time))
222-
echo "Mega service start minimal duration is "$minimal_duration"s, maximal duration(including docker image build) is "$maximal_duration"s"
225+
echo "Mega service start duration is "$maximal_duration"s"
223226

224227
validate_microservices
225228
validate_megaservice
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
#!/bin/bash
2+
# Copyright (C) 2024 Intel Corporation
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
# Abort on the first failing command and trace every command that runs.
set -e -x

# Account the test runs as; the paths below live under its home directory.
USER_ID=$(whoami)
# Directory that receives the megaservice response log.
LOG_PATH="/home/${USER_ID}/logs"
# Host directory substituted for "/mnt/models" in the manifests by init_chatqna.
MOUNT_DIR="/home/${USER_ID}/.cache/huggingface/hub"
# Registry prefix (may be empty) and tag of the images under test.
IMAGE_REPO="${IMAGE_REPO:-}"
IMAGE_TAG="${IMAGE_TAG:-latest}"
11+
12+
#######################################
# Prepare the ChatQnA manifests for this test run.
# Every *.yaml file below the current directory is edited in place:
#   - "path: /mnt/models" becomes "path: $MOUNT_DIR"
#   - "image: opea/chatqna:latest" gets the tag $IMAGE_TAG
#   - "image: opea/" gains the $IMAGE_REPO prefix
#   - the literal ${HUGGINGFACEHUB_API_TOKEN} placeholder is replaced with the
#     content of /home/$USER_ID/.cache/huggingface/token
# Globals:   MOUNT_DIR, IMAGE_TAG, IMAGE_REPO, USER_ID (read)
# Arguments: none
# Outputs:   a warning on stderr when the token file cannot be read
#######################################
function init_chatqna() {
    local token_file="/home/$USER_ID/.cache/huggingface/token"
    local hf_token=""
    local trace_was_on=0

    # The expressions are applied in this order on every line, so the
    # megaservice tag is rewritten before the repository prefix is added.
    find . -name '*.yaml' -type f -exec sed -i \
        -e "s#path: /mnt/models#path: $MOUNT_DIR#g" \
        -e "s#image: opea/chatqna:latest#image: opea/chatqna:${IMAGE_TAG}#g" \
        -e "s#image: opea/*#image: ${IMAGE_REPO}opea/#g" \
        {} +

    # Switch command tracing off while the token is on a command line so the
    # secret never reaches the job log; the previous state is restored below.
    case "$-" in
        *x*) trace_was_on=1 ;;
    esac
    set +x
    if [[ -r "$token_file" ]]; then
        hf_token=$(cat "$token_file")
    else
        # As before, an unreadable token file leaves an empty token in place.
        echo "Warning: cannot read $token_file, using an empty Hugging Face token" >&2
    fi
    find . -name '*.yaml' -type f -exec sed -i \
        "s#\${HUGGINGFACEHUB_API_TOKEN}#${hf_token}#g" {} +
    if [[ "$trace_was_on" -eq 1 ]]; then
        set -x
    fi
}
22+
23+
#######################################
# Deploy the ChatQnA manifests from the current directory into $NAMESPACE.
# Rewrites "default.svc" to "$NAMESPACE.svc" in every *.yaml file below the
# current directory, applies the listed manifests, waits 60 seconds, then
# applies the backend server manifest.
# Globals:   NAMESPACE (read) - target namespace, must be non-empty
# Arguments: none
#######################################
function install_chatqna {
    # An empty namespace would rewrite the service DNS names to ".svc".
    : "${NAMESPACE:?NAMESPACE must be set to the target namespace}"

    local -a yaml_files=("qna_configmap_gaudi" "redis-vector-db" "tei_embedding_gaudi_service" "tei_reranking_service" "tgi_gaudi_service" "retriever" "embedding" "reranking" "llm")
    local yaml_file

    # The dot is escaped so only the literal "default.svc" is replaced.
    find . -name '*.yaml' -type f -exec sed -i "s#default\.svc#${NAMESPACE}.svc#g" {} +

    # Apply every manifest in yaml_files to the cluster, in list order.
    for yaml_file in "${yaml_files[@]}"; do
        kubectl apply -f "${yaml_file}.yaml" -n "$NAMESPACE"
    done
    # Presumably gives the services above time to start before the backend
    # server is created - TODO confirm.
    sleep 60
    # NOTE(review): this Gaudi script applies a manifest named "xeon" - confirm
    # that is the intended file.
    kubectl apply -f chaqna-xeon-backend-server.yaml -n "$NAMESPACE"
}
34+
35+
#######################################
# Validate the ChatQnA deployment in $NAMESPACE.
# Polls the retriever and TGI services until curl succeeds (up to 20 attempts,
# 10 seconds apart, each), then sends one question to the megaservice, stores
# the response in $LOG_PATH/curlmega_$NAMESPACE.log and requires the word
# "billion" to appear in it.
# Globals:   NAMESPACE, LOG_PATH (read)
# Arguments: none
# Outputs:   progress and failure messages on stdout
# Exits:     1 when a microservice never answers, the megaservice request
#            fails, or the response check fails
#######################################
function validate_chatqna() {
    local max_retry=20
    local retry_delay=10
    local attempt
    local service_ready
    local test_embedding
    local logfile
    local exit_code=0

    # Random 768-element vector used as the embedding in the retriever probe.
    test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")

    # Make sure microservice retriever is ready. Readiness is tracked per
    # service so that running out of attempts here is not hidden by the next
    # loop.
    service_ready=false
    for ((attempt = 1; attempt <= max_retry; attempt++)); do
        if curl "http://retriever-svc.${NAMESPACE}:7000/v1/retrieval" -X POST \
            -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" \
            -H 'Content-Type: application/json'; then
            service_ready=true
            break
        fi
        sleep "$retry_delay"
    done
    if [[ "$service_ready" != true ]]; then
        echo "Microservice failed, exit with error."
        exit 1
    fi

    # Make sure microservice tgi-svc is ready.
    service_ready=false
    for ((attempt = 1; attempt <= max_retry; attempt++)); do
        if curl "http://tgi-gaudi-svc.${NAMESPACE}:9009/generate" -X POST \
            -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
            -H 'Content-Type: application/json'; then
            service_ready=true
            break
        fi
        sleep "$retry_delay"
    done
    if [[ "$service_ready" != true ]]; then
        echo "Microservice failed, exit with error."
        exit 1
    fi

    # Check that the megaservice works. The log name carries the namespace to
    # avoid conflicts among multiple runners.
    mkdir -p "$LOG_PATH"
    logfile="$LOG_PATH/curlmega_${NAMESPACE}.log"
    # "|| exit_code=$?" keeps errexit from aborting before the diagnostic
    # below can be printed.
    curl "http://chaqna-xeon-backend-server-svc.${NAMESPACE}:8888/v1/chatqna" -H "Content-Type: application/json" -d '{
        "messages": "What is the revenue of Nike in 2023?"}' > "$logfile" || exit_code=$?
    if [[ "$exit_code" -ne 0 ]]; then
        echo "Megaservice failed, please check the logs in $logfile!"
        exit 1
    fi

    echo "Checking response results, make sure the output is reasonable. "
    if [[ -f "$logfile" ]] && grep -q "billion" "$logfile"; then
        echo "Response check succeed!"
    else
        echo "Response check failed, please check the logs in artifacts!"
        exit 1
    fi
}
85+
86+
# Command dispatch: the first argument selects the test phase to run and, for
# the install and validate phases, the second argument is the target namespace.
if [[ $# -eq 0 ]]; then
    echo "Usage: $0 <function_name>"
    exit 1
fi

case "$1" in
    init_ChatQnA)
        pushd ChatQnA/kubernetes/manifests
        init_chatqna
        popd
        ;;
    install_ChatQnA)
        pushd ChatQnA/kubernetes/manifests
        # Refuse to continue without a namespace argument.
        NAMESPACE=${2:?namespace argument is required}
        install_chatqna
        popd
        ;;
    validate_ChatQnA)
        NAMESPACE=${2:?namespace argument is required}
        SERVICE_NAME=chaqna-xeon-backend-server-svc
        validate_chatqna
        ;;
    *)
        echo "Unknown function: $1"
        # A misspelled phase name must fail the calling job instead of
        # passing without running anything.
        exit 1
        ;;
esac

CodeGen/tests/test_codegen_on_gaudi.sh

Lines changed: 5 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -8,24 +8,6 @@ WORKPATH=$(dirname "$PWD")
88
LOG_PATH="$WORKPATH/tests"
99
ip_address=$(hostname -I | awk '{print $1}')
1010

11-
function build_docker_images() {
12-
cd $WORKPATH
13-
git clone https://github.com/opea-project/GenAIComps.git
14-
cd GenAIComps
15-
16-
docker build -t opea/llm-tgi:latest -f comps/llms/text-generation/tgi/Dockerfile .
17-
18-
docker pull ghcr.io/huggingface/tgi-gaudi:1.2.1
19-
20-
cd $WORKPATH/docker
21-
docker build --no-cache -t opea/codegen:latest -f Dockerfile .
22-
23-
cd $WORKPATH/docker/ui
24-
docker build --no-cache -t opea/codegen-ui:latest -f docker/Dockerfile .
25-
26-
docker images
27-
}
28-
2911
function start_services() {
3012
cd $WORKPATH/docker/gaudi
3113

@@ -38,8 +20,12 @@ function start_services() {
3820

3921
sed -i "s/backend_address/$ip_address/g" $WORKPATH/docker/ui/svelte/.env
4022

23+
# Point the compose file at the CI-built images: use the test-specific image tag and prepend the image repository
24+
echo "using image repository $IMAGE_REPO and image tag $IMAGE_TAG"
25+
sed -i "s#image: opea/codegen:latest#image: opea/codegen:${IMAGE_TAG}#g" docker_compose.yaml
26+
sed -i "s#image: opea/codegen-ui:latest#image: opea/codegen-ui:${IMAGE_TAG}#g" docker_compose.yaml
27+
sed -i "s#image: opea/*#image: ${IMAGE_REPO}opea/#g" docker_compose.yaml
4128
# Start Docker Containers
42-
# TODO: Replace the container name with a test-specific name
4329
docker compose -f docker_compose.yaml up -d
4430

4531
sleep 2m # Waits 2 minutes
@@ -141,7 +127,6 @@ function main() {
141127

142128
stop_docker
143129

144-
build_docker_images
145130
start_services
146131

147132
validate_microservices
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
#!/bin/bash
2+
# Copyright (C) 2024 Intel Corporation
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
# Abort on the first failing command and trace every command that runs.
set -e -x

# Account the test runs as; the paths below live under its home directory.
USER_ID=$(whoami)
# Directory that receives the megaservice response log.
LOG_PATH="/home/${USER_ID}/logs"
# Host directory substituted for "/mnt" in the manifests by init_codegen.
MOUNT_DIR="/home/${USER_ID}/.cache/huggingface/hub"
# Registry prefix (may be empty) and tag of the images under test.
IMAGE_REPO="${IMAGE_REPO:-}"
IMAGE_TAG="${IMAGE_TAG:-latest}"
11+
12+
#######################################
# Prepare the CodeGen manifests for this test run.
# Every *.yaml file below the current directory is edited in place:
#   - "path: /mnt" becomes "path: $MOUNT_DIR"
#   - "image: opea/codegen:latest" gets the tag $IMAGE_TAG
#   - quoted images starting with "opea/ gain the $IMAGE_REPO prefix
#   - "insert-your-huggingface-token-here" is replaced with the content of
#     /home/$USER_ID/.cache/huggingface/token
# Globals:   MOUNT_DIR, IMAGE_TAG, IMAGE_REPO, USER_ID (read)
# Arguments: none
# Outputs:   a warning on stderr when the token file cannot be read
#######################################
function init_codegen() {
    local token_file="/home/$USER_ID/.cache/huggingface/token"
    local hf_token=""
    local trace_was_on=0

    # The expressions are applied in this order on every line.
    # NOTE(review): the tag expression matches an unquoted image value while
    # the repository expression matches a quoted one - confirm which form the
    # manifests use.
    find . -name '*.yaml' -type f -exec sed -i \
        -e "s#path: /mnt#path: $MOUNT_DIR#g" \
        -e "s#image: opea/codegen:latest#image: opea/codegen:${IMAGE_TAG}#g" \
        -e "s#image: \"opea/*#image: \"${IMAGE_REPO}opea/#g" \
        {} +

    # Switch command tracing off while the token is on a command line so the
    # secret never reaches the job log; the previous state is restored below.
    case "$-" in
        *x*) trace_was_on=1 ;;
    esac
    set +x
    if [[ -r "$token_file" ]]; then
        hf_token=$(cat "$token_file")
    else
        # As before, an unreadable token file leaves an empty token in place.
        echo "Warning: cannot read $token_file, using an empty Hugging Face token" >&2
    fi
    find . -name '*.yaml' -type f -exec sed -i \
        "s#insert-your-huggingface-token-here#${hf_token}#g" {} +
    if [[ "$trace_was_on" -eq 1 ]]; then
        set -x
    fi
}
23+
24+
#######################################
# Apply every manifest in the current directory to $NAMESPACE.
# Globals:   NAMESPACE (read) - target namespace, must be non-empty
# Arguments: none
# Outputs:   the namespace in use, on stdout
#######################################
function install_codegen {
    # Stop here rather than hand kubectl an empty namespace value.
    : "${NAMESPACE:?NAMESPACE must be set to the target namespace}"
    echo "namespace is $NAMESPACE"
    kubectl apply -f . -n "$NAMESPACE"
}
28+
29+
#######################################
# Validate the CodeGen megaservice in $NAMESPACE.
# Looks up the cluster IP and first port of service $SERVICE_NAME, posts one
# code-completion request to /v1/codegen, stores the response in
# $LOG_PATH/curlmega_$NAMESPACE.log and requires the word "print" to appear
# in it.
# Globals:   SERVICE_NAME, NAMESPACE, LOG_PATH (read)
# Arguments: none
# Outputs:   progress and failure messages on stdout
# Exits:     1 when the request fails or the response check fails
#######################################
function validate_codegen() {
    local ip_address
    local port
    local logfile
    local exit_code=0

    ip_address=$(kubectl get svc "$SERVICE_NAME" -n "$NAMESPACE" -o jsonpath='{.spec.clusterIP}')
    port=$(kubectl get svc "$SERVICE_NAME" -n "$NAMESPACE" -o jsonpath='{.spec.ports[0].port}')
    echo "try to curl http://${ip_address}:${port}/v1/codegen..."

    # The log name carries the namespace to avoid conflicts among multiple
    # runners.
    mkdir -p "$LOG_PATH"
    logfile="$LOG_PATH/curlmega_${NAMESPACE}.log"
    # Curl the Mega Service. "|| exit_code=$?" keeps errexit from aborting
    # before the diagnostic below can be printed.
    curl "http://${ip_address}:${port}/v1/codegen" -H "Content-Type: application/json" \
        -d '{"messages": "def print_hello_world():"}' > "$logfile" || exit_code=$?
    if [[ "$exit_code" -ne 0 ]]; then
        echo "Megaservice codegen failed, please check the logs in $logfile!"
        exit 1
    fi

    echo "Checking response results, make sure the output is reasonable. "
    if [[ -f "$logfile" ]] && grep -q "print" "$logfile"; then
        echo "Response check succeed!"
    else
        echo "Response check failed, please check the logs in artifacts!"
        # A response without the expected content must fail the test.
        exit 1
    fi
}
58+
59+
# Command dispatch: the first argument selects the test phase to run and, for
# the install and validate phases, the second argument is the target namespace.
if [[ $# -eq 0 ]]; then
    echo "Usage: $0 <function_name>"
    exit 1
fi

case "$1" in
    init_CodeGen)
        pushd CodeGen/kubernetes/manifests/gaudi
        init_codegen
        popd
        ;;
    install_CodeGen)
        pushd CodeGen/kubernetes/manifests/gaudi
        # Refuse to continue without a namespace argument.
        NAMESPACE=${2:?namespace argument is required}
        install_codegen
        popd
        ;;
    validate_CodeGen)
        NAMESPACE=${2:?namespace argument is required}
        SERVICE_NAME=codegen
        validate_codegen
        ;;
    *)
        echo "Unknown function: $1"
        # A misspelled phase name must fail the calling job instead of
        # passing without running anything.
        exit 1
        ;;
esac

CodeTrans/tests/test_codetrans_on_gaudi.sh

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,12 @@ function start_services() {
4040

4141
sed -i "s/backend_address/$ip_address/g" $WORKPATH/docker/ui/svelte/.env
4242

43+
# Point the compose file at the CI-built images: use the test-specific image tag and prepend the image repository
44+
echo "using image repository $IMAGE_REPO and image tag $IMAGE_TAG"
45+
sed -i "s#image: opea/codetrans:latest#image: opea/codetrans:${IMAGE_TAG}#g" docker_compose.yaml
46+
sed -i "s#image: opea/codetrans-ui:latest#image: opea/codetrans-ui:${IMAGE_TAG}#g" docker_compose.yaml
47+
sed -i "s#image: opea/*#image: ${IMAGE_REPO}opea/#g" docker_compose.yaml
4348
# Start Docker Containers
44-
# TODO: Replace the container name with a test-specific name
4549
docker compose -f docker_compose.yaml up -d
4650

4751
sleep 2m # Waits 2 minutes
@@ -141,7 +145,7 @@ function main() {
141145

142146
stop_docker
143147

144-
build_docker_images
148+
# build_docker_images
145149
start_services
146150

147151
validate_microservices

0 commit comments

Comments
 (0)