Skip to content

Commit b7a04bf

Browse files
authored
Sync values yaml file for 1.3 release (opea-project#1524)
* Sync values yaml file for 1.3 release Signed-off-by: Dolpher Du <[email protected]>
1 parent dc5edc3 commit b7a04bf

File tree

11 files changed

+91
-14
lines changed

11 files changed

+91
-14
lines changed

comps/agent/deployment/kubernetes/gaudi-values.yaml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,21 @@
44
# Accelerate inferencing in heaviest components to improve performance
55
# by overriding their subchart values
66

7+
tgi:
8+
enabled: false
9+
710
vllm:
811
enabled: true
12+
accelDevice: "gaudi"
913
image:
1014
repository: opea/vllm-gaudi
15+
resources:
16+
limits:
17+
habana.ai/gaudi: 4
18+
LLM_MODEL_ID: "meta-llama/Llama-3.3-70B-Instruct"
19+
OMPI_MCA_btl_vader_single_copy_mechanism: none
20+
PT_HPU_ENABLE_LAZY_COLLECTIVES: true
21+
VLLM_SKIP_WARMUP: true
22+
extraCmdArgs: ["--tensor-parallel-size", "4", "--max-seq_len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"]
23+
1124
llm_endpoint_url: http://{{ .Release.Name }}-vllm
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Copyright (C) 2024 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
tei:
5+
enabled: true
6+
redis-vector-db:
7+
enabled: false
8+
milvus:
9+
enabled: false
10+
qdrant:
11+
enabled: true
12+
tag: "v1.13.1"
13+
config:
14+
cluster:
15+
enabled: false
16+
17+
DATAPREP_BACKEND: "QDRANT"
18+
# QDRANT_HOST: ""
19+
QDRANT_PORT: 6333
20+
COLLECTION_NAME: "rag_qdrant"
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Copyright (C) 2024 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
image:
5+
repository: opea/llm-docsum
6+
tag: "latest"
7+
8+
LLM_MODEL_ID: "Intel/neural-chat-7b-v3-3"
9+
MAX_INPUT_TOKENS: 2048
10+
MAX_TOTAL_TOKENS: 4096
11+
12+
tgi:
13+
LLM_MODEL_ID: "Intel/neural-chat-7b-v3-3"
14+
enabled: true
15+
MAX_INPUT_LENGTH: 2048
16+
MAX_TOTAL_TOKENS: 4096
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Copyright (C) 2024 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
image:
5+
repository: opea/llm-faqgen
6+
tag: "latest"
7+
8+
LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
9+
10+
tgi:
11+
enabled: true
12+
LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# Copyright (C) 2024 Intel Corporation
22
# SPDX-License-Identifier: Apache-2.0
33

4-
tgi:
4+
LVM_BACKEND: "vLLM"
5+
vllm:
56
enabled: true
7+
tgi:
8+
enabled: false
9+
lvm-serve:
10+
enabled: false
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Copyright (C) 2024 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
tei:
5+
enabled: true
6+
redis-vector-db:
7+
enabled: false
8+
milvus:
9+
enabled: false
10+
qdrant:
11+
enabled: true
12+
tag: "v1.13.1"
13+
config:
14+
cluster:
15+
enabled: false
16+
17+
RETRIEVER_BACKEND: "QDRANT"
18+
# QDRANT_HOST: ""
19+
QDRANT_PORT: 6333
20+
QDRANT_INDEX_NAME: "rag_qdrant"

comps/third_parties/tei/deployment/kubernetes/gaudi-values.yaml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,5 @@ resources:
1616
limits:
1717
habana.ai/gaudi: 1
1818

19-
livenessProbe:
20-
timeoutSeconds: 1
2119
readinessProbe:
2220
timeoutSeconds: 1

comps/third_parties/teirerank/deployment/kubernetes/gaudi-values.yaml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,5 @@ resources:
1616
limits:
1717
habana.ai/gaudi: 1
1818

19-
livenessProbe:
20-
timeoutSeconds: 1
2119
readinessProbe:
2220
timeoutSeconds: 1

comps/third_parties/tgi/deployment/kubernetes/cpu-values.yaml

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,6 @@ resources:
1010
cpu: 6
1111
memory: 65Gi
1212

13-
livenessProbe:
14-
initialDelaySeconds: 8
15-
periodSeconds: 8
16-
failureThreshold: 24
17-
timeoutSeconds: 4
1813
readinessProbe:
1914
initialDelaySeconds: 16
2015
periodSeconds: 8

comps/third_parties/tgi/deployment/kubernetes/gaudi-values.yaml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,6 @@ resources:
2323
cpu: 1
2424
memory: 16Gi
2525

26-
livenessProbe:
27-
initialDelaySeconds: 5
28-
periodSeconds: 5
29-
timeoutSeconds: 1
3026
readinessProbe:
3127
initialDelaySeconds: 5
3228
periodSeconds: 5

0 commit comments

Comments
 (0)