Skip to content

Commit 622fffa

Browse files
Adding files to deploy AgentQnA application on ROCm vLLM (opea-project#1613)
Signed-off-by: Chingis Yundunov <[email protected]> Signed-off-by: Chingis Yundunov <[email protected]>
1 parent a3cfbc8 commit 622fffa

15 files changed

+1177
-145
lines changed

AgentQnA/docker_compose/amd/gpu/rocm/README.md

Lines changed: 301 additions & 68 deletions
Large diffs are not rendered by default.
Lines changed: 49 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,24 @@
1-
# Copyright (C) 2024 Intel Corporation
2-
# SPDX-License-Identifier: Apache-2.0
1+
# Copyright (C) 2025 Advanced Micro Devices, Inc.
32

43
services:
5-
agent-tgi-server:
6-
image: ${AGENTQNA_TGI_IMAGE}
7-
container_name: agent-tgi-server
4+
tgi-service:
5+
image: ghcr.io/huggingface/text-generation-inference:3.0.0-rocm
6+
container_name: tgi-service
87
ports:
9-
- "${AGENTQNA_TGI_SERVICE_PORT-8085}:80"
8+
- "${TGI_SERVICE_PORT-8085}:80"
109
volumes:
11-
- ${HF_CACHE_DIR:-/var/opea/agent-service/}:/data
10+
- "${MODEL_CACHE:-./data}:/data"
1211
environment:
1312
no_proxy: ${no_proxy}
1413
http_proxy: ${http_proxy}
1514
https_proxy: ${https_proxy}
16-
TGI_LLM_ENDPOINT: "http://${HOST_IP}:${AGENTQNA_TGI_SERVICE_PORT}"
15+
TGI_LLM_ENDPOINT: "http://${ip_address}:${TGI_SERVICE_PORT-8085}"  # same 8085 fallback as the published port
1716
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
1817
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
19-
shm_size: 1g
18+
shm_size: 32g
2019
devices:
2120
- /dev/kfd:/dev/kfd
22-
- /dev/dri/${AGENTQNA_CARD_ID}:/dev/dri/${AGENTQNA_CARD_ID}
23-
- /dev/dri/${AGENTQNA_RENDER_ID}:/dev/dri/${AGENTQNA_RENDER_ID}
21+
- /dev/dri:/dev/dri
2422
cap_add:
2523
- SYS_PTRACE
2624
group_add:
@@ -34,14 +32,14 @@ services:
3432
image: opea/agent:latest
3533
container_name: rag-agent-endpoint
3634
volumes:
37-
# - ${WORKDIR}/GenAIExamples/AgentQnA/docker_image_build/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
38-
- ${TOOLSET_PATH}:/home/user/tools/
35+
- "${TOOLSET_PATH}:/home/user/tools/"
3936
ports:
40-
- "9095:9095"
37+
- "${WORKER_RAG_AGENT_PORT:-9095}:9095"
4138
ipc: host
4239
environment:
4340
ip_address: ${ip_address}
4441
strategy: rag_agent_llama
42+
with_memory: false
4543
recursion_limit: ${recursion_limit_worker}
4644
llm_engine: tgi
4745
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
@@ -61,29 +59,57 @@ services:
6159
LANGCHAIN_PROJECT: "opea-worker-agent-service"
6260
port: 9095
6361

62+
worker-sql-agent:
63+
image: opea/agent:latest
64+
container_name: sql-agent-endpoint
65+
volumes:
66+
- "${WORKDIR}/tests/Chinook_Sqlite.sqlite:/home/user/chinook-db/Chinook_Sqlite.sqlite:rw"
67+
ports:
68+
- "${WORKER_SQL_AGENT_PORT:-9096}:9096"
69+
ipc: host
70+
environment:
71+
ip_address: ${ip_address}
72+
strategy: sql_agent_llama
73+
with_memory: false
74+
db_name: ${db_name}
75+
db_path: ${db_path}
76+
use_hints: false
77+
recursion_limit: ${recursion_limit_worker}
78+
llm_engine: vllm
79+
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
80+
llm_endpoint_url: ${LLM_ENDPOINT_URL}
81+
model: ${LLM_MODEL_ID}
82+
temperature: ${temperature}
83+
max_new_tokens: ${max_new_tokens}
84+
stream: false
85+
require_human_feedback: false
86+
no_proxy: ${no_proxy}
87+
http_proxy: ${http_proxy}
88+
https_proxy: ${https_proxy}
89+
port: 9096
90+
6491
supervisor-react-agent:
6592
image: opea/agent:latest
6693
container_name: react-agent-endpoint
6794
depends_on:
68-
- agent-tgi-server
6995
- worker-rag-agent
7096
volumes:
71-
# - ${WORKDIR}/GenAIExamples/AgentQnA/docker_image_build/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
72-
- ${TOOLSET_PATH}:/home/user/tools/
97+
- "${TOOLSET_PATH}:/home/user/tools/"
7398
ports:
74-
- "${AGENTQNA_FRONTEND_PORT}:9090"
99+
- "${SUPERVISOR_REACT_AGENT_PORT:-9090}:9090"
75100
ipc: host
76101
environment:
77102
ip_address: ${ip_address}
78-
strategy: react_langgraph
103+
strategy: react_llama
104+
with_memory: true
79105
recursion_limit: ${recursion_limit_supervisor}
80106
llm_engine: tgi
81107
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
82108
llm_endpoint_url: ${LLM_ENDPOINT_URL}
83109
model: ${LLM_MODEL_ID}
84110
temperature: ${temperature}
85111
max_new_tokens: ${max_new_tokens}
86-
stream: false
112+
stream: true
87113
tools: /home/user/tools/supervisor_agent_tools.yaml
88114
require_human_feedback: false
89115
no_proxy: ${no_proxy}
@@ -92,6 +118,7 @@ services:
92118
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
93119
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
94120
LANGCHAIN_PROJECT: "opea-supervisor-agent-service"
95-
CRAG_SERVER: $CRAG_SERVER
96-
WORKER_AGENT_URL: $WORKER_AGENT_URL
121+
CRAG_SERVER: ${CRAG_SERVER}
122+
WORKER_AGENT_URL: ${WORKER_AGENT_URL}
123+
SQL_AGENT_URL: ${SQL_AGENT_URL}
97124
port: 9090
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
# Copyright (C) 2025 Advanced Micro Devices, Inc.
2+
3+
services:
4+
vllm-service:
5+
image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest}
6+
container_name: vllm-service
7+
ports:
8+
- "${VLLM_SERVICE_PORT:-8081}:8011"
9+
environment:
10+
no_proxy: ${no_proxy}
11+
http_proxy: ${http_proxy}
12+
https_proxy: ${https_proxy}
13+
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
14+
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
15+
HF_HUB_DISABLE_PROGRESS_BARS: 1
16+
HF_HUB_ENABLE_HF_TRANSFER: 0
17+
VLLM_USE_TRITON_FLASH_ATTN: 0  # turn off vLLM's Triton flash-attention path
18+
PYTORCH_JIT: 0
19+
volumes:
20+
- "${MODEL_CACHE:-./data}:/data"
21+
shm_size: 20G
22+
devices:
23+
- /dev/kfd:/dev/kfd
24+
- /dev/dri/:/dev/dri/
25+
cap_add:
26+
- SYS_PTRACE
27+
group_add:
28+
- video
29+
security_opt:
30+
- seccomp:unconfined
31+
- apparmor=unconfined
32+
command: "--model ${VLLM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\""
33+
ipc: host
34+
35+
worker-rag-agent:
36+
image: opea/agent:latest
37+
container_name: rag-agent-endpoint
38+
volumes:
39+
- ${TOOLSET_PATH}:/home/user/tools/
40+
ports:
41+
- "${WORKER_RAG_AGENT_PORT:-9095}:9095"
42+
ipc: host
43+
environment:
44+
ip_address: ${ip_address}
45+
strategy: rag_agent_llama
46+
with_memory: false
47+
recursion_limit: ${recursion_limit_worker}
48+
llm_engine: vllm
49+
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
50+
llm_endpoint_url: ${LLM_ENDPOINT_URL}
51+
model: ${LLM_MODEL_ID}
52+
temperature: ${temperature}
53+
max_new_tokens: ${max_new_tokens}
54+
stream: false
55+
tools: /home/user/tools/worker_agent_tools.yaml
56+
require_human_feedback: false
57+
RETRIEVAL_TOOL_URL: ${RETRIEVAL_TOOL_URL}
58+
no_proxy: ${no_proxy}
59+
http_proxy: ${http_proxy}
60+
https_proxy: ${https_proxy}
61+
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
62+
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
63+
LANGCHAIN_PROJECT: "opea-worker-agent-service"
64+
port: 9095
65+
66+
worker-sql-agent:
67+
image: opea/agent:latest
68+
container_name: sql-agent-endpoint
69+
volumes:
70+
- "${WORKDIR}/tests/Chinook_Sqlite.sqlite:/home/user/chinook-db/Chinook_Sqlite.sqlite:rw"
71+
ports:
72+
- "${WORKER_SQL_AGENT_PORT:-9096}:9096"
73+
ipc: host
74+
environment:
75+
ip_address: ${ip_address}
76+
strategy: sql_agent_llama
77+
with_memory: false
78+
db_name: ${db_name}
79+
db_path: ${db_path}
80+
use_hints: false
81+
recursion_limit: ${recursion_limit_worker}
82+
llm_engine: vllm
83+
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
84+
llm_endpoint_url: ${LLM_ENDPOINT_URL}
85+
model: ${LLM_MODEL_ID}
86+
temperature: ${temperature}
87+
max_new_tokens: ${max_new_tokens}
88+
stream: false
89+
require_human_feedback: false
90+
no_proxy: ${no_proxy}
91+
http_proxy: ${http_proxy}
92+
https_proxy: ${https_proxy}
93+
port: 9096
94+
95+
supervisor-react-agent:
96+
image: opea/agent:latest
97+
container_name: react-agent-endpoint
98+
depends_on:
99+
- worker-rag-agent
100+
volumes:
101+
- ${TOOLSET_PATH}:/home/user/tools/
102+
ports:
103+
- "${SUPERVISOR_REACT_AGENT_PORT:-9090}:9090"
104+
ipc: host
105+
environment:
106+
ip_address: ${ip_address}
107+
strategy: react_llama
108+
with_memory: true
109+
recursion_limit: ${recursion_limit_supervisor}
110+
llm_engine: vllm
111+
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
112+
llm_endpoint_url: ${LLM_ENDPOINT_URL}
113+
model: ${LLM_MODEL_ID}
114+
temperature: ${temperature}
115+
max_new_tokens: ${max_new_tokens}
116+
stream: true
117+
tools: /home/user/tools/supervisor_agent_tools.yaml
118+
require_human_feedback: false
119+
no_proxy: ${no_proxy}
120+
http_proxy: ${http_proxy}
121+
https_proxy: ${https_proxy}
122+
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
123+
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
124+
LANGCHAIN_PROJECT: "opea-supervisor-agent-service"
125+
CRAG_SERVER: ${CRAG_SERVER}
126+
WORKER_AGENT_URL: ${WORKER_AGENT_URL}
127+
SQL_AGENT_URL: ${SQL_AGENT_URL}
128+
port: 9090
Lines changed: 67 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,47 +1,87 @@
11
# Copyright (C) 2024 Advanced Micro Devices, Inc.
22
# SPDX-License-Identifier: Apache-2.0
33

4-
WORKPATH=$(dirname "$PWD")/..
4+
# Before running this script, export the following variables:
5+
# export host_ip="your_host_ip_or_host_name"
6+
# export HUGGINGFACEHUB_API_TOKEN="your_huggingface_api_token"
7+
# export LANGCHAIN_API_KEY="your_langchain_api_key"
8+
# export LANGCHAIN_TRACING_V2=""
9+
10+
# Set server hostname or IP address
511
export ip_address=${host_ip}
6-
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
7-
export AGENTQNA_TGI_IMAGE=ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
8-
export AGENTQNA_TGI_SERVICE_PORT="8085"
912

10-
# LLM related environment variables
11-
export AGENTQNA_CARD_ID="card1"
12-
export AGENTQNA_RENDER_ID="renderD136"
13-
export HF_CACHE_DIR=${HF_CACHE_DIR}
14-
ls $HF_CACHE_DIR
15-
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
16-
#export NUM_SHARDS=4
17-
export LLM_ENDPOINT_URL="http://${ip_address}:${AGENTQNA_TGI_SERVICE_PORT}"
13+
# Set the host ports for the services
14+
export TGI_SERVICE_PORT="18110"
15+
export WORKER_RAG_AGENT_PORT="18111"
16+
export WORKER_SQL_AGENT_PORT="18112"
17+
export SUPERVISOR_REACT_AGENT_PORT="18113"
18+
export CRAG_SERVER_PORT="18114"
19+
20+
export WORKPATH=$(dirname "$PWD")
21+
export WORKDIR=${WORKPATH}/../../../
22+
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
23+
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
24+
export HF_CACHE_DIR="./data"
25+
export MODEL_CACHE="./data"
26+
export TOOLSET_PATH=${WORKPATH}/../../../tools/
27+
export recursion_limit_worker=12
28+
export LLM_ENDPOINT_URL=http://${ip_address}:${TGI_SERVICE_PORT}
1829
export temperature=0.01
1930
export max_new_tokens=512
20-
21-
# agent related environment variables
22-
export AGENTQNA_WORKER_AGENT_SERVICE_PORT="9095"
23-
export TOOLSET_PATH=/home/huggingface/datamonsters/amd-opea/GenAIExamples/AgentQnA/tools/
24-
echo "TOOLSET_PATH=${TOOLSET_PATH}"
31+
export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"
32+
export LANGCHAIN_API_KEY=${LANGCHAIN_API_KEY}
33+
export LANGCHAIN_TRACING_V2=${LANGCHAIN_TRACING_V2}
34+
export db_name=Chinook
35+
export db_path="sqlite:////home/user/chinook-db/Chinook_Sqlite.sqlite"
2536
export recursion_limit_worker=12
2637
export recursion_limit_supervisor=10
27-
export WORKER_AGENT_URL="http://${ip_address}:${AGENTQNA_WORKER_AGENT_SERVICE_PORT}/v1/chat/completions"
28-
export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"
29-
export CRAG_SERVER=http://${ip_address}:18881
30-
31-
export AGENTQNA_FRONTEND_PORT="9090"
32-
33-
#retrieval_tool
38+
export CRAG_SERVER=http://${ip_address}:${CRAG_SERVER_PORT}
39+
export WORKER_AGENT_URL="http://${ip_address}:${WORKER_RAG_AGENT_PORT}/v1/chat/completions"
40+
export SQL_AGENT_URL="http://${ip_address}:${WORKER_SQL_AGENT_PORT}/v1/chat/completions"
41+
export HF_CACHE_DIR=${HF_CACHE_DIR}
42+
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
43+
export no_proxy=${no_proxy}
44+
export http_proxy=${http_proxy}
45+
export https_proxy=${https_proxy}
46+
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
47+
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
3448
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
3549
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
36-
export REDIS_URL="redis://${host_ip}:26379"
50+
export REDIS_URL="redis://${host_ip}:6379"
3751
export INDEX_NAME="rag-redis"
52+
export RERANK_TYPE="tei"
3853
export MEGA_SERVICE_HOST_IP=${host_ip}
3954
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
4055
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
4156
export RERANK_SERVICE_HOST_IP=${host_ip}
4257
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8889/v1/retrievaltool"
4358
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
44-
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get"
45-
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete"
59+
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get"
60+
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete"
61+
62+
echo ${WORKER_RAG_AGENT_PORT} > ${WORKPATH}/WORKER_RAG_AGENT_PORT_tmp
63+
echo ${WORKER_SQL_AGENT_PORT} > ${WORKPATH}/WORKER_SQL_AGENT_PORT_tmp
64+
echo ${SUPERVISOR_REACT_AGENT_PORT} > ${WORKPATH}/SUPERVISOR_REACT_AGENT_PORT_tmp
65+
echo ${CRAG_SERVER_PORT} > ${WORKPATH}/CRAG_SERVER_PORT_tmp
4666

67+
echo "Downloading chinook data..."
68+
rm -rf chinook-database  # -f: never prompt and stay quiet when the directory does not exist yet
69+
git clone https://github.com/lerocha/chinook-database.git
70+
rm -rf ../../../../../AgentQnA/tests/Chinook_Sqlite.sqlite  # -f: never prompt and stay quiet when the path does not exist yet
71+
cp chinook-database/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite ../../../../../AgentQnA/tests
72+
73+
docker compose -f ../../../../../DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml up -d
4774
docker compose -f compose.yaml up -d
75+
76+
n=0
77+
until [[ "$n" -ge 100 ]]; do
78+
docker logs tgi-service > ${WORKPATH}/tgi_service_start.log
79+
if grep -q Connected ${WORKPATH}/tgi_service_start.log; then
80+
break
81+
fi
82+
sleep 10s
83+
n=$((n+1))
84+
done
85+
86+
echo "Starting CRAG server"
87+
docker run -d --runtime=runc --name=kdd-cup-24-crag-service -p=${CRAG_SERVER_PORT}:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0

0 commit comments

Comments
 (0)