set -x

WORKPATH="$( cd "$( dirname "$0" )" && pwd )"
+ DOCKER_FILE="$WORKPATH"/../../comps/llms/text-generation/vllm/langchain/dependency/Dockerfile.openvino_arc

# Define variables
port=5033
RENDER_GROUP_ID=110
- HF_MODEL_FIR=$HOME/hf_model
DOCKER_IMAGE="vllm-openvino:comps"
CONTAINER_NAME="test-comps-vllm-openvino-container"
+ HF_CACHE_DIR=$HOME/.cache/huggingface

function build_container() {
- cd $WORKPATH
- git clone https://github.com/vllm-project/vllm.git vllm-openvino
- cd ./vllm-openvino
-
- git reset --hard 067e77f9a87c3466fce41c8fe8710fddc69ec26c # resolve circular import issue
-
- # Add ARC drive to dockerfile
- sed -i '9r /dev/stdin' Dockerfile.openvino << EndOfMessage
- RUN apt-get install -y gpg-agent wget
- RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \\
-     echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" | \\
-     tee /etc/apt/sources.list.d/intel-gpu-jammy.list && \\
-     apt-get update -y && \\
-     apt-get install -y \\
-         intel-opencl-icd intel-level-zero-gpu level-zero \\
-         intel-media-va-driver-non-free libmfx1 libmfxgen1 libvpl2 \\
-         libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \\
-         libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \\
-         mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo
- EndOfMessage
-
docker build --no-cache -t $DOCKER_IMAGE \
-     -f Dockerfile.openvino \
+     -f $DOCKER_FILE \
      . \
      --build-arg https_proxy=$https_proxy \
      --build-arg http_proxy=$http_proxy
+
if [ $? -ne 0 ]; then
    echo "vllm-openvino built fail"
    exit 1
else
    echo "vllm-openvino built successful"
fi
- cd $WORKPATH
- rm -rf vllm-openvino
}
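Before the container is started, it can be worth confirming that build_container actually produced the expected image tag. A minimal sketch, not part of this diff, assuming the Docker CLI is available on the test host:

# Sketch (not in the script): check that the vllm-openvino:comps image built above exists locally.
docker images --format '{{.Repository}}:{{.Tag}}' | grep -q '^vllm-openvino:comps$' \
    && echo "vllm-openvino:comps image found" \
    || echo "vllm-openvino:comps image missing"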
# Function to start Docker container
@@ -58,14 +37,14 @@ start_container() {
    --ipc=host \
    -e HTTPS_PROXY=$https_proxy \
    -e HTTP_PROXY=$https_proxy \
-   -v $HF_MODEL_FIR:/hf_model \
+   -v $HF_CACHE_DIR:/root/.cache/huggingface \
    --device=/dev/dri:/dev/dri \
-   --group-add $RENDER_GROUP_ID
+   --group-add $RENDER_GROUP_ID \
    vllm-openvino:comps /bin/bash -c "\
        export VLLM_OPENVINO_DEVICE=GPU && \
        export VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=ON && \
        python3 -m vllm.entrypoints.openai.api_server \
-           --model /hf_model/neural-chat-7b-v3-3 \
+           --model Intel/neural-chat-7b-v3-3 \
            --host 0.0.0.0 \
            --port $port \
            --max_model_len 8192"
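With the container running, the OpenAI-compatible server it launches listens on $port (5033) and serves Intel/neural-chat-7b-v3-3, so it can be probed directly from the host. A minimal sketch, not part of this diff; the prompt and token limit are placeholders rather than what the script's test_api_endpoint actually sends:

# Sketch (not in the script): probe the OpenAI-compatible completions endpoint exposed by the container.
curl http://localhost:5033/v1/completions \
    -H "Content-Type: application/json" \
    -d '{"model": "Intel/neural-chat-7b-v3-3", "prompt": "What is Deep Learning?", "max_tokens": 32, "temperature": 0}'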
@@ -131,8 +110,7 @@ function test_api_endpoint {
# Main function
main () {

-   # use local image to skip slow network connection
-   # build_container
+   build_container
    start_container

    # Sleep to allow the container to start up fully
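The fixed sleep gives the model time to load; an alternative is to poll the server until it answers. A minimal sketch, not part of this diff; the endpoint path and the roughly five-minute retry budget are assumptions:

# Sketch (not in the script): poll the /v1/models endpoint until the server responds or retries run out.
for _ in $(seq 1 60); do
    curl -sf http://localhost:5033/v1/models > /dev/null && break
    sleep 5
done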