
Commit d51a136

Add native LLM microservice using IPEX (opea-project#1337)
Signed-off-by: lvliang-intel <[email protected]>
1 parent c96b124 commit d51a136

7 files changed: +1078, -0 lines changed

.github/workflows/docker/compose/third_parties-compose.yaml

Lines changed: 5 additions & 0 deletions
```diff
@@ -98,3 +98,8 @@ services:
     build:
       dockerfile: comps/third_parties/llama-vision/src/Dockerfile.guard
     image: ${REGISTRY:-opea}/lvm-llama-vision-guard:${TAG:-latest}
+  ipex-llm:
+    build:
+      context: ipex-llm
+      dockerfile: comps/third_parties/ipex/src/Dockerfile
+    image: ${REGISTRY:-opea}/ipex-llm:${TAG:-latest}
```
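This entry registers the new image alongside the other third-party builds. As a rough sketch (assuming the file is invoked directly, the way the project's image-build workflow consumes it for other services; the actual CI invocation and build context may differ), the new image could be built on its own with:

```bash
# Sketch only: build just the new ipex-llm image from the third-party build compose file.
# Run from the repository root; REGISTRY and TAG default to "opea" and "latest".
docker compose -f .github/workflows/docker/compose/third_parties-compose.yaml build ipex-llm
```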
comps/third_parties/ipex/deployment/docker_compose/compose.yaml

Lines changed: 22 additions & 0 deletions

```yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

services:
  ipex:
    image: ${REGISTRY:-opea}/ipex-llm:${TAG:-latest}
    container_name: ipex-llm-server
    ports:
      - ${IPEX_LLM_PORT:-8688}:8688
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      MODEL_ID: ${MODEL_ID}
      HF_TOKEN: ${HF_TOKEN}
    restart: unless-stopped

networks:
  default:
    driver: bridge
```
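The compose file reads MODEL_ID and HF_TOKEN from the environment and publishes the server on IPEX_LLM_PORT (8688 by default). A minimal launch sketch, assuming the token is only needed for gated models:

```bash
export MODEL_ID="microsoft/phi-4"   # model to serve
export HF_TOKEN="<your_hf_token>"   # assumption: only required for gated Hugging Face models
export IPEX_LLM_PORT=8688           # optional; host port, defaults to 8688

cd comps/third_parties/ipex/deployment/docker_compose
docker compose -f compose.yaml up -d

# Follow the server logs; the container name is fixed in the compose file.
docker logs -f ipex-llm-server
```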
comps/third_parties/ipex/src/Dockerfile

Lines changed: 77 additions & 0 deletions

```dockerfile
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Most of this Dockerfile originates from https://github.com/intel/intel-extension-for-pytorch/blob/main/examples/cpu/llm/Dockerfile

ARG BASE_IMAGE=ubuntu:22.04
FROM ${BASE_IMAGE} AS base
RUN if [ -f /etc/apt/apt.conf.d/proxy.conf ]; then rm /etc/apt/apt.conf.d/proxy.conf; fi && \
    if [ ! -z ${HTTP_PROXY} ]; then echo "Acquire::http::Proxy \"${HTTP_PROXY}\";" >> /etc/apt/apt.conf.d/proxy.conf; fi && \
    if [ ! -z ${HTTPS_PROXY} ]; then echo "Acquire::https::Proxy \"${HTTPS_PROXY}\";" >> /etc/apt/apt.conf.d/proxy.conf; fi
RUN apt update && \
    apt full-upgrade -y && \
    DEBIAN_FRONTEND=noninteractive apt install --no-install-recommends -y \
    ca-certificates \
    git \
    curl \
    wget \
    vim \
    numactl \
    gcc-12 \
    g++-12 \
    make
RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 100 && \
    update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-12 100 && \
    update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 100 && \
    update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 100

WORKDIR /root

RUN curl -fsSL -v -o miniforge.sh -O https://github.com/conda-forge/miniforge/releases/download/24.7.1-2/Miniforge3-24.7.1-2-Linux-x86_64.sh && \
    bash miniforge.sh -b -p ./miniforge3 && \
    rm miniforge.sh

# --build-arg COMPILE=ON to compile from source
FROM base AS dev
ARG COMPILE
RUN git clone https://github.com/intel/intel-extension-for-pytorch.git
RUN . ~/miniforge3/bin/activate && conda create -y -n compile_py310 python=3.10 && conda activate compile_py310 && \
    cd intel-extension-for-pytorch/examples/cpu/llm && \
    export CC=gcc && export CXX=g++ && \
    if [ -z ${COMPILE} ]; then bash tools/env_setup.sh 14; else bash tools/env_setup.sh 10; fi && \
    unset CC && unset CXX

FROM base AS deploy
RUN apt update && \
    DEBIAN_FRONTEND=noninteractive apt install --no-install-recommends -y \
    google-perftools \
    openssh-server \
    net-tools && \
    apt clean && \
    rm -rf /var/lib/apt/lists/* && \
    if [ -f /etc/apt/apt.conf.d/proxy.conf ]; then rm /etc/apt/apt.conf.d/proxy.conf; fi
COPY --from=dev /root/intel-extension-for-pytorch/examples/cpu/llm ./llm
COPY --from=dev /root/intel-extension-for-pytorch/tools/get_libstdcpp_lib.sh ./llm/tools
RUN . ~/miniforge3/bin/activate && conda create -y -n py310 python=3.10 && conda activate py310 && \
    cd /usr/lib/x86_64-linux-gnu/ && ln -s libtcmalloc.so.4 libtcmalloc.so && cd && \
    cd ./llm && \
    bash tools/env_setup.sh 9 && \
    python -m pip cache purge && \
    mv ./oneCCL_release /opt/oneCCL && \
    chown -R root:root /opt/oneCCL && \
    sed -i "s|ONECCL_PATH=.*|ONECCL_PATH=/opt/oneCCL|" ./tools/env_activate.sh && \
    pip install backoff fastapi uvicorn
ARG PORT_SSH=22
RUN mkdir /var/run/sshd && \
    sed -i "s/#Port.*/Port ${PORT_SSH}/" /etc/ssh/sshd_config && \
    echo "service ssh start" >> /root/.bashrc && \
    ssh-keygen -b 4096 -f /root/.ssh/id_rsa -N "" && \
    mv /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys && \
    echo "Host *\n Port ${PORT_SSH}\n IdentityFile /root/.ssh/id_rsa\n StrictHostKeyChecking no" > /root/.ssh/config
EXPOSE ${PORT_SSH}
COPY ./comps/third_parties/ipex/src/ipex_inference.py /root
COPY ./comps/third_parties/ipex/src/openai_protocol.py /root
COPY ./comps/third_parties/ipex/src/entrypoint.sh /usr/local/bin/entrypoint.sh
RUN chmod +x /usr/local/bin/entrypoint.sh
ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
```
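The dev stage clones intel-extension-for-pytorch and runs its env_setup.sh; the COMPILE build argument switches that step between the default path and compiling IPEX from source (see the `if [ -z ${COMPILE} ]` branch and the comment above it). A hedged sketch of both invocations, mirroring the README's build command below:

```bash
# Run from the repository root.

# Default: COMPILE unset, so env_setup.sh runs without compiling IPEX from source.
docker build -f comps/third_parties/ipex/src/Dockerfile \
  --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy \
  -t opea/ipex-llm:latest .

# Compile IPEX from source instead, and move sshd to port 2345 via PORT_SSH.
docker build -f comps/third_parties/ipex/src/Dockerfile \
  --build-arg COMPILE=ON --build-arg PORT_SSH=2345 \
  --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy \
  -t opea/ipex-llm:latest .
```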
Lines changed: 31 additions & 0 deletions

# IPEX Serving microservice

[Intel® Extension for PyTorch](https://github.com/intel/intel-extension-for-pytorch) delivers advanced optimizations to accelerate Large Language Model (LLM) inference on Intel hardware. It improves performance through techniques such as paged attention and RoPE fusion, and supports a range of precision formats, including FP32, BF16, SmoothQuant INT8, and prototype weight-only quantization in INT8/INT4.

For more details, refer to the IPEX LLM [README](https://github.com/intel/intel-extension-for-pytorch/blob/main/examples/cpu/llm/README.md).

## 🚀1. Build the Docker Image

The Dockerfile used here is primarily sourced from the IPEX project, with additions that layer serving capabilities for LLM inference on top. It also enables passwordless SSH login, which is intended for distributed inference; distributed inference is not wired in yet but will be added soon.

```bash
cd ../../../../
docker build -f comps/third_parties/ipex/src/Dockerfile --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg COMPILE=ON --build-arg PORT_SSH=2345 -t opea/ipex-llm:latest .
```

## 🚀2. Start the Microservice

```bash
export MODEL_ID="microsoft/phi-4"

cd comps/third_parties/ipex/deployment/docker_compose
docker compose -f compose.yaml up -d
```

## 🚀3. Access the Service

Test the service with the following command:

```bash
http_proxy="" curl -X POST -H "Content-Type: application/json" -d '{"model": "microsoft/phi-4", "messages": [{"role": "user", "content": "Hello! What is your name?"}], "max_tokens": 128}' http://localhost:8688/v1/chat/completions
```
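The service exposes an OpenAI-style /v1/chat/completions endpoint (the Dockerfile copies an openai_protocol.py alongside the inference script). Assuming the response follows the standard OpenAI chat-completion shape, the generated text can be extracted with jq:

```bash
# Sketch: send the same request and print only the assistant's reply.
http_proxy="" curl -s -X POST -H "Content-Type: application/json" \
  -d '{"model": "microsoft/phi-4", "messages": [{"role": "user", "content": "Hello! What is your name?"}], "max_tokens": 128}' \
  http://localhost:8688/v1/chat/completions \
  | jq -r '.choices[0].message.content'
```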
comps/third_parties/ipex/src/entrypoint.sh

Lines changed: 25 additions & 0 deletions

```bash
#!/bin/bash

# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Collect all command-line arguments into an array
CMDS=()
while [ $# -gt 0 ]; do
  CMDS+=("$1")
  shift
done

# Activate the Miniforge environment
. ~/miniforge3/bin/activate
conda activate py310

# Set environment variables for oneCCL bindings for PyTorch
TMP=$(python -c "import torch; import os; print(os.path.abspath(os.path.dirname(torch.__file__)))")
. ${TMP}/../oneccl_bindings_for_pytorch/env/setvars.sh

# Print a performance note
echo "**Note:** For better performance, please consider to launch workloads with command 'ipexrun'."

# Run the inference script
python /root/ipex_inference.py "${CMDS[@]}"
```
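The while/shift loop copies every argument handed to the container into CMDS and forwards it verbatim to ipex_inference.py. A more compact, equivalent sketch (not part of the commit) would be:

```bash
# Equivalent argument forwarding: copy "$@" into the array in one step.
CMDS=("$@")
python /root/ipex_inference.py "${CMDS[@]}"
```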
