Skip to content
This repository was archived by the owner on Jul 20, 2025. It is now read-only.

Commit 9c4c15a

Browse files
authored
Merge branch 'master' into vulkan
2 parents 48ad459 + 0f64857 commit 9c4c15a

File tree

94 files changed

+22139
-2294
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

94 files changed

+22139
-2294
lines changed

.devops/main-intel.Dockerfile

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
ARG ONEAPI_VERSION=2024.0.1-devel-ubuntu22.04
2+
ARG UBUNTU_VERSION=22.04
3+
4+
FROM intel/hpckit:$ONEAPI_VERSION as build
5+
6+
RUN apt-get update && \
7+
apt-get install -y git
8+
9+
WORKDIR /app
10+
11+
COPY . .
12+
13+
# for some reason, "-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DLLAMA_NATIVE=ON" gives worse performance
14+
RUN mkdir build && \
15+
cd build && \
16+
cmake .. -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx && \
17+
cmake --build . --config Release --target main server
18+
19+
FROM ubuntu:$UBUNTU_VERSION as runtime
20+
21+
COPY --from=build /app/build/bin/main /main
22+
COPY --from=build /app/build/bin/server /server
23+
24+
ENV LC_ALL=C.utf8
25+
26+
ENTRYPOINT [ "/main" ]

.devops/nix/nixpkgs-instances.nix

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,18 @@
77
{ system, ... }:
88
{
99
_module.args = {
10+
# Note: bringing up https://zimbatm.com/notes/1000-instances-of-nixpkgs
11+
# again, the below creates several nixpkgs instances which the
12+
# flake-centric CLI will be forced to evaluate e.g. on `nix flake show`.
13+
#
14+
# This is currently "slow" and "expensive", on a certain scale.
15+
# This also isn't "right" in that this hinders dependency injection at
16+
# the level of flake inputs. This might get removed in the foreseeable
17+
# future.
18+
#
19+
# Note that you can use these expressions without Nix
20+
# (`pkgs.callPackage ./.devops/nix/scope.nix { }` is the entry point).
21+
1022
pkgsCuda = import inputs.nixpkgs {
1123
inherit system;
1224
# Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc,

.devops/nix/package.nix

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ let
7373
ps: [
7474
ps.numpy
7575
ps.sentencepiece
76+
ps.tiktoken
7677
ps.torchWithoutCuda
7778
ps.transformers
7879
]
@@ -114,14 +115,22 @@ effectiveStdenv.mkDerivation (
114115
pname = "llama-cpp${pnameSuffix}";
115116
version = llamaVersion;
116117

118+
# Note: none of the files discarded here are visible in the sandbox or
119+
# affect the output hash. This also means they can be modified without
120+
# triggering a rebuild.
117121
src = lib.cleanSourceWith {
118122
filter =
119123
name: type:
120-
!(builtins.any (_: _) [
124+
let
125+
noneOf = builtins.all (x: !x);
126+
baseName = baseNameOf name;
127+
in
128+
noneOf [
121129
(lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
122-
(name == "README.md") # Ignore *.md changes whe computing outPaths
123-
(lib.hasPrefix "." name) # Skip hidden files and directories
124-
]);
130+
(lib.hasSuffix ".md" name) # Ignore *.md changes when computing outPaths
131+
(lib.hasPrefix "." baseName) # Skip hidden files and directories
132+
(baseName == "flake.lock")
133+
];
125134
src = lib.cleanSource ../../.;
126135
};
127136

@@ -159,7 +168,7 @@ effectiveStdenv.mkDerivation (
159168

160169
cmakeFlags =
161170
[
162-
(cmakeBool "LLAMA_NATIVE" true)
171+
(cmakeBool "LLAMA_NATIVE" false)
163172
(cmakeBool "LLAMA_BUILD_SERVER" true)
164173
(cmakeBool "BUILD_SHARED_LIBS" true)
165174
(cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
@@ -216,6 +225,9 @@ effectiveStdenv.mkDerivation (
216225
description = "contains numpy and sentencepiece";
217226
buildInputs = [ llama-python ];
218227
inputsFrom = [ finalAttrs.finalPackage ];
228+
shellHook = ''
229+
addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib effectiveStdenv.cc.cc}/lib"
230+
'';
219231
};
220232

221233
shell-extra = mkShell {

.devops/nix/scope.nix

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44
llamaVersion ? "0.0.0",
55
}:
66

7+
# We're using `makeScope` instead of just writing out an attrset
8+
# because it allows users to apply overlays later using `overrideScope'`.
9+
# Cf. https://noogle.dev/f/lib/makeScope
10+
711
lib.makeScope newScope (
812
self: {
913
inherit llamaVersion;

.devops/server-cuda.Dockerfile

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
ARG UBUNTU_VERSION=22.04
2+
# This needs to generally match the container host's environment.
3+
ARG CUDA_VERSION=11.7.1
4+
# Target the CUDA build image
5+
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
6+
# Target the CUDA runtime image
7+
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
8+
9+
FROM ${BASE_CUDA_DEV_CONTAINER} as build
10+
11+
# Unless otherwise specified, we make a fat build.
12+
ARG CUDA_DOCKER_ARCH=all
13+
14+
RUN apt-get update && \
15+
apt-get install -y build-essential git
16+
17+
WORKDIR /app
18+
19+
COPY . .
20+
21+
# Set nvcc architecture
22+
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
23+
# Enable cuBLAS
24+
ENV LLAMA_CUBLAS=1
25+
26+
RUN make
27+
28+
FROM ${BASE_CUDA_RUN_CONTAINER} as runtime
29+
30+
COPY --from=build /app/server /server
31+
32+
ENTRYPOINT [ "/server" ]

.devops/server-intel.Dockerfile

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
ARG ONEAPI_VERSION=2024.0.1-devel-ubuntu22.04
2+
ARG UBUNTU_VERSION=22.04
3+
4+
FROM intel/hpckit:$ONEAPI_VERSION as build
5+
6+
RUN apt-get update && \
7+
apt-get install -y git
8+
9+
WORKDIR /app
10+
11+
COPY . .
12+
13+
# for some reason, "-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DLLAMA_NATIVE=ON" gives worse performance
14+
RUN mkdir build && \
15+
cd build && \
16+
cmake .. -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx && \
17+
cmake --build . --config Release --target main server
18+
19+
FROM ubuntu:$UBUNTU_VERSION as runtime
20+
21+
COPY --from=build /app/build/bin/server /server
22+
23+
ENV LC_ALL=C.utf8
24+
25+
ENTRYPOINT [ "/server" ]

.devops/server-rocm.Dockerfile

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
ARG UBUNTU_VERSION=22.04
2+
3+
# This needs to generally match the container host's environment.
4+
ARG ROCM_VERSION=5.6
5+
6+
# Target the ROCm build image
7+
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
8+
9+
FROM ${BASE_ROCM_DEV_CONTAINER} as build
10+
11+
# Unless otherwise specified, we make a fat build.
12+
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
13+
# This is mostly tied to rocBLAS supported archs.
14+
ARG ROCM_DOCKER_ARCH=\
15+
gfx803 \
16+
gfx900 \
17+
gfx906 \
18+
gfx908 \
19+
gfx90a \
20+
gfx1010 \
21+
gfx1030 \
22+
gfx1100 \
23+
gfx1101 \
24+
gfx1102
25+
26+
COPY requirements.txt requirements.txt
27+
COPY requirements requirements
28+
29+
RUN pip install --upgrade pip setuptools wheel \
30+
&& pip install -r requirements.txt
31+
32+
WORKDIR /app
33+
34+
COPY . .
35+
36+
# Set the ROCm GPU target architectures
37+
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
38+
# Enable ROCm
39+
ENV LLAMA_HIPBLAS=1
40+
ENV CC=/opt/rocm/llvm/bin/clang
41+
ENV CXX=/opt/rocm/llvm/bin/clang++
42+
43+
RUN make
44+
45+
ENTRYPOINT [ "/app/server" ]

.devops/server.Dockerfile

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
ARG UBUNTU_VERSION=22.04
2+
3+
FROM ubuntu:$UBUNTU_VERSION as build
4+
5+
RUN apt-get update && \
6+
apt-get install -y build-essential git
7+
8+
WORKDIR /app
9+
10+
COPY . .
11+
12+
RUN make
13+
14+
FROM ubuntu:$UBUNTU_VERSION as runtime
15+
16+
COPY --from=build /app/server /server
17+
18+
ENV LC_ALL=C.utf8
19+
20+
ENTRYPOINT [ "/server" ]

.github/workflows/build.yml

Lines changed: 49 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ jobs:
7272
id: cmake_test
7373
run: |
7474
cd build
75-
ctest --verbose --timeout 900
75+
ctest -L main --verbose --timeout 900
7676
7777
ubuntu-latest-cmake-sanitizer:
7878
runs-on: ubuntu-latest
@@ -107,7 +107,7 @@ jobs:
107107
id: cmake_test
108108
run: |
109109
cd build
110-
ctest --verbose --timeout 900
110+
ctest -L main --verbose --timeout 900
111111
112112
ubuntu-latest-cmake-mpi:
113113
runs-on: ubuntu-latest
@@ -141,7 +141,48 @@ jobs:
141141
id: cmake_test
142142
run: |
143143
cd build
144-
ctest --verbose
144+
ctest -L main --verbose
145+
146+
ubuntu-22-cmake-sycl:
147+
runs-on: ubuntu-22.04
148+
149+
continue-on-error: true
150+
151+
steps:
152+
- uses: actions/checkout@v2
153+
154+
- name: add oneAPI to apt
155+
shell: bash
156+
run: |
157+
cd /tmp
158+
wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
159+
sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
160+
rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
161+
sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
162+
163+
- name: install oneAPI dpcpp compiler
164+
shell: bash
165+
run: |
166+
sudo apt update
167+
sudo apt install intel-oneapi-compiler-dpcpp-cpp
168+
169+
- name: install oneAPI MKL library
170+
shell: bash
171+
run: |
172+
sudo apt install intel-oneapi-mkl-devel
173+
174+
- name: Clone
175+
id: checkout
176+
uses: actions/checkout@v3
177+
178+
- name: Build
179+
id: cmake_build
180+
run: |
181+
source /opt/intel/oneapi/setvars.sh
182+
mkdir build
183+
cd build
184+
cmake -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
185+
cmake --build . --config Release -j $(nproc)
145186
146187
# TODO: build with LLAMA_NO_METAL because test-backend-ops fails on "Apple Paravirtual device" and I don't know
147188
# how to debug it.
@@ -202,7 +243,7 @@ jobs:
202243
id: cmake_test
203244
run: |
204245
cd build
205-
ctest --verbose --timeout 900
246+
ctest -L main --verbose --timeout 900
206247
207248
macOS-latest-cmake-ios:
208249
runs-on: macos-latest
@@ -295,7 +336,7 @@ jobs:
295336
OPENBLAS_VERSION: 0.3.23
296337
OPENCL_VERSION: 2023.04.17
297338
CLBLAST_VERSION: 1.6.0
298-
SDE_VERSION: 9.21.1-2023-04-24
339+
SDE_VERSION: 9.33.0-2024-01-07
299340

300341
strategy:
301342
matrix:
@@ -394,19 +435,19 @@ jobs:
394435
if: ${{ matrix.build != 'clblast' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }} # not all machines have native AVX-512
395436
run: |
396437
cd build
397-
ctest -C Release --verbose --timeout 900
438+
ctest -L main -C Release --verbose --timeout 900
398439
399440
- name: Test (Intel SDE)
400441
id: cmake_test_sde
401442
if: ${{ matrix.build == 'avx512' && env.HAS_AVX512F == '0' }} # use Intel SDE for AVX-512 emulation
402443
run: |
403-
curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/777395/sde-external-${env:SDE_VERSION}-win.tar.xz"
444+
curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/813591/sde-external-${env:SDE_VERSION}-win.tar.xz"
404445
# for some weird reason windows tar doesn't like sde tar.xz
405446
7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz
406447
7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
407448
$sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
408449
cd build
409-
& $sde -future -- ctest -C Release --verbose --timeout 900
450+
& $sde -future -- ctest -L main -C Release --verbose --timeout 900
410451
411452
- name: Determine tag name
412453
id: tag

.github/workflows/docker.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,18 @@ jobs:
2828
config:
2929
- { tag: "light", dockerfile: ".devops/main.Dockerfile", platforms: "linux/amd64,linux/arm64" }
3030
- { tag: "full", dockerfile: ".devops/full.Dockerfile", platforms: "linux/amd64,linux/arm64" }
31+
- { tag: "server", dockerfile: ".devops/server.Dockerfile", platforms: "linux/amd64,linux/arm64" }
3132
# NOTE(canardletter): The CUDA builds on arm64 are very slow, so I
3233
# have disabled them for now until the reason why
3334
# is understood.
3435
- { tag: "light-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platforms: "linux/amd64" }
3536
- { tag: "full-cuda", dockerfile: ".devops/full-cuda.Dockerfile", platforms: "linux/amd64" }
37+
- { tag: "server-cuda", dockerfile: ".devops/server-cuda.Dockerfile", platforms: "linux/amd64" }
3638
- { tag: "light-rocm", dockerfile: ".devops/main-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
3739
- { tag: "full-rocm", dockerfile: ".devops/full-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
40+
- { tag: "server-rocm", dockerfile: ".devops/server-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
41+
- { tag: "light-intel", dockerfile: ".devops/main-intel.Dockerfile", platforms: "linux/amd64" }
42+
- { tag: "server-intel", dockerfile: ".devops/server-intel.Dockerfile", platforms: "linux/amd64" }
3843
steps:
3944
- name: Check out the repo
4045
uses: actions/checkout@v3

0 commit comments

Comments
 (0)