Skip to content

flake.nix: rewrite #4605

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 23 commits into from
Dec 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
d3e73df
flake.lock: update to hotfix CUDA::cuda_driver
SomeoneSerge Dec 26, 2023
8364cf4
flake.nix: rewrite
philiptaron Dec 22, 2023
0607e24
flake.nix: use finalPackage instead of passing it manually
SomeoneSerge Dec 24, 2023
eab1c12
nix: unclutter darwin support
SomeoneSerge Dec 24, 2023
0259941
nix: pass most darwin frameworks unconditionally
SomeoneSerge Dec 24, 2023
0fa62c1
*.nix: nixfmt
SomeoneSerge Dec 25, 2023
69c56bc
flake.nix: add maintainers
SomeoneSerge Dec 25, 2023
a07407c
nix: move meta down to follow Nixpkgs style more closely
SomeoneSerge Dec 25, 2023
04bc417
nix: add missing meta attributes
SomeoneSerge Dec 25, 2023
d08690a
flake.nix: avoid re-evaluating nixpkgs too many times
SomeoneSerge Dec 25, 2023
a28c9ac
flake.nix: use flake-parts
SomeoneSerge Dec 25, 2023
a629371
nix: migrate to pname+version
SomeoneSerge Dec 25, 2023
e3b1ba2
flake.nix: overlay: expose both the namespace and the default attribute
SomeoneSerge Dec 25, 2023
12d4a68
ci: add the (Nix) flakestry workflow
SomeoneSerge Dec 25, 2023
a16f589
nix: cmakeFlags: explicit OFF bools
SomeoneSerge Dec 26, 2023
dd0e12c
nix: cuda: reduce runtime closure
SomeoneSerge Dec 26, 2023
4522c47
nix: fewer rebuilds
SomeoneSerge Dec 26, 2023
ae6bebc
nix: respect config.cudaCapabilities
SomeoneSerge Dec 26, 2023
1efbc6b
nix: add the impure driver's location to the DT_RUNPATHs
SomeoneSerge Dec 26, 2023
82e48e2
nix: clean sources more thoroughly
SomeoneSerge Dec 26, 2023
7bd8d8c
nix: explicit mpi support
SomeoneSerge Dec 26, 2023
d0adab6
nix: explicit jetson support
SomeoneSerge Dec 26, 2023
3f7003b
flake.nix: darwin: only expose the default
SomeoneSerge Dec 26, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions .devops/nix/apps.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
perSystem =
{ config, lib, ... }:
{
apps =
let
inherit (config.packages) default;
binaries = [
"llama"
"llama-embedding"
"llama-server"
"quantize"
"train-text-from-scratch"
];
mkApp = name: {
type = "app";
program = "${default}/bin/${name}";
};
in
lib.genAttrs binaries mkApp;
};
}
13 changes: 13 additions & 0 deletions .devops/nix/devshells.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
perSystem =
{ config, lib, ... }:
{
devShells =
lib.concatMapAttrs
(name: package: {
${name} = package.passthru.shell;
${name + "-extra"} = package.passthru.shell-extra;
})
config.packages;
};
}
32 changes: 32 additions & 0 deletions .devops/nix/jetson-support.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{ inputs, ... }:
{
perSystem =
{
config,
system,
lib,
pkgsCuda,
...
}:
lib.optionalAttrs (system == "aarch64-linux") {
packages =
let
caps.jetson-xavier = "7.2";
caps.jetson-orin = "8.7";
caps.jetson-nano = "5.3";

pkgsFor =
cap:
import inputs.nixpkgs {
inherit system;
config = {
cudaSupport = true;
cudaCapabilities = [ cap ];
cudaEnableForwardCompat = false;
inherit (pkgsCuda.config) allowUnfreePredicate;
};
};
in
builtins.mapAttrs (name: cap: ((pkgsFor cap).callPackage ./scope.nix { }).llama-cpp) caps;
};
}
35 changes: 35 additions & 0 deletions .devops/nix/nixpkgs-instances.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{ inputs, ... }:
{
# The _module.args definitions are passed on to modules as arguments. E.g.
# the module `{ pkgs ... }: { /* config */ }` implicitly uses
# `_module.args.pkgs` (defined in this case by flake-parts).
perSystem =
{ system, ... }:
{
_module.args = {
pkgsCuda = import inputs.nixpkgs {
inherit system;
# Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc,
# and ucx are built with CUDA support)
config.cudaSupport = true;
config.allowUnfreePredicate =
p:
builtins.all
(
license:
license.free
|| builtins.elem license.shortName [
"CUDA EULA"
"cuDNN EULA"
]
)
(p.meta.licenses or [ p.meta.license ]);
};
# Ensure dependencies use ROCm consistently
pkgsRocm = import inputs.nixpkgs {
inherit system;
config.rocmSupport = true;
};
};
};
}
265 changes: 265 additions & 0 deletions .devops/nix/package.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,265 @@
{
lib,
config,
stdenv,
mkShell,
cmake,
ninja,
pkg-config,
git,
python3,
mpi,
openblas, # TODO: Use the generic `blas` so users could switch betwen alternative implementations
cudaPackages,
darwin,
rocmPackages,
clblast,
useBlas ? builtins.all (x: !x) [
useCuda
useMetalKit
useOpenCL
useRocm
],
useCuda ? config.cudaSupport,
useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
useMpi ? false, # Increases the runtime closure size by ~700M
useOpenCL ? false,
useRocm ? config.rocmSupport,
llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
}@inputs:

let
inherit (lib)
cmakeBool
cmakeFeature
optionals
strings
versionOlder
;

# It's necessary to consistently use backendStdenv when building with CUDA support,
# otherwise we get libstdc++ errors downstream.
stdenv = throw "Use effectiveStdenv instead";
effectiveStdenv = if useCuda then cudaPackages.backendStdenv else inputs.stdenv;

suffices =
lib.optionals useBlas [ "BLAS" ]
++ lib.optionals useCuda [ "CUDA" ]
++ lib.optionals useMetalKit [ "MetalKit" ]
++ lib.optionals useMpi [ "MPI" ]
++ lib.optionals useOpenCL [ "OpenCL" ]
++ lib.optionals useRocm [ "ROCm" ];

pnameSuffix =
strings.optionalString (suffices != [ ])
"-${strings.concatMapStringsSep "-" strings.toLower suffices}";
descriptionSuffix =
strings.optionalString (suffices != [ ])
", accelerated with ${strings.concatStringsSep ", " suffices}";

# TODO: package the Python in this repository in a Nix-like way.
# It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
# is PEP 517-compatible, and ensure the correct .dist-info is generated.
# https://peps.python.org/pep-0517/
llama-python = python3.withPackages (
ps: [
ps.numpy
ps.sentencepiece
]
);

# TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
llama-python-extra = python3.withPackages (
ps: [
ps.numpy
ps.sentencepiece
ps.torchWithoutCuda
ps.transformers
]
);

# apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
# separately
darwinBuildInputs =
with darwin.apple_sdk.frameworks;
[
Accelerate
CoreVideo
CoreGraphics
]
++ optionals useMetalKit [ MetalKit ];

cudaBuildInputs = with cudaPackages; [
cuda_cccl.dev # <nv/target>

# A temporary hack for reducing the closure size, remove once cudaPackages
# have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
cuda_cudart.dev
cuda_cudart.lib
cuda_cudart.static
libcublas.dev
libcublas.lib
libcublas.static
];

rocmBuildInputs = with rocmPackages; [
clr
hipblas
rocblas
];
in

effectiveStdenv.mkDerivation (
finalAttrs: {
pname = "llama-cpp${pnameSuffix}";
version = llamaVersion;

src = lib.cleanSourceWith {
filter =
name: type:
!(builtins.any (_: _) [
(lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
(name == "README.md") # Ignore *.md changes whe computing outPaths
(lib.hasPrefix "." name) # Skip hidden files and directories
]);
src = lib.cleanSource ../../.;
};

postPatch = ''
substituteInPlace ./ggml-metal.m \
--replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"

# TODO: Package up each Python script or service appropriately.
# If we were to migrate to buildPythonPackage and prepare the `pyproject.toml`,
# we could make those *.py into setuptools' entrypoints
substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python"
'';

nativeBuildInputs =
[
cmake
ninja
pkg-config
git
]
++ optionals useCuda [
cudaPackages.cuda_nvcc

# TODO: Replace with autoAddDriverRunpath
# once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
cudaPackages.autoAddOpenGLRunpathHook
];

buildInputs =
optionals effectiveStdenv.isDarwin darwinBuildInputs
++ optionals useCuda cudaBuildInputs
++ optionals useMpi [ mpi ]
++ optionals useOpenCL [ clblast ]
++ optionals useRocm rocmBuildInputs;

cmakeFlags =
[
(cmakeBool "LLAMA_NATIVE" true)
(cmakeBool "LLAMA_BUILD_SERVER" true)
(cmakeBool "BUILD_SHARED_LIBS" true)
(cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
(cmakeBool "LLAMA_BLAS" useBlas)
(cmakeBool "LLAMA_CLBLAST" useOpenCL)
(cmakeBool "LLAMA_CUBLAS" useCuda)
(cmakeBool "LLAMA_HIPBLAS" useRocm)
(cmakeBool "LLAMA_METAL" useMetalKit)
(cmakeBool "LLAMA_MPI" useMpi)
]
++ optionals useCuda [
(
with cudaPackages.flags;
cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
)
)
]
++ optionals useRocm [
(cmakeFeature "CMAKE_C_COMPILER" "hipcc")
(cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")

# Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
# in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
# and select the line that matches the current nixpkgs version of rocBLAS.
# Should likely use `rocmPackages.clr.gpuTargets`.
"-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
]
++ optionals useMetalKit [ (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") ]
++ optionals useBlas [ (lib.cmakeFeature "LLAMA_BLAS_VENDOR" "OpenBLAS") ];

# TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
# if they haven't been added yet.
postInstall = ''
mv $out/bin/main $out/bin/llama
mv $out/bin/server $out/bin/llama-server
mkdir -p $out/include
cp $src/llama.h $out/include/
'';

# Define the shells here, but don't add in the inputsFrom to avoid recursion.
passthru = {
inherit
useBlas
useCuda
useMetalKit
useMpi
useOpenCL
useRocm
;

shell = mkShell {
name = "shell-${finalAttrs.finalPackage.name}";
description = "contains numpy and sentencepiece";
buildInputs = [ llama-python ];
inputsFrom = [ finalAttrs.finalPackage ];
};

shell-extra = mkShell {
name = "shell-extra-${finalAttrs.finalPackage.name}";
description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
buildInputs = [ llama-python-extra ];
inputsFrom = [ finalAttrs.finalPackage ];
};
};

meta = {
# Configurations we don't want even the CI to evaluate. Results in the
# "unsupported platform" messages. This is mostly a no-op, because
# cudaPackages would've refused to evaluate anyway.
badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin;

# Configurations that are known to result in build failures. Can be
# overridden by importing Nixpkgs with `allowBroken = true`.
broken = (useMetalKit && !effectiveStdenv.isDarwin);

description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
homepage = "https://github.com/ggerganov/llama.cpp/";
license = lib.licenses.mit;

# Accommodates `nix run` and `lib.getExe`
mainProgram = "llama";

# These people might respond, on the best effort basis, if you ping them
# in case of Nix-specific regressions or for reviewing Nix-specific PRs.
# Consider adding yourself to this list if you want to ensure this flake
# stays maintained and you're willing to invest your time. Do not add
# other people without their consent. Consider removing people after
# they've been unreachable for long periods of time.

# Note that lib.maintainers is defined in Nixpkgs, but you may just add
# an attrset following the same format as in
# https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
maintainers = with lib.maintainers; [
philiptaron
SomeoneSerge
];

# Extend `badPlatforms` instead
platforms = lib.platforms.all;
};
}
)
12 changes: 12 additions & 0 deletions .devops/nix/scope.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
lib,
newScope,
llamaVersion ? "0.0.0",
}:

lib.makeScope newScope (
self: {
inherit llamaVersion;
llama-cpp = self.callPackage ./package.nix { };
}
)
Loading