{ lib
, config
, stdenv
, cmake
, ninja
, pkg-config
, git
, python3
, openmpi
, openblas
, cudaPackages
, rocmPackages
, clblast
, Accelerate ? null
, MetalKit ? null
, CoreVideo ? null
, CoreGraphics ? null
, useOpenCL ? false
, useCuda ? config.cudaSupport
, useRocm ? config.rocmSupport
}@inputs:

let
  inherit (lib) cmakeBool cmakeFeature optional optionals;
  isDefault = !useOpenCL && !useCuda && !useRocm;

  # It's necessary to consistently use backendStdenv when building with CUDA
  # support; otherwise we get libstdc++ errors downstream.
  stdenv = throw "Use effectiveStdenv instead";
  effectiveStdenv = if useCuda then cudaPackages.backendStdenv else inputs.stdenv;
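  # The `@inputs` binding above is what makes this shadowing trick work:
  # the name `stdenv` is rebound to the throw, so any accidental use of it
  # fails loudly at evaluation time, while `inputs.stdenv` still names the
  # original argument.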

  # Distinguish the flavors a little in the package description.
  descriptionSuffix = if useOpenCL then
    " (OpenCL accelerated)"
  else if useCuda then
    " (CUDA accelerated)"
  else if useRocm then
    " (ROCm accelerated)"
  else if (MetalKit != null) then
    " (MetalKit accelerated)"
  else "";

  # TODO: package the Python scripts in this repository in a Nix-like way.
  llama-python = python3.withPackages (ps: [ ps.numpy ps.sentencepiece ]);
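  # (Referenced below in postPatch, where the scripts' shebangs are rewritten
  # to point at this environment.)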

  # See ./overlay.nix for where these dependencies are passed in.
  defaultBuildInputs = builtins.filter (p: p != null) [
    Accelerate
    MetalKit
    CoreVideo
    CoreGraphics
  ];
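  # A minimal sketch of the shape such an overlay might take (the attribute
  # names and paths below are assumptions for illustration, not copied from
  # this repository; the frameworks do live under darwin.apple_sdk.frameworks
  # in nixpkgs):
  #
  #   final: prev: {
  #     llama-cpp = prev.callPackage ./package.nix {
  #       inherit (prev.darwin.apple_sdk.frameworks)
  #         Accelerate MetalKit CoreVideo CoreGraphics;
  #     };
  #   }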

  cudaBuildInputs = with cudaPackages; [
    cuda_cccl.dev # <nv/target>
    cuda_cudart
    libcublas
  ];

  rocmBuildInputs = with rocmPackages; [ clr hipblas rocblas ];
in

effectiveStdenv.mkDerivation {
  name = "llama.cpp";
  src = ../.;
  meta = {
    description = "Inference of the LLaMA model in pure C/C++${descriptionSuffix}";
    mainProgram = "llama";
  };

  postPatch = ''
    # Load the Metal shader from the Nix store rather than from a path
    # relative to the app bundle.
    substituteInPlace ./ggml-metal.m \
      --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
    # Pin the scripts' shebangs to the Python environment defined above.
    substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python"
  '';

  nativeBuildInputs = [ cmake ninja pkg-config git ]
    ++ optionals useCuda [ cudaPackages.cuda_nvcc ];

  buildInputs = [ openmpi ]
    ++ optional useOpenCL clblast
    ++ optionals useCuda cudaBuildInputs
    ++ optionals useRocm rocmBuildInputs
    ++ optionals isDefault defaultBuildInputs
    # The non-Metal default flavor is configured with LLAMA_BLAS_VENDOR=OpenBLAS
    # below, so it needs the library available at build time.
    ++ optional (isDefault && MetalKit == null) openblas;

  cmakeFlags = [
    (cmakeBool "LLAMA_NATIVE" true)
    (cmakeBool "LLAMA_BUILD_SERVER" true)
    (cmakeBool "BUILD_SHARED_LIBS" true)
    (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
  ]
  ++ optional useOpenCL (cmakeBool "LLAMA_CLBLAST" true)
  ++ optional useCuda (cmakeBool "LLAMA_CUBLAS" true)
  ++ optionals useRocm [
    (cmakeBool "LLAMA_HIPBLAS" true)
    (cmakeFeature "CMAKE_C_COMPILER" "hipcc")
    (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")

    # Build all targets supported by rocBLAS. When updating, search for
    # TARGET_LIST_ROCM in
    # https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
    # and pick the line matching the rocBLAS version currently in nixpkgs.
    # This should likely use `rocmPackages.clr.gpuTargets` instead; see the
    # sketch after this list.
    (cmakeFeature "AMDGPU_TARGETS" "gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102")
  ]
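  # A sketch of that suggested alternative (assumes this nixpkgs revision
  # exposes `rocmPackages.clr.gpuTargets` as a list of gfx target strings):
  #
  #   (cmakeFeature "AMDGPU_TARGETS" (lib.concatStringsSep ";" rocmPackages.clr.gpuTargets))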
  ++ optionals isDefault (if (MetalKit != null) then [
    (cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
    (cmakeBool "LLAMA_METAL" true)
  ] else [
    (cmakeBool "LLAMA_BLAS" true)
    (cmakeFeature "LLAMA_BLAS_VENDOR" "OpenBLAS")
  ]);
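  # For reference (assuming the usual definitions in nixpkgs lib):
  # `cmakeBool "LLAMA_NATIVE" true` renders as "-DLLAMA_NATIVE:BOOL=TRUE",
  # and `cmakeFeature "CMAKE_C_COMPILER" "hipcc"` renders as
  # "-DCMAKE_C_COMPILER:STRING=hipcc".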

  postInstall = ''
    # Rename the generically-named binaries so they don't collide on $PATH.
    mv $out/bin/main $out/bin/llama
    mv $out/bin/server $out/bin/llama-server
    mkdir -p $out/include
    cp $src/llama.h $out/include/
  '';
}
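# A minimal usage sketch; the exact callPackage wiring is an assumption about
# the surrounding flake/overlay, not something this file defines:
#
#   llama-cpp      = pkgs.callPackage ./package.nix { };
#   llama-cpp-cuda = pkgs.callPackage ./package.nix { useCuda = true; };
#   llama-cpp-rocm = pkgs.callPackage ./package.nix { useRocm = true; };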