Skip to content

Overhaul ci/run.sh and enable tests with openllama model files #4586

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Jan 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ jobs:
id: cmake_test
run: |
cd build
ctest --verbose --timeout 900
ctest -L main --verbose --timeout 900

ubuntu-latest-cmake-sanitizer:
runs-on: ubuntu-latest
Expand Down Expand Up @@ -107,7 +107,7 @@ jobs:
id: cmake_test
run: |
cd build
ctest --verbose --timeout 900
ctest -L main --verbose --timeout 900

ubuntu-latest-cmake-mpi:
runs-on: ubuntu-latest
Expand Down Expand Up @@ -141,7 +141,7 @@ jobs:
id: cmake_test
run: |
cd build
ctest --verbose
ctest -L main --verbose

# TODO: build with LLAMA_NO_METAL because test-backend-ops fail on "Apple Paravirtual device" and I don't know
# how to debug it.
Expand Down Expand Up @@ -202,7 +202,7 @@ jobs:
id: cmake_test
run: |
cd build
ctest --verbose --timeout 900
ctest -L main --verbose --timeout 900

macOS-latest-cmake-ios:
runs-on: macos-latest
Expand Down Expand Up @@ -394,7 +394,7 @@ jobs:
if: ${{ matrix.build != 'clblast' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }} # not all machines have native AVX-512
run: |
cd build
ctest -C Release --verbose --timeout 900
ctest -L main -C Release --verbose --timeout 900

- name: Test (Intel SDE)
id: cmake_test_sde
Expand All @@ -406,7 +406,7 @@ jobs:
7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
$sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
cd build
& $sde -future -- ctest -C Release --verbose --timeout 900
& $sde -future -- ctest -L main -C Release --verbose --timeout 900

- name: Determine tag name
id: tag
Expand Down
19 changes: 1 addition & 18 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
lcov-report/
gcovr-report/

build*/
build*
out/
tmp/

Expand Down Expand Up @@ -89,20 +89,3 @@ examples/jeopardy/results.txt

poetry.lock
poetry.toml

# Test binaries
/tests/test-grammar-parser
/tests/test-llama-grammar
/tests/test-double-float
/tests/test-grad0
/tests/test-opt
/tests/test-quantize-fns
/tests/test-quantize-perf
/tests/test-sampling
/tests/test-tokenizer-0-llama
/tests/test-tokenizer-0-falcon
/tests/test-tokenizer-1-llama
/tests/test-tokenizer-1-bpe
/tests/test-rope
/tests/test-backend-ops
/tests/test-autorelease
7 changes: 5 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ TEST_TARGETS = \
tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt \
tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama \
tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe tests/test-rope \
tests/test-backend-ops tests/test-autorelease
tests/test-backend-ops tests/test-model-load-cancel tests/test-autorelease

# Code coverage output files
COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
Expand Down Expand Up @@ -748,5 +748,8 @@ tests/test-c.o: tests/test-c.c llama.h
tests/test-backend-ops: tests/test-backend-ops.cpp ggml.o $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
tests/test-model-load-cancel: tests/test-model-load-cancel.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
81 changes: 70 additions & 11 deletions ci/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ mkdir -p "$2"
OUT=$(realpath "$1")
MNT=$(realpath "$2")

rm -v $OUT/*.log
rm -v $OUT/*.exit
rm -v $OUT/*.md
rm -f "$OUT/*.log"
rm -f "$OUT/*.exit"
rm -f "$OUT/*.md"

sd=`dirname $0`
cd $sd/../
Expand Down Expand Up @@ -94,7 +94,7 @@ function gg_run_ctest_debug {
(time cmake -DCMAKE_BUILD_TYPE=Debug ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
(time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log

(time ctest --output-on-failure -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
(time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log

set +e
}
Expand Down Expand Up @@ -123,9 +123,9 @@ function gg_run_ctest_release {
(time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log

if [ -z ${GG_BUILD_LOW_PERF} ]; then
(time ctest --output-on-failure ) 2>&1 | tee -a $OUT/${ci}-ctest.log
(time ctest --output-on-failure -L main ) 2>&1 | tee -a $OUT/${ci}-ctest.log
else
(time ctest --output-on-failure -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
(time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
fi

set +e
Expand All @@ -141,6 +141,61 @@ function gg_sum_ctest_release {
gg_printf '```\n'
}

function gg_get_model {
local gguf_3b="$MNT/models/open-llama/3B-v2/ggml-model-f16.gguf"
local gguf_7b="$MNT/models/open-llama/7B-v2/ggml-model-f16.gguf"
if [[ -s $gguf_3b ]]; then
echo -n "$gguf_3b"
elif [[ -s $gguf_7b ]]; then
echo -n "$gguf_7b"
else
echo >&2 "No model found. Can't run gg_run_ctest_with_model."
exit 1
fi
}

function gg_run_ctest_with_model_debug {
cd ${SRC}

local model; model=$(gg_get_model)
cd build-ci-debug
set -e
(LLAMACPP_TEST_MODELFILE="$model" time ctest --output-on-failure -L model) 2>&1 | tee -a $OUT/${ci}-ctest.log
set +e
cd ..
}

function gg_run_ctest_with_model_release {
cd ${SRC}

local model; model=$(gg_get_model)
cd build-ci-release
set -e
(LLAMACPP_TEST_MODELFILE="$model" time ctest --output-on-failure -L model) 2>&1 | tee -a $OUT/${ci}-ctest.log
set +e
cd ..
}

function gg_sum_ctest_with_model_debug {
gg_printf '### %s\n\n' "${ci}"

gg_printf 'Runs ctest with model files in debug mode\n'
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
gg_printf '```\n'
gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)"
gg_printf '```\n'
}

function gg_sum_ctest_with_model_release {
gg_printf '### %s\n\n' "${ci}"

gg_printf 'Runs ctest with model files in release mode\n'
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
gg_printf '```\n'
gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)"
gg_printf '```\n'
}

# open_llama_3b_v2

function gg_run_open_llama_3b_v2 {
Expand Down Expand Up @@ -183,8 +238,6 @@ function gg_run_open_llama_3b_v2 {

wiki_test_60="${path_wiki}/wiki.test-60.raw"

./bin/test-autorelease ${model_f16}

./bin/quantize ${model_f16} ${model_q8_0} q8_0
./bin/quantize ${model_f16} ${model_q4_0} q4_0
./bin/quantize ${model_f16} ${model_q4_1} q4_1
Expand Down Expand Up @@ -507,14 +560,18 @@ function gg_sum_open_llama_7b_v2 {
## main

if [ -z ${GG_BUILD_LOW_PERF} ]; then
# Create symlink: ./llama.cpp/models-mnt -> $MNT/models/models-mnt
rm -rf ${SRC}/models-mnt

mnt_models=${MNT}/models
mkdir -p ${mnt_models}
ln -sfn ${mnt_models} ${SRC}/models-mnt

python3 -m pip install -r ${SRC}/requirements.txt
python3 -m pip install --editable gguf-py
# Create a fresh python3 venv and enter it
python3 -m venv "$MNT/venv"
source "$MNT/venv/bin/activate"

pip install -r ${SRC}/requirements.txt --disable-pip-version-check
pip install --editable gguf-py --disable-pip-version-check
fi

ret=0
Expand All @@ -529,6 +586,8 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then
else
test $ret -eq 0 && gg_run open_llama_7b_v2
fi
test $ret -eq 0 && gg_run ctest_with_model_debug
test $ret -eq 0 && gg_run ctest_with_model_release
fi
fi

Expand Down
50 changes: 50 additions & 0 deletions scripts/ci-run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/bin/bash
set -euo pipefail
this=$(realpath "$0"); readonly this
cd "$(dirname "$this")"
shellcheck "$this"

if (( $# != 1 && $# != 2 )); then
cat >&2 <<'EOF'
usage:
ci-run.sh <tmp_dir> [<cache_dir>]

This script wraps ci/run.sh:
* If <tmp_dir> is a ramdisk, you can reduce writes to your SSD. If <tmp_dir> is not a ramdisk, keep in mind that total writes will increase by the size of <cache_dir>.
(openllama_3b_v2: quantized models are about 30GB)
* Persistent model and data files are synced to and from <cache_dir>,
excluding generated .gguf files.
(openllama_3b_v2: persistent files are about 6.6GB)
* <cache_dir> defaults to ~/.cache/llama.cpp
EOF
exit 1
fi

cd .. # => llama.cpp repo root

tmp="$1"
mkdir -p "$tmp"
tmp=$(realpath "$tmp")
echo >&2 "Using tmp=$tmp"

cache="${2-$HOME/.cache/llama.cpp}"
mkdir -p "$cache"
cache=$(realpath "$cache")
echo >&2 "Using cache=$cache"

_sync() {
local from="$1"; shift
local to="$1"; shift

echo >&2 "Syncing from $from to $to"
mkdir -p "$from" "$to"
rsync -a "$from" "$to" --delete-during "$@"
}

_sync "$(realpath .)/" "$tmp/llama.cpp"
_sync "$cache/ci-mnt/models/" "$tmp/llama.cpp/ci-mnt/models/"

cd "$tmp/llama.cpp"
bash ci/run.sh ci-out ci-mnt

_sync 'ci-mnt/models/' "$cache/ci-mnt/models/" --exclude='*.gguf' -P
2 changes: 2 additions & 0 deletions tests/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*
!*.*
14 changes: 11 additions & 3 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,21 +1,27 @@
function(llama_build_executable source)
get_filename_component(TEST_TARGET ${source} NAME_WE)
add_executable(${TEST_TARGET} ${source})
add_executable(${TEST_TARGET} ${source} get-model.cpp)
install(TARGETS ${TEST_TARGET} RUNTIME)
target_link_libraries(${TEST_TARGET} PRIVATE common)
endfunction()

function(llama_test_executable name source)
get_filename_component(TEST_TARGET ${source} NAME_WE)
add_test(NAME ${name} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
set_property(TEST ${name} PROPERTY LABELS "main")
endfunction()

function(llama_build_and_test_executable source)
llama_build_and_test_executable_with_label(${source} "main")
endfunction()

function(llama_build_and_test_executable_with_label source label)
get_filename_component(TEST_TARGET ${source} NAME_WE)
add_executable(${TEST_TARGET} ${source})
add_executable(${TEST_TARGET} ${source} get-model.cpp)
install(TARGETS ${TEST_TARGET} RUNTIME)
target_link_libraries(${TEST_TARGET} PRIVATE common)
add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
set_property(TEST ${TEST_TARGET} PROPERTY LABELS ${label})
endfunction()

# llama_build_and_test_executable(test-double-float.cpp) # SLOW
Expand Down Expand Up @@ -49,10 +55,12 @@ llama_build_and_test_executable(test-llama-grammar.cpp)
llama_build_and_test_executable(test-grad0.cpp)
# llama_build_and_test_executable(test-opt.cpp) # SLOW
llama_build_and_test_executable(test-backend-ops.cpp)
llama_build_and_test_executable(test-autorelease.cpp)

llama_build_and_test_executable(test-rope.cpp)

llama_build_and_test_executable_with_label(test-model-load-cancel.cpp "model")
llama_build_and_test_executable_with_label(test-autorelease.cpp "model")

# dummy executable - not installed
get_filename_component(TEST_TARGET test-c.c NAME_WE)
add_executable(${TEST_TARGET} test-c.c)
Expand Down
21 changes: 21 additions & 0 deletions tests/get-model.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#include <cstdio>
#include <cstdlib>
#include <cstring>

#include "get-model.h"

char * get_model_or_exit(int argc, char *argv[]) {
char * model_path;
if (argc > 1) {
model_path = argv[1];

} else {
model_path = getenv("LLAMACPP_TEST_MODELFILE");
if (!model_path || strlen(model_path) == 0) {
fprintf(stderr, "\033[33mWARNING: No model file provided. Skipping this test. Set LLAMACPP_TEST_MODELFILE=<gguf_model_path> to silence this warning and run this test.\n\033[0m");
exit(EXIT_SUCCESS);
}
}

return model_path;
}
2 changes: 2 additions & 0 deletions tests/get-model.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#pragma once
char * get_model_or_exit(int, char*[]);
12 changes: 4 additions & 8 deletions tests/test-autorelease.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,15 @@
#include <thread>

#include "llama.h"
#include "get-model.h"

// This creates a new context inside a pthread and then tries to exit cleanly.
int main(int argc, char ** argv) {
if (argc < 2) {
printf("Usage: %s model.gguf\n", argv[0]);
return 0; // intentionally return success
}
auto * model_path = get_model_or_exit(argc, argv);

const std::string fname = argv[1];

std::thread([&fname]() {
std::thread([&model_path]() {
llama_backend_init(false);
auto * model = llama_load_model_from_file(fname.c_str(), llama_model_default_params());
auto * model = llama_load_model_from_file(model_path, llama_model_default_params());
auto * ctx = llama_new_context_with_model(model, llama_context_default_params());
llama_free(ctx);
llama_free_model(model);
Expand Down
Loading