@@ -562,6 +562,35 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
                 break;
             }
             params.lora_base = argv[i];
+        } else if (arg == "--control-vector") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            params.control_vectors.push_back(std::make_tuple(argv[i], 1.0f));
+        } else if (arg == "--control-vector-scaled") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            const char * control_vector = argv[i];
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            params.control_vectors.push_back(std::make_tuple(control_vector, std::stof(argv[i])));
+        } else if (arg == "--control-vector-layer-range") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            int32_t start = std::stoi(argv[i]);
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            int32_t end = std::stoi(argv[i]);
+            params.control_vector_layer_range = std::make_tuple(start, end);
         } else if (arg == "--mmproj") {
             if (++i >= argc) {
                 invalid_param = true;
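
As a quick illustration (not part of the diff), the new flags can be exercised end to end; a minimal sketch, assuming common.h's gpt_params_parse() wrapper over gpt_params_parse_ex() and hypothetical .gguf file names:

#include "common.h"

#include <cassert>
#include <tuple>

int main() {
    const char * argv[] = {
        "main",
        "--control-vector",             "happy.gguf",          // strength defaults to 1.0f
        "--control-vector-scaled",      "sleepy.gguf", "-0.5", // a negative strength subtracts the direction
        "--control-vector-layer-range", "10", "20",
    };

    gpt_params params;
    assert(gpt_params_parse(9, const_cast<char **>(argv), params));

    assert(params.control_vectors.size() == 2);
    assert(std::get<1>(params.control_vectors[1]) == -0.5f);
    assert(std::get<0>(params.control_vector_layer_range) == 10);
    return 0;
}
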
@@ -1087,6 +1116,12 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     printf("  --lora FNAME          apply LoRA adapter (implies --no-mmap)\n");
     printf("  --lora-scaled FNAME S apply LoRA adapter with user defined scaling S (implies --no-mmap)\n");
     printf("  --lora-base FNAME     optional model to use as a base for the layers modified by the LoRA adapter\n");
+    printf("  --control-vector FNAME\n");
+    printf("                        add a control vector\n");
+    printf("  --control-vector-scaled FNAME S\n");
+    printf("                        add a control vector with user defined scaling S\n");
+    printf("  --control-vector-layer-range START END\n");
+    printf("                        layer range to apply the control vector(s) to, start and end inclusive\n");
     printf("  -m FNAME, --model FNAME\n");
     printf("                        model path (default: %s)\n", params.model.c_str());
     printf("  -md FNAME, --model-draft FNAME\n");
@@ -1351,6 +1386,35 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params
         return std::make_tuple(nullptr, nullptr);
     }
 
+    if (!params.control_vectors.empty()) {
+        int32_t layer_start, layer_end;
+        std::tie(layer_start, layer_end) = params.control_vector_layer_range;
+
+        if (layer_start == 0) layer_start = 1;  // layer 0 (the embeddings) is never targeted
+        if (layer_end   == 0) layer_end   = 31; // default assumes a 32-layer (7B-class) model
+
+        std::vector<float> control_vector;
+        int n_embd;
+        std::tie(control_vector, n_embd) = llama_control_vector_load(params.control_vectors);
+        if (n_embd == -1) {
+            llama_free(lctx);
+            llama_free_model(model);
+            return std::make_tuple(nullptr, nullptr);
+        }
+
+        int err = llama_control_vector_apply(lctx,
+                                             control_vector.data(),
+                                             control_vector.size(),
+                                             n_embd,
+                                             layer_start,
+                                             layer_end);
+        if (err) {
+            llama_free(lctx);
+            llama_free_model(model);
+            return std::make_tuple(nullptr, nullptr);
+        }
+    }
+
     for (unsigned int i = 0; i < params.lora_adapter.size(); ++i) {
         const std::string & lora_adapter = std::get<0>(params.lora_adapter[i]);
         float lora_scale = std::get<1>(params.lora_adapter[i]);
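
Again outside the diff, a minimal sketch of driving this path without the CLI; it assumes the llama_init_from_gpt_params() shown above, and the model/vector paths are hypothetical:

#include "common.h"
#include "llama.h"

#include <tuple>

int main() {
    gpt_params params;
    params.model = "models/7B/ggml-model.gguf";
    params.control_vectors.push_back(std::make_tuple("happy.gguf",   1.0f)); // add this direction
    params.control_vectors.push_back(std::make_tuple("sleepy.gguf", -0.7f)); // subtract this one, scaled
    params.control_vector_layer_range = std::make_tuple(10, 20);             // inclusive; 0,0 selects the defaults above

    llama_model   * model;
    llama_context * lctx;
    std::tie(model, lctx) = llama_init_from_gpt_params(params);
    if (model == nullptr || lctx == nullptr) {
        return 1; // load or apply failed; an error was already printed
    }

    // ... generate as usual: the summed, scaled control vector is already applied ...

    llama_free(lctx);
    llama_free_model(model);
    return 0;
}
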
@@ -1867,3 +1931,158 @@ void llama_embd_normalize(const float * inp, float * out, int n) {
     }
 }
 
+//
+// Control vector utils
+//
+
+static std::tuple<std::vector<float>, int> llama_control_vector_load_one(const std::string & path, float strength) {
+    int n_tensors;
+    size_t n_bytes = 0;
+    uint32_t max_direction_layer = 0;
+    int n_embd = -1;
+
+    // calculate size of ctx needed for tensors, ensure tensors are f32, and find max layer
+    {
+        struct ggml_init_params meta_params = {
+            /* .mem_size   = */ ggml_tensor_overhead() * 128 + ggml_graph_overhead(), // metadata only, for up to 128 tensors
+            /* .mem_buffer = */ nullptr,
+            /* .no_alloc   = */ true,
+        };
+        ggml_context * meta_ctx = ggml_init(meta_params);
+        struct gguf_init_params meta_gguf_params = {
+            /* .no_alloc = */ true,
+            /* .ctx      = */ &meta_ctx,
+        };
+        struct gguf_context * meta_ctx_gguf = gguf_init_from_file(path.c_str(), meta_gguf_params);
+        if (!meta_ctx_gguf) {
+            fprintf(stderr, "%s: failed to load control vector from %s\n", __func__, path.c_str());
+            ggml_free(meta_ctx);
+            return std::make_tuple(std::vector<float>(), -1);
+        }
+
+        n_tensors = gguf_get_n_tensors(meta_ctx_gguf);
+        for (int i = 0; i < n_tensors; i++) {
+            std::string name = gguf_get_tensor_name(meta_ctx_gguf, i);
+
+            // split on '.': tensors are named "direction.<layer>", layers numbered from 1
+            size_t dotpos = name.find('.');
+            if (dotpos != std::string::npos && name.substr(0, dotpos) == "direction") {
+                try {
+                    uint32_t layer = std::stoi(name.substr(dotpos + 1));
+                    if (layer == 0) {
+                        fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, path.c_str());
+                        ggml_free(meta_ctx);
+                        gguf_free(meta_ctx_gguf);
+                        return std::make_tuple(std::vector<float>(), -1);
+                    }
+                    if (layer > max_direction_layer) {
+                        max_direction_layer = layer;
+                    }
+                } catch (...) {
+                    fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, path.c_str());
+                    ggml_free(meta_ctx);
+                    gguf_free(meta_ctx_gguf);
+                    return std::make_tuple(std::vector<float>(), -1);
+                }
+            }
+
+            struct ggml_tensor * tensor_meta = ggml_get_tensor(meta_ctx, name.c_str());
+            if (tensor_meta->type != GGML_TYPE_F32 || ggml_n_dims(tensor_meta) != 1) {
+                fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, path.c_str());
+                ggml_free(meta_ctx);
+                gguf_free(meta_ctx_gguf);
+                return std::make_tuple(std::vector<float>(), -1);
+            }
+            if (n_embd == -1) {
+                n_embd = ggml_nelements(tensor_meta);
+            } else if (ggml_nelements(tensor_meta) != n_embd) {
+                fprintf(stderr, "%s: direction tensor sizes mismatched in %s\n", __func__, path.c_str());
+                ggml_free(meta_ctx);
+                gguf_free(meta_ctx_gguf);
+                return std::make_tuple(std::vector<float>(), -1);
+            }
+            n_bytes += ggml_nbytes(tensor_meta);
+        }
+        ggml_free(meta_ctx);
+        gguf_free(meta_ctx_gguf);
+    }
+
+    if (n_tensors == 0) {
+        fprintf(stderr, "%s: no direction tensors found in %s\n", __func__, path.c_str());
+        return std::make_tuple(std::vector<float>(), -1);
+    }
+
+    // load and scale tensors into final control vector context
+    struct ggml_init_params ggml_params = {
+        /* .mem_size   = */ ggml_tensor_overhead() * n_tensors + n_bytes,
+        /* .mem_buffer = */ nullptr,
+        /* .no_alloc   = */ false,
+    };
+    struct ggml_context * ctx = ggml_init(ggml_params);
+
+    struct gguf_init_params params = {
+        /* .no_alloc = */ false,
+        /* .ctx      = */ &ctx,
+    };
+    struct gguf_context * ctx_gguf = gguf_init_from_file(path.c_str(), params);
+    if (!ctx_gguf) {
+        fprintf(stderr, "%s: failed to load control vector from %s\n", __func__, path.c_str());
+        ggml_free(ctx);
+        return std::make_tuple(std::vector<float>(), -1);
+    }
+
+    std::vector<float> vector;
+    for (uint32_t i = 1; i <= max_direction_layer; i++) { // inclusive, or the topmost direction tensor would be dropped
+        std::string name = "direction." + std::to_string(i);
+        ggml_tensor * tensor = ggml_get_tensor(ctx, name.c_str());
+        if (tensor) {
+            const float * data = (const float *) tensor->data;
+            for (int j = 0; j < n_embd; j++) { // j, to avoid shadowing the layer index
+                vector.push_back(data[j] * strength);
+            }
+        } else {
+            vector.insert(vector.end(), n_embd, 0.f); // as a filler
+        }
+    }
+
+    gguf_free(ctx_gguf); // the data has been copied out; both contexts can be freed
+    ggml_free(ctx);
+
+    return std::make_tuple(vector, n_embd);
+}
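
The loader above implies a simple file layout: one 1-D f32 tensor per targeted layer, named "direction.<layer>" with layers numbered from 1, all of length n_embd. A hedged sketch (helper name and dummy values are mine, not from the PR) of writing such a file with ggml's gguf API:

#include "ggml.h"

#include <cstring>
#include <string>
#include <vector>

// writes a dummy control vector: direction.1 .. direction.<n_layers>,
// each a 1-D f32 tensor with n_embd elements
static void write_dummy_control_vector(const char * path, int n_layers, int n_embd) {
    struct ggml_init_params ip = {
        /* .mem_size   = */ (size_t) n_layers * (ggml_tensor_overhead() + n_embd * sizeof(float) + 32),
        /* .mem_buffer = */ nullptr,
        /* .no_alloc   = */ false,
    };
    struct ggml_context * ctx      = ggml_init(ip);
    struct gguf_context * ctx_gguf = gguf_init_empty();

    for (int il = 1; il <= n_layers; il++) {
        struct ggml_tensor * t = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);
        ggml_set_name(t, ("direction." + std::to_string(il)).c_str());

        std::vector<float> row(n_embd, 0.01f * il); // dummy direction values
        memcpy(t->data, row.data(), row.size() * sizeof(float));

        gguf_add_tensor(ctx_gguf, t);
    }

    gguf_write_to_file(ctx_gguf, path, /* only_meta = */ false);

    gguf_free(ctx_gguf);
    ggml_free(ctx);
}
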
+
+std::tuple<std::vector<float>, int> llama_control_vector_load(const std::vector<std::tuple<std::string, float>> & vectors) {
+    std::vector<float> vector;
+    int n_embd = -1;
+
+    for (const auto & pair : vectors) {
+        std::string path;
+        float strength;
+        std::tie(path, strength) = pair;
+
+        std::vector<float> v;
+        int v_n_embd;
+        std::tie(v, v_n_embd) = llama_control_vector_load_one(path, strength);
+
+        if (v_n_embd == -1) {
+            return std::make_tuple(std::vector<float>(), -1);
+        }
+        if (n_embd != -1 && (n_embd != v_n_embd || v.size() != vector.size())) {
+            fprintf(stderr, "%s: control vector in %s does not match previous vector dimensions\n", __func__, path.c_str());
+            return std::make_tuple(std::vector<float>(), -1);
+        }
+
+        if (n_embd == -1) {
+            vector = std::move(v);
+            n_embd = v_n_embd;
+        } else {
+            for (size_t i = 0; i < vector.size(); i++) {
+                vector[i] += v[i];
+            }
+        }
+    }
+
+    if (n_embd == -1) {
+        fprintf(stderr, "%s: no vectors passed\n", __func__);
+    }
+    return std::make_tuple(vector, n_embd);
+}
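
A hedged usage sketch for the two helpers above, assuming an already-initialized llama_context * lctx whose model matches the files' n_embd, and that llama_control_vector_load is declared in common.h alongside this definition (file names hypothetical; the 1/31 range mirrors the defaults used earlier in this diff):

#include "common.h"

#include <cstdio>
#include <tuple>
#include <vector>

static bool apply_control_vectors(llama_context * lctx) {
    std::vector<std::tuple<std::string, float>> specs;
    specs.push_back(std::make_tuple("happy.gguf",  1.0f));
    specs.push_back(std::make_tuple("formal.gguf", 0.5f));

    std::vector<float> data;
    int n_embd;
    std::tie(data, n_embd) = llama_control_vector_load(specs); // element-wise sum of the scaled vectors
    if (n_embd == -1) {
        return false; // an error was already printed
    }

    // apply to layers 1..31 inclusive, as the init code above does by default
    if (llama_control_vector_apply(lctx, data.data(), data.size(), n_embd, 1, 31)) {
        fprintf(stderr, "failed to apply control vector\n");
        return false;
    }
    return true;
}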