
Commit 0cd22e1

llama : fix various warnings
1 parent 6456a4e commit 0cd22e1

File tree

2 files changed: +53 -33 lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -16,6 +16,7 @@ build-debug/
 build-release/
 build-static/
 build-cublas/
+build-opencl/
 build-no-accel/
 build-sanitize-addr/
 build-sanitize-thread/

llama.cpp

Lines changed: 52 additions & 33 deletions
@@ -50,49 +50,49 @@ static const size_t MB = 1024*1024;
 
 static const std::map<e_model, size_t> & MEM_REQ_SCRATCH0()
 {
-    static std::map<e_model, size_t> _MEM_REQ_SCRATCH0 = {
+    static std::map<e_model, size_t> k_sizes = {
         { MODEL_7B,   512ull * MB },
         { MODEL_13B,  512ull * MB },
         { MODEL_30B,  512ull * MB },
         { MODEL_65B, 1024ull * MB },
     };
-    return _MEM_REQ_SCRATCH0;
+    return k_sizes;
 }
 
 static const std::map<e_model, size_t> & MEM_REQ_SCRATCH1()
 {
-    static std::map<e_model, size_t> _MEM_REQ_SCRATCH1 = {
+    static std::map<e_model, size_t> k_sizes = {
         { MODEL_7B,   512ull * MB },
         { MODEL_13B,  512ull * MB },
         { MODEL_30B,  512ull * MB },
         { MODEL_65B, 1024ull * MB },
     };
-    return _MEM_REQ_SCRATCH1;
+    return k_sizes;
 }
 
 // 2*n_embd*n_ctx*n_layer*sizeof(float16)
 static const std::map<e_model, size_t> & MEM_REQ_KV_SELF()
 {
-    static std::map<e_model, size_t> _MEM_REQ_KV_SELF = {
+    static std::map<e_model, size_t> k_sizes = {
         { MODEL_7B,  1026ull * MB },
         { MODEL_13B, 1608ull * MB },
         { MODEL_30B, 3124ull * MB },
         { MODEL_65B, 5120ull * MB },
     };
-    return _MEM_REQ_KV_SELF;
+    return k_sizes;
 }
 
 // this is mostly needed for temporary mul_mat buffers to dequantize the data
 // not actually needed if BLAS is disabled
 static const std::map<e_model, size_t> & MEM_REQ_EVAL()
 {
-    static std::map<e_model, size_t> _MEM_REQ_EVAL = {
+    static std::map<e_model, size_t> k_sizes = {
         { MODEL_7B,   768ull * MB },
         { MODEL_13B, 1024ull * MB },
         { MODEL_30B, 1280ull * MB },
         { MODEL_65B, 1536ull * MB },
     };
-    return _MEM_REQ_EVAL;
+    return k_sizes;
 }
 
 // default hparams (LLaMA 7B)
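
Note: the _MEM_REQ_* to k_sizes renames above keep the same construct-on-first-use idiom (a function-local static map built once and returned by const reference) while dropping identifiers that start with an underscore followed by a capital letter, which are reserved in C++. Below is a minimal standalone sketch of that idiom, not the actual llama.cpp code; the e_model enum is stubbed so the example compiles on its own.

// Sketch only: construct-on-first-use accessor, as in MEM_REQ_SCRATCH0() above.
#include <cstddef>
#include <cstdio>
#include <map>

enum e_model { MODEL_7B, MODEL_13B, MODEL_30B, MODEL_65B }; // stub for the example

static const size_t MB = 1024*1024;

static const std::map<e_model, size_t> & MEM_REQ_SCRATCH0()
{
    // built exactly once, on first call (thread-safe since C++11), then reused
    static std::map<e_model, size_t> k_sizes = {
        { MODEL_7B,   512ull * MB },
        { MODEL_13B,  512ull * MB },
        { MODEL_30B,  512ull * MB },
        { MODEL_65B, 1024ull * MB },
    };
    return k_sizes;
}

int main() {
    printf("7B scratch0 size: %zu MB\n", MEM_REQ_SCRATCH0().at(MODEL_7B) / MB);
    return 0;
}
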
@@ -586,12 +586,12 @@ struct llama_model_loader {
     std::unique_ptr<llama_mmap> mapping;
 
     llama_model_loader(const std::string & fname_base, bool use_mmap, bool vocab_only) {
-        auto first_file = new llama_file_loader(fname_base.c_str(), 0, tensors_map);
+        auto * first_file = new llama_file_loader(fname_base.c_str(), 0, tensors_map);
         file_loaders.emplace_back(first_file);
         uint32_t n_parts = vocab_only ? 1 : guess_n_parts();
         for (uint32_t i = 1; i < n_parts; i++) {
             std::string fname = fname_base + "." + std::to_string(i);
-            auto ith_file = new llama_file_loader(fname.c_str(), i, tensors_map);
+            auto * ith_file = new llama_file_loader(fname.c_str(), i, tensors_map);
             file_loaders.emplace_back(ith_file);
             if (ith_file->hparams != first_file->hparams) {
                 throw format("llama.cpp: hparams inconsistent between files");
@@ -638,7 +638,7 @@ struct llama_model_loader {
         }
     }
 
-    struct ggml_tensor * get_tensor(const std::string & name, std::vector<uint32_t> ne) {
+    struct ggml_tensor * get_tensor(const std::string & name, const std::vector<uint32_t> & ne) {
         auto it = tensors_map.name_to_idx.find(name);
         if (it == tensors_map.name_to_idx.end()) {
             throw format("llama.cpp: tensor '%s' is missing from model", name.c_str());
@@ -667,7 +667,7 @@ struct llama_model_loader {
         return tensor;
     }
 
-    void done_getting_tensors() {
+    void done_getting_tensors() const {
         if (num_ggml_tensors_created != tensors_map.tensors.size()) {
             throw std::string("llama.cpp: file contained more tensors than expected");
         }
@@ -934,7 +934,8 @@ static void llama_model_load_internal(
 
     auto & ctx = model.ctx;
 
-    size_t ctx_size, mmapped_size;
+    size_t ctx_size;
+    size_t mmapped_size;
     ml->calc_sizes(&ctx_size, &mmapped_size);
     fprintf(stderr, "%s: ggml ctx size = %6.2f KB\n", __func__, ctx_size/1024.0);
 
@@ -1074,7 +1075,7 @@ static bool llama_eval_internal(
     const auto & model = lctx.model;
     const auto & hparams = model.hparams;
 
-    auto & kv_self = model.kv_self;
+    const auto & kv_self = model.kv_self;
 
     LLAMA_ASSERT(!!kv_self.ctx);
 
@@ -1318,7 +1319,7 @@ static bool llama_eval_internal(
     }
 
     // extract embeddings
-    if (lctx.embedding.size()) {
+    if (!lctx.embedding.empty()) {
         auto & embedding_out = lctx.embedding;
 
         embedding_out.resize(n_embd);
@@ -1369,6 +1370,8 @@ struct llama_sp_symbol {
     size_t n;
 };
 
+static_assert(std::is_trivially_copyable<llama_sp_symbol>::value, "llama_sp_symbol is not trivially copyable");
+
 struct llama_sp_bigram {
     struct comparator {
         bool operator()(llama_sp_bigram & l, llama_sp_bigram & r) {
@@ -1401,7 +1404,7 @@ struct llama_tokenizer {
             sym.prev = index - 1;
             sym.next = offs == text.size() ? -1 : index + 1;
             index++;
-            symbols_.emplace_back(std::move(sym));
+            symbols_.emplace_back(sym);
         }
 
         // seed the work queue with all possible 2-character tokens.
@@ -1492,7 +1495,7 @@ static std::vector<llama_vocab::id> llama_tokenize(const llama_vocab & vocab, co
     llama_tokenizer tokenizer(vocab);
     std::vector<llama_vocab::id> output;
 
-    if (text.size() == 0) {
+    if (text.empty()) {
         return output;
     }
 
@@ -1728,7 +1731,7 @@ void llama_sample_repetition_penalty(struct llama_context * ctx, llama_token_dat
     const int64_t t_start_sample_us = ggml_time_us();
 
     for (size_t i = 0; i < candidates->size; ++i) {
-        auto token_iter = std::find(last_tokens, last_tokens + last_tokens_size, candidates->data[i].id);
+        const auto * token_iter = std::find(last_tokens, last_tokens + last_tokens_size, candidates->data[i].id);
         if (token_iter == last_tokens + last_tokens_size) {
             continue;
         }
@@ -1872,7 +1875,7 @@ llama_token llama_sample_token_greedy(struct llama_context * ctx, llama_token_da
     const int64_t t_start_sample_us = ggml_time_us();
 
     // Find max element
-    auto max_iter = std::max_element(candidates->data, candidates->data + candidates->size, [](const llama_token_data & a, const llama_token_data & b) {
+    auto * max_iter = std::max_element(candidates->data, candidates->data + candidates->size, [](const llama_token_data & a, const llama_token_data & b) {
         return a.logit < b.logit;
     });
 
@@ -1925,7 +1928,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         nthread = std::thread::hardware_concurrency();
     }
 
-    std::unique_ptr<llama_model_loader> model_loader(new llama_model_loader(fname_inp.c_str(), /*use_mmap*/ false,
+    std::unique_ptr<llama_model_loader> model_loader(new llama_model_loader(fname_inp, /*use_mmap*/ false,
                                                                             /*vocab_only*/ false));
     llama_file_saver file_saver(fname_out.c_str(), model_loader->file_loaders.at(0).get(), ftype);
 
@@ -1979,7 +1982,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         } else if (tensor.type == GGML_TYPE_F16) {
             f32_conv_buf.resize(nelements * sizeof(float));
             f32_data = (float *) f32_conv_buf.addr;
-            auto f16_data = (const ggml_fp16_t *) tensor.data;
+            const auto * f16_data = (const ggml_fp16_t *) tensor.data;
             for (size_t i = 0; i < nelements; i++) {
                 f32_data[i] = ggml_fp16_to_fp32(f16_data[i]);
             }
@@ -2010,21 +2013,31 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
                     size_t first = counter; counter += chunk_size;
                     if (first >= nelements) {
                         if (!local_hist.empty()) {
-                            for (int j=0; j<int(local_hist.size()); ++j) hist_cur[j] += local_hist[j];
+                            for (int j=0; j<int(local_hist.size()); ++j) {
+                                hist_cur[j] += local_hist[j];
+                            }
                             new_size += local_size;
                         }
                         break;
                     }
                     lock.unlock();
                     size_t last = std::min(nelements, first + chunk_size);
-                    if (local_hist.empty()) local_hist.resize(hist_cur.size(), 0);
+                    if (local_hist.empty()) {
+                        local_hist.resize(hist_cur.size(), 0);
+                    }
                     local_size += ggml_quantize_chunk(new_type, f32_data, new_data, first, last - first, local_hist.data());
                 }
             };
-            if (int(workers.size()) < nthread_use - 1) workers.resize(nthread_use - 1);
-            for (int it = 0; it < nthread_use - 1; ++it) workers[it] = std::thread(compute);
+            if ((int) workers.size() < nthread_use - 1) {
+                workers.resize(nthread_use - 1);
+            }
+            for (int it = 0; it < nthread_use - 1; ++it) {
+                workers[it] = std::thread(compute);
+            }
             compute();
-            for (int it = 0; it < nthread_use - 1; ++it) workers[it].join();
+            for (int it = 0; it < nthread_use - 1; ++it) {
+                workers[it].join();
+            }
         }
 
         printf("size = %8.2f MB -> %8.2f MB | hist: ", tensor.size/1024.0/1024.0, new_size/1024.0/1024.0);
@@ -2222,7 +2235,8 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *
         fprintf(stderr, "%s: loading base model from '%s'\n", __func__, path_base_model);
         model_loader.reset(new llama_model_loader(path_base_model, /*use_mmap*/ true, /*vocab_only*/ false));
 
-        size_t ctx_size, mmapped_size;
+        size_t ctx_size;
+        size_t mmapped_size;
         model_loader->calc_sizes(&ctx_size, &mmapped_size);
         base_buf.resize(ctx_size);
 
@@ -2261,8 +2275,12 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *
             fin.read(reinterpret_cast<char *>(&ne[i]), sizeof(ne[i]));
         }
 
-        std::string name(length, 0);
-        fin.read(&name[0], length);
+        std::string name;
+        {
+            char buf[1024];
+            fin.read(buf, length);
+            name = std::string(buf, length);
+        }
 
         // check for lora suffix and get the type of tensor
         const std::string lora_suffix = ".lora";
@@ -2277,7 +2295,7 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *
         base_name.erase(pos);
         // fprintf(stderr, "%s: %s => %s (lora type %s) ", __func__, name.c_str(),base_name.c_str(), lora_type.c_str());
 
-        if (model_tensors.find(base_name.data()) == model_tensors.end()) {
+        if (model_tensors.find(base_name) == model_tensors.end()) {
             fprintf(stderr, "%s: unknown tensor '%s' in lora adapter\n", __func__, name.data());
             return 1;
         }
@@ -2379,8 +2397,9 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *
             lora_tensors.clear();
 
             n_tensors++;
-            if (n_tensors % 4 == 0)
+            if (n_tensors % 4 == 0) {
                 fprintf(stderr, ".");
+            }
         }
     }
 
@@ -2409,7 +2428,7 @@ int llama_get_kv_cache_token_count(const struct llama_context * ctx) {
     return ctx->model.kv_self.n;
 }
 
-#define LLAMA_MAX_RNG_STATE 64*1024
+#define LLAMA_MAX_RNG_STATE (64*1024)
 
 void llama_set_rng_seed(struct llama_context * ctx, int seed) {
     if (seed < 0) {
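
Parenthesizing the macro matters because the unparenthesized 64*1024 expands verbatim and can bind to neighbouring operators at the use site. A tiny illustration with hypothetical macro names (not the real LLAMA_MAX_RNG_STATE usage):

// Sketch only: why an object-like macro holding an expression should be parenthesized.
#include <cstdio>

#define RNG_STATE_BAD  64*1024      // expands verbatim
#define RNG_STATE_GOOD (64*1024)    // safe inside any surrounding expression

int main() {
    // 1000000 % 64*1024 parses as (1000000 % 64) * 1024, not 1000000 % 65536
    printf("bad:  %d\n", 1000000 % RNG_STATE_BAD);   // prints 0
    printf("good: %d\n", 1000000 % RNG_STATE_GOOD);  // prints 16960
    return 0;
}
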
@@ -2668,7 +2687,7 @@ bool llama_load_session_file(struct llama_context * ctx, const char * path_sessi
         const uint32_t magic = file.read_u32();
         const uint32_t version = file.read_u32();
 
-        if (!(magic == LLAMA_SESSION_MAGIC && version == LLAMA_SESSION_VERSION)) {
+        if (magic != LLAMA_SESSION_MAGIC || version != LLAMA_SESSION_VERSION) {
             fprintf(stderr, "%s : unknown (magic, version) for session file: %08x, %08x\n", __func__, magic, version);
             return false;
         }
