@@ -2469,7 +2469,9 @@ struct llama_model_loader {
         }

         if (progress_callback) {
-            progress_callback(1.0f, progress_callback_user_data);
+            // Even though the model is done loading, we still honor
+            // cancellation since we need to free allocations.
+            return progress_callback(1.0f, progress_callback_user_data);
         }

         return true;
     }
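For the new `return` in this hunk to type-check, the progress callback must itself return `bool`. A minimal sketch of the assumed declaration (per `llama.h` in this patch series; the comment wording is mine, not quoted from the header):

```cpp
// Assumed public typedef (illustration only, not part of this hunk):
// the callback receives progress in [0, 1]; returning false asks the
// loader to abort and free its allocations.
typedef bool (*llama_progress_callback)(float progress, void * ctx);
```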
@@ -3060,8 +3062,6 @@ static bool llm_load_tensors(
         void * progress_callback_user_data) {
     model.t_start_us = ggml_time_us();

-    bool ok = true; // if false, model load was cancelled
-
     auto & ctx = model.ctx;
     auto & hparams = model.hparams;

@@ -3729,19 +3729,16 @@ static bool llm_load_tensors(
         model.tensors_by_name.emplace_back(ggml_get_name(cur), cur);
     }

-    ok = ok && ml.load_all_data(ctx, progress_callback, progress_callback_user_data, buf_mmap, use_mlock ? &model.mlock_mmap : NULL);
-    if (progress_callback) {
-        // Even though the model is done loading, we still honor
-        // cancellation since we need to free allocations.
-        ok = ok && progress_callback(1.0f, progress_callback_user_data);
+    if (!ml.load_all_data(ctx, progress_callback, progress_callback_user_data, buf_mmap, use_mlock ? &model.mlock_mmap : NULL)) {
+        return false;
     }

     model.mapping = std::move(ml.mapping);

     // loading time will be recalculated after the first eval, so
     // we take page faults deferred by mmap() into consideration
     model.t_load_us = ggml_time_us() - model.t_start_us;
-    return ok;
+    return true;
 }

 // Returns 0 on success, -1 on error, and -2 on cancellation via llama_progress_callback
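To see the cancellation path end to end, here is a hedged usage sketch against the public C API. `llama_model_default_params`, `llama_load_model_from_file`, and the `progress_callback` fields come from `llama.h`; the model path and the 50% cutoff are made up for illustration. Once the callback returns `false`, the loader frees its allocations and the load call reports failure:

```cpp
#include "llama.h"

#include <cstdio>

// Hypothetical callback for illustration: report progress, then cancel
// the load by returning false once it reaches the halfway mark.
static bool cancel_at_half(float progress, void * user_data) {
    (void) user_data;
    fprintf(stderr, "loading: %3.0f%%\n", progress * 100.0f);
    return progress < 0.5f; // false => abort the load
}

int main(void) {
    llama_backend_init(false);

    llama_model_params params = llama_model_default_params();
    params.progress_callback           = cancel_at_half;
    params.progress_callback_user_data = NULL;

    // "model.gguf" is a placeholder path. On cancellation the loader frees
    // its allocations and returns NULL, just as it does on error.
    llama_model * model = llama_load_model_from_file("model.gguf", params);
    if (model == NULL) {
        fprintf(stderr, "model load failed or was cancelled\n");
    } else {
        llama_free_model(model);
    }

    llama_backend_free();
    return 0;
}
```

Note that the internal -2 cancellation code mentioned above is not surfaced through `llama_load_model_from_file`; callers observe cancellation as a NULL model, indistinguishable from an ordinary load error.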