Skip to content

Commit a45e843

Browse files
committed
Ensure --mlock works properly with mmap() support
1 parent 75d1e55 commit a45e843

File tree

3 files changed

+34
-16
lines changed

3 files changed

+34
-16
lines changed

ggml.c

Lines changed: 25 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -2885,36 +2885,47 @@ size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch)
28852885
return result;
28862886
}
28872887

2888+
#ifdef __APPLE__
2889+
#define MLOCK_SUGGESTION \
2890+
"Try increasing the sysctl values 'vm.user_wire_limit' and 'vm.global_user_wire_limit' and/or " \
2891+
"decreasing 'vm.global_no_user_wire_amount'. Also try increasing RLIMIT_MLOCK (ulimit -l).\n"
2892+
#else
2893+
#define MLOCK_SUGGESTION \
2894+
"Try increasing RLIMIT_MLOCK ('ulimit -l' as root).\n"
2895+
#endif
2896+
28882897
bool ggml_mlock_supported(void) {
28892898
return GGML_MLOCK_SUPPORT;
28902899
}
28912900

2901+
bool ggml_mlock(
2902+
struct ggml_context * ctx,
2903+
const void *opt_extra_addr,
2904+
size_t opt_extra_len,
2905+
char **err_p) {
2906+
// TODO: Use SetProcessWorkingSetSize() + VirtualLock() on WIN32
28922907
#if GGML_MLOCK_SUPPORT
2893-
#ifdef __APPLE__
2894-
#define MLOCK_SUGGESTION "Try increasing the sysctl values 'vm.user_wire_limit' and 'vm.global_user_wire_limit' and/or\n" \
2895-
"decreasing 'vm.global_no_user_wire_amount'. Also try increasing RLIMIT_MLOCK (ulimit -l)."
2896-
#else
2897-
#define MLOCK_SUGGESTION "Try increasing RLIMIT_MLOCK (ulimit -l)."
2898-
#endif
2899-
bool ggml_mlock(struct ggml_context * ctx, char ** err_p) {
29002908
if (ctx->mem_buffer_mlocked) {
29012909
return true;
29022910
}
2903-
if (mlock(ctx->mem_buffer, ctx->mem_size)) {
2904-
int ret = asprintf(err_p, "failed to mlock %zu-byte buffer: %s\n" MLOCK_SUGGESTION,
2905-
ctx->mem_size, strerror(errno));
2906-
GGML_ASSERT(ret >= 0);
2911+
if (mlock(ctx->mem_buffer, ctx->mem_size) ||
2912+
(opt_extra_len &&
2913+
mlock(opt_extra_addr, opt_extra_len))) {
2914+
if ((*err_p = malloc(1024))) {
2915+
snprintf(*err_p, 1024,
2916+
"failed to mlock %zu-byte buffer: %s\n" MLOCK_SUGGESTION,
2917+
ctx->mem_size + opt_extra_len,
2918+
strerror(errno));
2919+
}
29072920
return false;
29082921
}
29092922
ctx->mem_buffer_mlocked = true;
29102923
return true;
2911-
}
29122924
#else // GGML_MLOCK_SUPPORT
2913-
bool ggml_mlock(struct ggml_context * ctx, char ** err_p) {
29142925
*err_p = strdup("can't mlock because it's not supported on this system");
29152926
return false;
2916-
}
29172927
#endif // GGML_MLOCK_SUPPORT
2928+
}
29182929

29192930
////////////////////////////////////////////////////////////////////////////////
29202931

ggml.h

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -345,7 +345,11 @@ size_t ggml_used_mem(const struct ggml_context * ctx);
345345
size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch);
346346

347347
bool ggml_mlock_supported(void);
348-
bool ggml_mlock(struct ggml_context * ctx, char ** err_p);
348+
bool ggml_mlock(
349+
struct ggml_context * ctx,
350+
const void *opt_extra_addr,
351+
size_t opt_extra_len,
352+
char **err_p);
349353

350354
struct ggml_tensor * ggml_new_tensor(
351355
struct ggml_context * ctx,

llama.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1595,7 +1595,10 @@ struct llama_context * llama_init_from_file(
15951595

15961596
if (params.use_mlock) {
15971597
char *err;
1598-
if (!ggml_mlock(ctx->model.ctx, &err)) {
1598+
if (!ggml_mlock(ctx->model.ctx,
1599+
ctx->model.mm_addr,
1600+
ctx->model.mm_length,
1601+
&err)) {
15991602
fprintf(stderr, "%s\n", err);
16001603
free(err);
16011604
llama_free(ctx);

0 commit comments

Comments
 (0)