Skip to content

Commit d083c81

Browse files
committed
server: only add back deferred tasks when one slot is available
1 parent 1bd8678 commit d083c81

File tree

2 files changed: 12 additions (+12) and 10 deletions (-10)

2 files changed

+12
-10
lines changed

examples/server/server.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1383,6 +1383,7 @@ struct llama_server_context
13831383
slot.t_last_used = ggml_time_us();
13841384

13851385
LOG_TEE("slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size());
1386+
queue_tasks.notify_slot_changed();
13861387

13871388
continue;
13881389
}

examples/server/utils.hpp

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -209,7 +209,7 @@ struct llama_server_queue {
209209
return task.id;
210210
}
211211

212-
// Add a new task, but defer until the next loop
212+
// Add a new task, but defer until one slot is available
213213
void defer(task_server task) {
214214
std::unique_lock<std::mutex> lock(mutex_tasks);
215215
queue_tasks_deferred.push_back(std::move(task));
@@ -236,6 +236,16 @@ struct llama_server_queue {
236236
callback_all_task_finished = callback;
237237
}
238238

239+
// Call when the state of one slot is changed
240+
void notify_slot_changed() {
241+
// move deferred tasks back to main loop
242+
std::unique_lock<std::mutex> lock(mutex_tasks);
243+
for (auto & task : queue_tasks_deferred) {
244+
queue_tasks.push_back(std::move(task));
245+
}
246+
queue_tasks_deferred.clear();
247+
}
248+
239249
// Start the main loop. This call is blocking
240250
void start_loop() {
241251
while (true) {
@@ -255,15 +265,6 @@ struct llama_server_queue {
255265
LOG_VERBOSE("callback_new_task", {});
256266
callback_new_task(task);
257267
}
258-
// move deferred tasks back to main loop
259-
{
260-
std::unique_lock<std::mutex> lock(mutex_tasks);
261-
for (auto & task : queue_tasks_deferred) {
262-
queue_tasks.push_back(std::move(task));
263-
}
264-
queue_tasks_deferred.clear();
265-
lock.unlock();
266-
}
267268
LOG_VERBOSE("callback_all_task_finished", {});
268269
// process and update all the multitasks
269270
auto queue_iterator = queue_multitasks.begin();

0 commit comments

Comments
 (0)