@@ -209,7 +209,7 @@ struct llama_server_queue {
209
209
return task.id ;
210
210
}
211
211
212
- // Add a new task, but defer until the next loop
212
+ // Add a new task, but defer it until a slot becomes available
213
213
void defer (task_server task) {
214
214
std::unique_lock<std::mutex> lock (mutex_tasks);
215
215
queue_tasks_deferred.push_back (std::move (task));
@@ -236,6 +236,16 @@ struct llama_server_queue {
236
236
callback_all_task_finished = callback;
237
237
}
238
238
239
+ // Call whenever the state of a slot changes: while holding mutex_tasks, moves every task from queue_tasks_deferred onto the back of queue_tasks and then empties the deferred list.
240
+ void notify_slot_changed () {
241
+ // Move deferred tasks back to the main task queue, preserving their order; the moved-from entries left behind are dropped by clear() below.
242
+ std::unique_lock<std::mutex> lock (mutex_tasks);
243
+ for (auto & task : queue_tasks_deferred) {
244
+ queue_tasks.push_back (std::move (task));
245
+ }
246
+ queue_tasks_deferred.clear ();
247
+ }
248
+
239
249
// Start the main loop. This call is blocking
240
250
void start_loop () {
241
251
while (true ) {
@@ -255,15 +265,6 @@ struct llama_server_queue {
255
265
LOG_VERBOSE (" callback_new_task" , {});
256
266
callback_new_task (task);
257
267
}
258
- // move deferred tasks back to main loop
259
- {
260
- std::unique_lock<std::mutex> lock (mutex_tasks);
261
- for (auto & task : queue_tasks_deferred) {
262
- queue_tasks.push_back (std::move (task));
263
- }
264
- queue_tasks_deferred.clear ();
265
- lock.unlock ();
266
- }
267
268
LOG_VERBOSE (" callback_all_task_finished" , {});
268
269
// process and update all the multitasks
269
270
auto queue_iterator = queue_multitasks.begin ();
0 commit comments