From fbcf7657055a5a562fa7cc176e4bafec9e99c661 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?John=20K=C3=A5re=20Alsaker?=
Date: Mon, 24 Mar 2025 02:02:10 +0100
Subject: [PATCH 1/3] Revert "resume one waiter at a call"

This reverts commit cc1e4ede9388d87750c3751f41e8c6c4f6cae995.
---
 compiler/rustc_query_system/src/query/job.rs | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/compiler/rustc_query_system/src/query/job.rs b/compiler/rustc_query_system/src/query/job.rs
index 37b305d0a8b50..a8c2aa98cd083 100644
--- a/compiler/rustc_query_system/src/query/job.rs
+++ b/compiler/rustc_query_system/src/query/job.rs
@@ -477,8 +477,8 @@ fn remove_cycle(
 /// Detects query cycles by using depth first search over all active query jobs.
 /// If a query cycle is found it will break the cycle by finding an edge which
 /// uses a query latch and then resuming that waiter.
-/// There may be multiple cycles involved in a deadlock, but we only search
-/// one cycle at a call and resume one waiter at once. See `FIXME` below.
+/// There may be multiple cycles involved in a deadlock, so this searches
+/// all active queries for cycles before finally resuming all the waiters at once.
 pub fn break_query_cycles(query_map: QueryMap, registry: &rayon_core::Registry) {
     let mut wakelist = Vec::new();
     let mut jobs: Vec<QueryJobId> = query_map.keys().cloned().collect();
@@ -488,19 +488,6 @@ pub fn break_query_cycles(query_map: QueryMap, registry: &rayon_core::Registry)
     while jobs.len() > 0 {
         if remove_cycle(&query_map, &mut jobs, &mut wakelist) {
             found_cycle = true;
-
-            // FIXME(#137731): Resume all the waiters at once may cause deadlocks,
-            // so we resume one waiter at a call for now. It's still unclear whether
-            // it's due to possible issues in rustc-rayon or instead in the handling
-            // of query cycles.
-            // This seem to only appear when multiple query cycles errors
-            // are involved, so this reduction in parallelism, while suboptimal, is not
-            // universal and only the deadlock handler will encounter these cases.
-            // The workaround shows loss of potential gains, but there still are big
-            // improvements in the common case, and no regressions compared to the
-            // single-threaded case. More investigation is still needed, and once fixed,
-            // we can wake up all the waiters up.
-            break;
         }
     }
 

From 14786ce645ef96c732f8b30154bc939ee4ba9faf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?John=20K=C3=A5re=20Alsaker?=
Date: Mon, 24 Mar 2025 02:09:14 +0100
Subject: [PATCH 2/3] Batch mark waiters as unblocked when resuming in the deadlock handler

---
 compiler/rustc_query_system/src/query/job.rs | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/compiler/rustc_query_system/src/query/job.rs b/compiler/rustc_query_system/src/query/job.rs
index a8c2aa98cd083..e6ab7c4ef5745 100644
--- a/compiler/rustc_query_system/src/query/job.rs
+++ b/compiler/rustc_query_system/src/query/job.rs
@@ -506,9 +506,15 @@ pub fn break_query_cycles(query_map: QueryMap, registry: &rayon_core::Registry)
         );
     }
 
-    // FIXME: Ensure this won't cause a deadlock before we return
+    // Mark all the thread we're about to wake up as unblocked. This needs to be done before
+    // we wake the threads up as otherwise Rayon could detect a deadlock if a thread we
+    // resumed fell asleep and this thread had yet to mark the remaining threads as unblocked.
+    for _ in 0..wakelist.len() {
+        rayon_core::mark_unblocked(registry);
+    }
+
     for waiter in wakelist.into_iter() {
-        waiter.notify(registry);
+        waiter.condvar.notify_one();
     }
 }
 

From 38c39ffc6c6e19867f450c7204f341aeb0c495ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?John=20K=C3=A5re=20Alsaker?=
Date: Mon, 24 Mar 2025 03:53:19 +0100
Subject: [PATCH 3/3] Remove `QueryWaiter::notify`

---
 compiler/rustc_query_system/src/query/job.rs | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/compiler/rustc_query_system/src/query/job.rs b/compiler/rustc_query_system/src/query/job.rs
index e6ab7c4ef5745..5ed8fb5393fe7 100644
--- a/compiler/rustc_query_system/src/query/job.rs
+++ b/compiler/rustc_query_system/src/query/job.rs
@@ -163,13 +163,6 @@ struct QueryWaiter {
     cycle: Mutex<Option<CycleError>>,
 }
 
-impl QueryWaiter {
-    fn notify(&self, registry: &rayon_core::Registry) {
-        rayon_core::mark_unblocked(registry);
-        self.condvar.notify_one();
-    }
-}
-
 #[derive(Debug)]
 struct QueryLatchInfo {
     complete: bool,
@@ -232,7 +225,8 @@ impl QueryLatch {
         info.complete = true;
         let registry = rayon_core::Registry::current();
         for waiter in info.waiters.drain(..) {
-            waiter.notify(&registry);
+            rayon_core::mark_unblocked(&registry);
+            waiter.condvar.notify_one();
         }
     }
 
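Note on the ordering enforced by the second patch: rustc-rayon's deadlock detector fires when every worker thread in the registry is accounted for as blocked, so the handler must decrement that count once for each waiter it is about to resume before it wakes any of them. Otherwise a thread that was resumed first can block again while the remaining waiters are still counted as blocked, and the detector trips spuriously. Below is a minimal, standalone sketch of that pattern using only the standard library; the Registry type and its mark_blocked/mark_unblocked methods are hypothetical stand-ins modelling rustc-rayon's blocked-thread bookkeeping, not the real API.

use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, Condvar, Mutex};
use std::thread;
use std::time::Duration;

// Hypothetical stand-in for the rustc-rayon registry: it only tracks how many
// worker threads are currently blocked, which is the counter a deadlock
// detector would inspect.
struct Registry {
    blocked: AtomicUsize,
}

impl Registry {
    fn mark_blocked(&self) {
        self.blocked.fetch_add(1, Ordering::SeqCst);
    }
    // Analogue of `rayon_core::mark_unblocked`: called once per waiter that is
    // about to be resumed, before any of them is actually woken.
    fn mark_unblocked(&self) {
        self.blocked.fetch_sub(1, Ordering::SeqCst);
    }
}

// A parked "query job": a flag guarded by a mutex plus a condvar, playing the
// role of QueryWaiter's condvar field.
struct Waiter {
    woken: Mutex<bool>,
    condvar: Condvar,
}

fn main() {
    let registry = Arc::new(Registry { blocked: AtomicUsize::new(0) });
    let waiters: Vec<Arc<Waiter>> = (0..4)
        .map(|_| Arc::new(Waiter { woken: Mutex::new(false), condvar: Condvar::new() }))
        .collect();

    // Threads park on their waiter, mirroring query jobs blocked on a latch.
    let handles: Vec<_> = waiters
        .iter()
        .map(|w| {
            let (w, registry) = (Arc::clone(w), Arc::clone(&registry));
            thread::spawn(move || {
                registry.mark_blocked();
                let mut woken = w.woken.lock().unwrap();
                while !*woken {
                    woken = w.condvar.wait(woken).unwrap();
                }
            })
        })
        .collect();

    // Give the threads time to park; in rustc the deadlock handler only runs
    // once all workers are already blocked.
    thread::sleep(Duration::from_millis(50));

    // The pattern from the patch: first mark every waiter unblocked in one batch...
    for _ in 0..waiters.len() {
        registry.mark_unblocked();
    }
    // ...and only then notify the condvars, so the blocked count never looks
    // like a deadlock while the wake-ups are still in flight.
    for w in &waiters {
        *w.woken.lock().unwrap() = true;
        w.condvar.notify_one();
    }

    for handle in handles {
        handle.join().unwrap();
    }
    assert_eq!(registry.blocked.load(Ordering::SeqCst), 0);
}

The invariant the sketch illustrates is the one the series relies on: no condvar is notified until the blocked count already reflects every thread that is about to wake up. The third patch keeps the same two primitives at the latch by inlining the former QueryWaiter::notify helper at its call sites.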