Skip to content

Commit 4ef9fb1

Browse files
maleadtvchuravy
andauthored
When adopting a thread, spin until GC isn't running. (#49934)
Co-authored-by: Valentin Churavy <[email protected]>
1 parent 91cd521 commit 4ef9fb1

File tree

4 files changed

+28
-10
lines changed

4 files changed

+28
-10
lines changed

src/gc.c

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3100,7 +3100,7 @@ static void sweep_finalizer_list(arraylist_t *list)
31003100
}
31013101

31023102
// collector entry point and control
3103-
static _Atomic(uint32_t) jl_gc_disable_counter = 1;
3103+
_Atomic(uint32_t) jl_gc_disable_counter = 1;
31043104

31053105
JL_DLLEXPORT int jl_gc_enable(int on)
31063106
{
@@ -3497,7 +3497,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
34973497

34983498
jl_task_t *ct = jl_current_task;
34993499
jl_ptls_t ptls = ct->ptls;
3500-
if (jl_atomic_load_relaxed(&jl_gc_disable_counter)) {
3500+
if (jl_atomic_load_acquire(&jl_gc_disable_counter)) {
35013501
size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_num.allocd) + gc_num.interval;
35023502
jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval);
35033503
static_assert(sizeof(_Atomic(uint64_t)) == sizeof(gc_num.deferred_alloc), "");
@@ -3508,11 +3508,10 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
35083508

35093509
int8_t old_state = jl_atomic_load_relaxed(&ptls->gc_state);
35103510
jl_atomic_store_release(&ptls->gc_state, JL_GC_STATE_WAITING);
3511-
// `jl_safepoint_start_gc()` makes sure only one thread can
3512-
// run the GC.
3511+
// `jl_safepoint_start_gc()` makes sure only one thread can run the GC.
35133512
uint64_t t0 = jl_hrtime();
35143513
if (!jl_safepoint_start_gc()) {
3515-
// Multithread only. See assertion in `safepoint.c`
3514+
// either another thread is running GC, or the GC got disabled just now.
35163515
jl_gc_state_set(ptls, old_state, JL_GC_STATE_WAITING);
35173516
return;
35183517
}
@@ -3549,7 +3548,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
35493548
gc_invoke_callbacks(jl_gc_cb_pre_gc_t,
35503549
gc_cblist_pre_gc, (collection));
35513550

3552-
if (!jl_atomic_load_relaxed(&jl_gc_disable_counter)) {
3551+
if (!jl_atomic_load_acquire(&jl_gc_disable_counter)) {
35533552
JL_LOCK_NOGC(&finalizers_lock); // all the other threads are stopped, so this does not make sense, right? otherwise, failing that, this seems like plausibly a deadlock
35543553
#ifndef __clang_gcanalyzer__
35553554
if (_jl_gc_collect(ptls, collection)) {

src/julia_internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -884,6 +884,7 @@ STATIC_INLINE int jl_addr_is_safepoint(uintptr_t addr)
884884
return addr >= safepoint_addr && addr < safepoint_addr + jl_page_size * 3;
885885
}
886886
extern _Atomic(uint32_t) jl_gc_running;
887+
extern _Atomic(uint32_t) jl_gc_disable_counter;
887888
// All the functions are safe to be called from within a signal handler
888889
// provided that the thread will not be interrupted by another asynchronous
889890
// signal.

src/safepoint.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,14 @@ int jl_safepoint_start_gc(void)
124124
jl_safepoint_wait_gc();
125125
return 0;
126126
}
127+
// Foreign thread adoption disables the GC and waits for it to finish, however, that may
128+
// introduce a race between it and this thread checking if the GC is enabled and only
129+
// then setting jl_gc_running. To avoid that, check again now that we won that race.
130+
if (jl_atomic_load_acquire(&jl_gc_disable_counter)) {
131+
jl_atomic_store_release(&jl_gc_running, 0);
132+
uv_mutex_unlock(&safepoint_lock);
133+
return 0;
134+
}
127135
jl_safepoint_enable(1);
128136
jl_safepoint_enable(2);
129137
uv_mutex_unlock(&safepoint_lock);

src/threading.c

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -406,18 +406,28 @@ jl_ptls_t jl_init_threadtls(int16_t tid)
406406
return ptls;
407407
}
408408

409-
JL_DLLEXPORT jl_gcframe_t **jl_adopt_thread(void) JL_NOTSAFEPOINT_LEAVE
410-
{
409+
JL_DLLEXPORT jl_gcframe_t **jl_adopt_thread(void)
410+
{
411+
// `jl_init_threadtls` puts us in a GC unsafe region, so ensure GC isn't running.
412+
// we can't use a normal safepoint because we don't have signal handlers yet.
413+
// we also can't use jl_safepoint_wait_gc because that assumes we're in a task.
414+
jl_atomic_fetch_add(&jl_gc_disable_counter, 1);
415+
while (jl_atomic_load_acquire(&jl_gc_running)) {
416+
jl_cpu_pause();
417+
}
418+
// this check is coupled with the one in `jl_safepoint_wait_gc`, where we observe if a
419+
// foreign thread has asked to disable the GC, guaranteeing the order of events.
420+
411421
// initialize this thread (assign tid, create heap, set up root task)
412422
jl_ptls_t ptls = jl_init_threadtls(-1);
413423
void *stack_lo, *stack_hi;
414424
jl_init_stack_limits(0, &stack_lo, &stack_hi);
415425

416-
(void)jl_gc_unsafe_enter(ptls);
417426
// warning: this changes `jl_current_task`, so be careful not to call that from this function
418-
jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi);
427+
jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi); // assumes the GC is disabled
419428
JL_GC_PROMISE_ROOTED(ct);
420429
uv_random(NULL, NULL, &ct->rngState, sizeof(ct->rngState), 0, NULL);
430+
jl_atomic_fetch_add(&jl_gc_disable_counter, -1);
421431
return &ct->gcstack;
422432
}
423433

0 commit comments

Comments
 (0)