Skip to content

Commit 6412a56

Browse files
pchintalapudivtjnashDilumAluthge
authored
Atomically order specsigflags, specptr, and invoke (#47832)
Co-authored-by: Jameson Nash <[email protected]> Co-authored-by: Dilum Aluthge <[email protected]>
1 parent 81f366d commit 6412a56

File tree

7 files changed

+146
-71
lines changed

7 files changed

+146
-71
lines changed

src/codegen.cpp

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4236,7 +4236,7 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const
42364236
jl_value_t *ci = ctx.params->lookup(mi, ctx.world, ctx.world); // TODO: need to use the right pair world here
42374237
jl_code_instance_t *codeinst = (jl_code_instance_t*)ci;
42384238
if (ci != jl_nothing) {
4239-
auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
4239+
auto invoke = jl_atomic_load_acquire(&codeinst->invoke);
42404240
// check if we know how to handle this specptr
42414241
if (invoke == jl_fptr_const_return_addr) {
42424242
result = mark_julia_const(ctx, codeinst->rettype_const);
@@ -4262,10 +4262,13 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const
42624262
// optimization: emit the correct name immediately, if we know it
42634263
// TODO: use `emitted` map here too to try to consolidate names?
42644264
// WARNING: isspecsig is protected by the codegen-lock. If that lock is removed, then the isspecsig load needs to be properly atomically sequenced with this.
4265-
auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
42664265
auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr);
42674266
if (fptr) {
4268-
if (specsig ? codeinst->isspecsig : invoke == jl_fptr_args_addr) {
4267+
while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) {
4268+
jl_cpu_pause();
4269+
}
4270+
invoke = jl_atomic_load_relaxed(&codeinst->invoke);
4271+
if (specsig ? jl_atomic_load_relaxed(&codeinst->specsigflags) & 0b1 : invoke == jl_fptr_args_addr) {
42694272
protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst);
42704273
need_to_emit = false;
42714274
}
@@ -5783,9 +5786,15 @@ static Function* gen_cfun_wrapper(
57835786
if (lam && params.cache) {
57845787
// TODO: this isn't ideal to be unconditionally calling type inference (and compile) from here
57855788
codeinst = jl_compile_method_internal(lam, world);
5786-
auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
5789+
auto invoke = jl_atomic_load_acquire(&codeinst->invoke);
57875790
auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr);
57885791
assert(invoke);
5792+
if (fptr) {
5793+
while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) {
5794+
jl_cpu_pause();
5795+
}
5796+
invoke = jl_atomic_load_relaxed(&codeinst->invoke);
5797+
}
57895798
// WARNING: this invoke load is protected by the codegen-lock. If that lock is removed, then the isspecsig load needs to be properly atomically sequenced with this.
57905799
if (invoke == jl_fptr_args_addr) {
57915800
callptr = fptr;
@@ -5796,7 +5805,7 @@ static Function* gen_cfun_wrapper(
57965805
callptr = (void*)codeinst->rettype_const;
57975806
calltype = 2;
57985807
}
5799-
else if (codeinst->isspecsig) {
5808+
else if (jl_atomic_load_relaxed(&codeinst->specsigflags) & 0b1) {
58005809
callptr = fptr;
58015810
calltype = 3;
58025811
}
@@ -8526,18 +8535,25 @@ void jl_compile_workqueue(
85268535
"invalid world for code-instance");
85278536
StringRef preal_decl = "";
85288537
bool preal_specsig = false;
8529-
auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
8538+
auto invoke = jl_atomic_load_acquire(&codeinst->invoke);
85308539
bool cache_valid = params.cache;
85318540
if (params.external_linkage) {
85328541
cache_valid = 0 && jl_object_in_image((jl_value_t*)codeinst);
85338542
}
85348543
// WARNING: isspecsig is protected by the codegen-lock. If that lock is removed, then the isspecsig load needs to be properly atomically sequenced with this.
85358544
if (cache_valid && invoke != NULL) {
85368545
auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr);
8546+
if (fptr) {
8547+
while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) {
8548+
jl_cpu_pause();
8549+
}
8550+
// in case we are racing with another thread that is emitting this function
8551+
invoke = jl_atomic_load_relaxed(&codeinst->invoke);
8552+
}
85378553
if (invoke == jl_fptr_args_addr) {
85388554
preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst);
85398555
}
8540-
else if (codeinst->isspecsig) {
8556+
else if (jl_atomic_load_relaxed(&codeinst->specsigflags) & 0b1) {
85418557
preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst);
85428558
preal_specsig = true;
85438559
}

src/gf.c

Lines changed: 65 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -414,13 +414,13 @@ JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst(
414414
if ((const_flags & 2) == 0)
415415
inferred_const = NULL;
416416
codeinst->rettype_const = inferred_const;
417-
jl_atomic_store_relaxed(&codeinst->invoke, NULL);
418417
jl_atomic_store_relaxed(&codeinst->specptr.fptr, NULL);
418+
jl_atomic_store_relaxed(&codeinst->invoke, NULL);
419419
if ((const_flags & 1) != 0) {
420420
assert(const_flags & 2);
421421
jl_atomic_store_relaxed(&codeinst->invoke, jl_fptr_const_return);
422422
}
423-
codeinst->isspecsig = 0;
423+
jl_atomic_store_relaxed(&codeinst->specsigflags, 0);
424424
jl_atomic_store_relaxed(&codeinst->precompile, 0);
425425
jl_atomic_store_relaxed(&codeinst->next, NULL);
426426
codeinst->ipo_purity_bits = ipo_effects;
@@ -2218,12 +2218,33 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
22182218
mi, codeinst2->rettype,
22192219
codeinst2->min_world, codeinst2->max_world);
22202220
if (jl_atomic_load_relaxed(&codeinst->invoke) == NULL) {
2221-
// once set, don't change invoke-ptr, as that leads to race conditions
2222-
// with the (not) simultaneous updates to invoke and specptr
2223-
codeinst->isspecsig = codeinst2->isspecsig;
22242221
codeinst->rettype_const = codeinst2->rettype_const;
2225-
jl_atomic_store_release(&codeinst->specptr.fptr, jl_atomic_load_relaxed(&codeinst2->specptr.fptr));
2226-
jl_atomic_store_release(&codeinst->invoke, jl_atomic_load_relaxed(&codeinst2->invoke));
2222+
uint8_t specsigflags = jl_atomic_load_acquire(&codeinst2->specsigflags);
2223+
jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst2->invoke);
2224+
void *fptr = jl_atomic_load_relaxed(&codeinst2->specptr.fptr);
2225+
if (fptr != NULL) {
2226+
while (!(specsigflags & 0b10)) {
2227+
jl_cpu_pause();
2228+
specsigflags = jl_atomic_load_acquire(&codeinst2->specsigflags);
2229+
}
2230+
invoke = jl_atomic_load_relaxed(&codeinst2->invoke);
2231+
void *prev_fptr = NULL;
2232+
// see jitlayers.cpp for the ordering restrictions here
2233+
if (jl_atomic_cmpswap_acqrel(&codeinst->specptr.fptr, &prev_fptr, fptr)) {
2234+
jl_atomic_store_relaxed(&codeinst->specsigflags, specsigflags & 0b1);
2235+
jl_atomic_store_release(&codeinst->invoke, invoke);
2236+
jl_atomic_store_release(&codeinst->specsigflags, specsigflags);
2237+
} else {
2238+
// someone else already compiled it
2239+
while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) {
2240+
jl_cpu_pause();
2241+
}
2242+
// codeinst is now set up fully, safe to return
2243+
}
2244+
} else {
2245+
jl_callptr_t prev = NULL;
2246+
jl_atomic_cmpswap_acqrel(&codeinst->invoke, &prev, invoke);
2247+
}
22272248
}
22282249
// don't call record_precompile_statement here, since we already compiled it as mi2 which is better
22292250
return codeinst;
@@ -2248,14 +2269,22 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
22482269
jl_method_instance_t *unspecmi = jl_atomic_load_relaxed(&def->unspecialized);
22492270
if (unspecmi) {
22502271
jl_code_instance_t *unspec = jl_atomic_load_relaxed(&unspecmi->cache);
2251-
if (unspec && jl_atomic_load_acquire(&unspec->invoke)) {
2272+
jl_callptr_t unspec_invoke = NULL;
2273+
if (unspec && (unspec_invoke = jl_atomic_load_acquire(&unspec->invoke))) {
22522274
jl_code_instance_t *codeinst = jl_new_codeinst(mi,
22532275
(jl_value_t*)jl_any_type, NULL, NULL,
22542276
0, 1, ~(size_t)0, 0, 0, jl_nothing, 0);
2255-
codeinst->isspecsig = 0;
2256-
codeinst->specptr = unspec->specptr;
2277+
void *unspec_fptr = jl_atomic_load_relaxed(&unspec->specptr.fptr);
2278+
if (unspec_fptr) {
2279+
// wait until invoke and specsigflags are properly set
2280+
while (!(jl_atomic_load_acquire(&unspec->specsigflags) & 0b10)) {
2281+
jl_cpu_pause();
2282+
}
2283+
unspec_invoke = jl_atomic_load_relaxed(&unspec->invoke);
2284+
}
2285+
jl_atomic_store_release(&codeinst->specptr.fptr, unspec_fptr);
22572286
codeinst->rettype_const = unspec->rettype_const;
2258-
jl_atomic_store_relaxed(&codeinst->invoke, jl_atomic_load_relaxed(&unspec->invoke));
2287+
jl_atomic_store_release(&codeinst->invoke, unspec_invoke);
22592288
jl_mi_cache_insert(mi, codeinst);
22602289
record_precompile_statement(mi);
22612290
return codeinst;
@@ -2272,7 +2301,7 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
22722301
jl_code_instance_t *codeinst = jl_new_codeinst(mi,
22732302
(jl_value_t*)jl_any_type, NULL, NULL,
22742303
0, 1, ~(size_t)0, 0, 0, jl_nothing, 0);
2275-
jl_atomic_store_relaxed(&codeinst->invoke, jl_fptr_interpret_call);
2304+
jl_atomic_store_release(&codeinst->invoke, jl_fptr_interpret_call);
22762305
jl_mi_cache_insert(mi, codeinst);
22772306
record_precompile_statement(mi);
22782307
return codeinst;
@@ -2289,7 +2318,8 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
22892318
jl_method_instance_t *unspec = jl_get_unspecialized_from_mi(mi);
22902319
jl_code_instance_t *ucache = jl_get_method_inferred(unspec, (jl_value_t*)jl_any_type, 1, ~(size_t)0);
22912320
// ask codegen to make the fptr for unspec
2292-
if (jl_atomic_load_acquire(&ucache->invoke) == NULL) {
2321+
jl_callptr_t ucache_invoke = jl_atomic_load_acquire(&ucache->invoke);
2322+
if (ucache_invoke == NULL) {
22932323
if (def->source == jl_nothing && (jl_atomic_load_relaxed(&ucache->def->uninferred) == jl_nothing ||
22942324
jl_atomic_load_relaxed(&ucache->def->uninferred) == NULL)) {
22952325
jl_printf(JL_STDERR, "source not available for ");
@@ -2298,19 +2328,29 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
22982328
jl_error("source missing for method that needs to be compiled");
22992329
}
23002330
jl_generate_fptr_for_unspecialized(ucache);
2331+
ucache_invoke = jl_atomic_load_acquire(&ucache->invoke);
23012332
}
2302-
assert(jl_atomic_load_relaxed(&ucache->invoke) != NULL);
2303-
if (jl_atomic_load_relaxed(&ucache->invoke) != jl_fptr_sparam &&
2304-
jl_atomic_load_relaxed(&ucache->invoke) != jl_fptr_interpret_call) {
2333+
assert(ucache_invoke != NULL);
2334+
if (ucache_invoke != jl_fptr_sparam &&
2335+
ucache_invoke != jl_fptr_interpret_call) {
23052336
// only these care about the exact specTypes, otherwise we can use it directly
23062337
return ucache;
23072338
}
23082339
codeinst = jl_new_codeinst(mi, (jl_value_t*)jl_any_type, NULL, NULL,
23092340
0, 1, ~(size_t)0, 0, 0, jl_nothing, 0);
2310-
codeinst->isspecsig = 0;
2311-
codeinst->specptr = ucache->specptr;
2341+
void *unspec_fptr = jl_atomic_load_relaxed(&ucache->specptr.fptr);
2342+
if (unspec_fptr) {
2343+
// wait until invoke and specsigflags are properly set
2344+
while (!(jl_atomic_load_acquire(&ucache->specsigflags) & 0b10)) {
2345+
jl_cpu_pause();
2346+
}
2347+
ucache_invoke = jl_atomic_load_relaxed(&ucache->invoke);
2348+
}
2349+
// unspec is always not specsig, but might use specptr
2350+
jl_atomic_store_relaxed(&codeinst->specsigflags, jl_atomic_load_relaxed(&ucache->specsigflags) & 0b10);
2351+
jl_atomic_store_relaxed(&codeinst->specptr.fptr, unspec_fptr);
23122352
codeinst->rettype_const = ucache->rettype_const;
2313-
jl_atomic_store_relaxed(&codeinst->invoke, jl_atomic_load_relaxed(&ucache->invoke));
2353+
jl_atomic_store_release(&codeinst->invoke, ucache_invoke);
23142354
jl_mi_cache_insert(mi, codeinst);
23152355
}
23162356
else {
@@ -2328,23 +2368,17 @@ jl_value_t *jl_fptr_const_return(jl_value_t *f, jl_value_t **args, uint32_t narg
23282368
jl_value_t *jl_fptr_args(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *m)
23292369
{
23302370
jl_fptr_args_t invoke = jl_atomic_load_relaxed(&m->specptr.fptr1);
2331-
while (1) {
2332-
if (invoke)
2333-
return invoke(f, args, nargs);
2334-
invoke = jl_atomic_load_acquire(&m->specptr.fptr1); // require forward progress with acquire annotation
2335-
}
2371+
assert(invoke && "Forgot to set specptr for jl_fptr_args!");
2372+
return invoke(f, args, nargs);
23362373
}
23372374

23382375
jl_value_t *jl_fptr_sparam(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *m)
23392376
{
23402377
jl_svec_t *sparams = m->def->sparam_vals;
23412378
assert(sparams != jl_emptysvec);
23422379
jl_fptr_sparam_t invoke = jl_atomic_load_relaxed(&m->specptr.fptr3);
2343-
while (1) {
2344-
if (invoke)
2345-
return invoke(f, args, nargs, sparams);
2346-
invoke = jl_atomic_load_acquire(&m->specptr.fptr3); // require forward progress with acquire annotation
2347-
}
2380+
assert(invoke && "Forgot to set specptr for jl_fptr_sparam!");
2381+
return invoke(f, args, nargs, sparams);
23482382
}
23492383

23502384
JL_DLLEXPORT jl_callptr_t jl_fptr_args_addr = &jl_fptr_args;
@@ -2667,7 +2701,7 @@ STATIC_INLINE jl_value_t *_jl_invoke(jl_value_t *F, jl_value_t **args, uint32_t
26672701
jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mfunc->cache);
26682702
while (codeinst) {
26692703
if (codeinst->min_world <= world && world <= codeinst->max_world) {
2670-
jl_callptr_t invoke = jl_atomic_load_relaxed(&codeinst->invoke);
2704+
jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst->invoke);
26712705
if (invoke != NULL) {
26722706
jl_value_t *res = invoke(F, args, nargs, codeinst);
26732707
return verify_type(res);
@@ -2687,7 +2721,7 @@ STATIC_INLINE jl_value_t *_jl_invoke(jl_value_t *F, jl_value_t **args, uint32_t
26872721
errno = last_errno;
26882722
if (jl_options.malloc_log)
26892723
jl_gc_sync_total_bytes(last_alloc); // discard allocation count from compilation
2690-
jl_callptr_t invoke = jl_atomic_load_relaxed(&codeinst->invoke);
2724+
jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst->invoke);
26912725
jl_value_t *res = invoke(F, args, nargs, codeinst);
26922726
return verify_type(res);
26932727
}

src/jitlayers.cpp

Lines changed: 29 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -262,18 +262,31 @@ static jl_callptr_t _jl_compile_codeinst(
262262
addr = (jl_callptr_t)getAddressForFunction(decls.functionObject);
263263
isspecsig = true;
264264
}
265-
if (jl_atomic_load_relaxed(&this_code->invoke) == NULL) {
266-
// once set, don't change invoke-ptr, as that leads to race conditions
267-
// with the (not) simultaneous updates to invoke and specptr
268-
if (!decls.specFunctionObject.empty()) {
269-
jl_atomic_store_release(&this_code->specptr.fptr, (void*)getAddressForFunction(decls.specFunctionObject));
270-
this_code->isspecsig = isspecsig;
265+
if (!decls.specFunctionObject.empty()) {
266+
void *prev_specptr = NULL;
267+
auto spec = (void*)getAddressForFunction(decls.specFunctionObject);
268+
if (jl_atomic_cmpswap_acqrel(&this_code->specptr.fptr, &prev_specptr, spec)) {
269+
// only set specsig and invoke if we were the first to set specptr
270+
jl_atomic_store_relaxed(&this_code->specsigflags, (uint8_t) isspecsig);
271+
// we might overwrite invokeptr here; that's ok, anybody who relied on the identity of invokeptr
272+
// either assumes that specptr was null, doesn't care about specptr,
273+
// or will wait until specsigflags has 0b10 set before reloading invoke
274+
jl_atomic_store_release(&this_code->invoke, addr);
275+
jl_atomic_store_release(&this_code->specsigflags, (uint8_t) (0b10 | isspecsig));
276+
} else {
277+
//someone else beat us, don't commit any results
278+
while (!(jl_atomic_load_acquire(&this_code->specsigflags) & 0b10)) {
279+
jl_cpu_pause();
280+
}
281+
addr = jl_atomic_load_relaxed(&this_code->invoke);
282+
}
283+
} else {
284+
jl_callptr_t prev_invoke = NULL;
285+
if (!jl_atomic_cmpswap_acqrel(&this_code->invoke, &prev_invoke, addr)) {
286+
addr = prev_invoke;
287+
//TODO do we want to potentially promote invoke anyways? (e.g. invoke is jl_interpret_call or some other
288+
//known lesser function)
271289
}
272-
jl_atomic_store_release(&this_code->invoke, addr);
273-
}
274-
else if (jl_atomic_load_relaxed(&this_code->invoke) == jl_fptr_const_return_addr && !decls.specFunctionObject.empty()) {
275-
// hack to export this pointer value to jl_dump_method_disasm
276-
jl_atomic_store_release(&this_code->specptr.fptr, (void*)getAddressForFunction(decls.specFunctionObject));
277290
}
278291
if (this_code == codeinst)
279292
fptr = addr;
@@ -497,10 +510,9 @@ void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec)
497510
assert(src && jl_is_code_info(src));
498511
++UnspecFPtrCount;
499512
_jl_compile_codeinst(unspec, src, unspec->min_world, *jl_ExecutionEngine->getContext());
500-
if (jl_atomic_load_relaxed(&unspec->invoke) == NULL) {
501-
// if we hit a codegen bug (or ran into a broken generated function or llvmcall), fall back to the interpreter as a last resort
502-
jl_atomic_store_release(&unspec->invoke, jl_fptr_interpret_call_addr);
503-
}
513+
jl_callptr_t null = nullptr;
514+
// if we hit a codegen bug (or ran into a broken generated function or llvmcall), fall back to the interpreter as a last resort
515+
jl_atomic_cmpswap(&unspec->invoke, &null, jl_fptr_interpret_call_addr);
504516
JL_GC_POP();
505517
}
506518
JL_UNLOCK(&jl_codegen_lock); // Might GC
@@ -519,7 +531,7 @@ jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world,
519531
// printing via disassembly
520532
jl_code_instance_t *codeinst = jl_generate_fptr(mi, world);
521533
if (codeinst) {
522-
uintptr_t fptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->invoke);
534+
uintptr_t fptr = (uintptr_t)jl_atomic_load_acquire(&codeinst->invoke);
523535
if (getwrapper)
524536
return jl_dump_fptr_asm(fptr, raw_mc, asm_variant, debuginfo, binary);
525537
uintptr_t specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr);
@@ -547,7 +559,7 @@ jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world,
547559
if (src && (jl_value_t*)src != jl_nothing)
548560
src = jl_uncompress_ir(mi->def.method, codeinst, (jl_array_t*)src);
549561
}
550-
fptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->invoke);
562+
fptr = (uintptr_t)jl_atomic_load_acquire(&codeinst->invoke);
551563
specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr);
552564
if (src && jl_is_code_info(src)) {
553565
if (fptr == (uintptr_t)jl_fptr_const_return_addr && specfptr == 0) {

0 commit comments

Comments
 (0)