@@ -414,13 +414,13 @@ JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst(
     if ((const_flags & 2) == 0)
         inferred_const = NULL;
     codeinst->rettype_const = inferred_const;
-    jl_atomic_store_relaxed(&codeinst->invoke, NULL);
     jl_atomic_store_relaxed(&codeinst->specptr.fptr, NULL);
+    jl_atomic_store_relaxed(&codeinst->invoke, NULL);
     if ((const_flags & 1) != 0) {
         assert(const_flags & 2);
         jl_atomic_store_relaxed(&codeinst->invoke, jl_fptr_const_return);
     }
-    codeinst->isspecsig = 0;
+    jl_atomic_store_relaxed(&codeinst->specsigflags, 0);
     jl_atomic_store_relaxed(&codeinst->precompile, 0);
     jl_atomic_store_relaxed(&codeinst->next, NULL);
     codeinst->ipo_purity_bits = ipo_effects;
@@ -2218,12 +2218,33 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
                     mi, codeinst2->rettype,
                     codeinst2->min_world, codeinst2->max_world);
             if (jl_atomic_load_relaxed(&codeinst->invoke) == NULL) {
-                // once set, don't change invoke-ptr, as that leads to race conditions
-                // with the (not) simultaneous updates to invoke and specptr
-                codeinst->isspecsig = codeinst2->isspecsig;
                 codeinst->rettype_const = codeinst2->rettype_const;
-                jl_atomic_store_release(&codeinst->specptr.fptr, jl_atomic_load_relaxed(&codeinst2->specptr.fptr));
-                jl_atomic_store_release(&codeinst->invoke, jl_atomic_load_relaxed(&codeinst2->invoke));
+                uint8_t specsigflags = jl_atomic_load_acquire(&codeinst2->specsigflags);
+                jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst2->invoke);
+                void *fptr = jl_atomic_load_relaxed(&codeinst2->specptr.fptr);
+                if (fptr != NULL) {
+                    while (!(specsigflags & 0b10)) {
+                        jl_cpu_pause();
+                        specsigflags = jl_atomic_load_acquire(&codeinst2->specsigflags);
+                    }
+                    invoke = jl_atomic_load_relaxed(&codeinst2->invoke);
+                    void *prev_fptr = NULL;
+                    // see jitlayers.cpp for the ordering restrictions here
+                    if (jl_atomic_cmpswap_acqrel(&codeinst->specptr.fptr, &prev_fptr, fptr)) {
+                        jl_atomic_store_relaxed(&codeinst->specsigflags, specsigflags & 0b1);
+                        jl_atomic_store_release(&codeinst->invoke, invoke);
+                        jl_atomic_store_release(&codeinst->specsigflags, specsigflags);
+                    } else {
+                        // someone else already compiled it
+                        while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) {
+                            jl_cpu_pause();
+                        }
+                        // codeinst is now set up fully, safe to return
+                    }
+                } else {
+                    jl_callptr_t prev = NULL;
+                    jl_atomic_cmpswap_acqrel(&codeinst->invoke, &prev, invoke);
+                }
             }
             // don't call record_precompile_statement here, since we already compiled it as mi2 which is better
             return codeinst;
@@ -2248,14 +2269,22 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
             jl_method_instance_t *unspecmi = jl_atomic_load_relaxed(&def->unspecialized);
             if (unspecmi) {
                 jl_code_instance_t *unspec = jl_atomic_load_relaxed(&unspecmi->cache);
-                if (unspec && jl_atomic_load_acquire(&unspec->invoke)) {
+                jl_callptr_t unspec_invoke = NULL;
+                if (unspec && (unspec_invoke = jl_atomic_load_acquire(&unspec->invoke))) {
                     jl_code_instance_t *codeinst = jl_new_codeinst(mi,
                         (jl_value_t*)jl_any_type, NULL, NULL,
                         0, 1, ~(size_t)0, 0, 0, jl_nothing, 0);
-                    codeinst->isspecsig = 0;
-                    codeinst->specptr = unspec->specptr;
+                    void *unspec_fptr = jl_atomic_load_relaxed(&unspec->specptr.fptr);
+                    if (unspec_fptr) {
+                        // wait until invoke and specsigflags are properly set
+                        while (!(jl_atomic_load_acquire(&unspec->specsigflags) & 0b10)) {
+                            jl_cpu_pause();
+                        }
+                        unspec_invoke = jl_atomic_load_relaxed(&unspec->invoke);
+                    }
+                    jl_atomic_store_release(&codeinst->specptr.fptr, unspec_fptr);
                     codeinst->rettype_const = unspec->rettype_const;
-                    jl_atomic_store_relaxed(&codeinst->invoke, jl_atomic_load_relaxed(&unspec->invoke));
+                    jl_atomic_store_release(&codeinst->invoke, unspec_invoke);
                     jl_mi_cache_insert(mi, codeinst);
                     record_precompile_statement(mi);
                     return codeinst;
@@ -2272,7 +2301,7 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
             jl_code_instance_t *codeinst = jl_new_codeinst(mi,
                 (jl_value_t*)jl_any_type, NULL, NULL,
                 0, 1, ~(size_t)0, 0, 0, jl_nothing, 0);
-            jl_atomic_store_relaxed(&codeinst->invoke, jl_fptr_interpret_call);
+            jl_atomic_store_release(&codeinst->invoke, jl_fptr_interpret_call);
             jl_mi_cache_insert(mi, codeinst);
             record_precompile_statement(mi);
             return codeinst;
@@ -2289,7 +2318,8 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
         jl_method_instance_t *unspec = jl_get_unspecialized_from_mi(mi);
         jl_code_instance_t *ucache = jl_get_method_inferred(unspec, (jl_value_t*)jl_any_type, 1, ~(size_t)0);
         // ask codegen to make the fptr for unspec
-        if (jl_atomic_load_acquire(&ucache->invoke) == NULL) {
+        jl_callptr_t ucache_invoke = jl_atomic_load_acquire(&ucache->invoke);
+        if (ucache_invoke == NULL) {
             if (def->source == jl_nothing && (jl_atomic_load_relaxed(&ucache->def->uninferred) == jl_nothing ||
                 jl_atomic_load_relaxed(&ucache->def->uninferred) == NULL)) {
                 jl_printf(JL_STDERR, "source not available for ");
@@ -2298,19 +2328,29 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
                 jl_error("source missing for method that needs to be compiled");
             }
             jl_generate_fptr_for_unspecialized(ucache);
+            ucache_invoke = jl_atomic_load_acquire(&ucache->invoke);
         }
-        assert(jl_atomic_load_relaxed(&ucache->invoke) != NULL);
-        if (jl_atomic_load_relaxed(&ucache->invoke) != jl_fptr_sparam &&
-            jl_atomic_load_relaxed(&ucache->invoke) != jl_fptr_interpret_call) {
+        assert(ucache_invoke != NULL);
+        if (ucache_invoke != jl_fptr_sparam &&
+            ucache_invoke != jl_fptr_interpret_call) {
             // only these care about the exact specTypes, otherwise we can use it directly
             return ucache;
         }
         codeinst = jl_new_codeinst(mi, (jl_value_t*)jl_any_type, NULL, NULL,
             0, 1, ~(size_t)0, 0, 0, jl_nothing, 0);
-        codeinst->isspecsig = 0;
-        codeinst->specptr = ucache->specptr;
+        void *unspec_fptr = jl_atomic_load_relaxed(&ucache->specptr.fptr);
+        if (unspec_fptr) {
+            // wait until invoke and specsigflags are properly set
+            while (!(jl_atomic_load_acquire(&ucache->specsigflags) & 0b10)) {
+                jl_cpu_pause();
+            }
+            ucache_invoke = jl_atomic_load_relaxed(&ucache->invoke);
+        }
+        // unspec is always not specsig, but might use specptr
+        jl_atomic_store_relaxed(&codeinst->specsigflags, jl_atomic_load_relaxed(&ucache->specsigflags) & 0b10);
+        jl_atomic_store_relaxed(&codeinst->specptr.fptr, unspec_fptr);
         codeinst->rettype_const = ucache->rettype_const;
-        jl_atomic_store_relaxed(&codeinst->invoke, jl_atomic_load_relaxed(&ucache->invoke));
+        jl_atomic_store_release(&codeinst->invoke, ucache_invoke);
         jl_mi_cache_insert(mi, codeinst);
     }
     else {
@@ -2328,23 +2368,17 @@ jl_value_t *jl_fptr_const_return(jl_value_t *f, jl_value_t **args, uint32_t narg
 jl_value_t *jl_fptr_args(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *m)
 {
     jl_fptr_args_t invoke = jl_atomic_load_relaxed(&m->specptr.fptr1);
-    while (1) {
-        if (invoke)
-            return invoke(f, args, nargs);
-        invoke = jl_atomic_load_acquire(&m->specptr.fptr1); // require forward progress with acquire annotation
-    }
+    assert(invoke && "Forgot to set specptr for jl_fptr_args!");
+    return invoke(f, args, nargs);
 }
 
 jl_value_t *jl_fptr_sparam(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *m)
 {
     jl_svec_t *sparams = m->def->sparam_vals;
     assert(sparams != jl_emptysvec);
     jl_fptr_sparam_t invoke = jl_atomic_load_relaxed(&m->specptr.fptr3);
-    while (1) {
-        if (invoke)
-            return invoke(f, args, nargs, sparams);
-        invoke = jl_atomic_load_acquire(&m->specptr.fptr3); // require forward progress with acquire annotation
-    }
+    assert(invoke && "Forgot to set specptr for jl_fptr_sparam!");
+    return invoke(f, args, nargs, sparams);
 }
 
 JL_DLLEXPORT jl_callptr_t jl_fptr_args_addr = &jl_fptr_args;
@@ -2667,7 +2701,7 @@ STATIC_INLINE jl_value_t *_jl_invoke(jl_value_t *F, jl_value_t **args, uint32_t
     jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mfunc->cache);
     while (codeinst) {
         if (codeinst->min_world <= world && world <= codeinst->max_world) {
-            jl_callptr_t invoke = jl_atomic_load_relaxed(&codeinst->invoke);
+            jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst->invoke);
             if (invoke != NULL) {
                 jl_value_t *res = invoke(F, args, nargs, codeinst);
                 return verify_type(res);
@@ -2687,7 +2721,7 @@ STATIC_INLINE jl_value_t *_jl_invoke(jl_value_t *F, jl_value_t **args, uint32_t
     errno = last_errno;
     if (jl_options.malloc_log)
         jl_gc_sync_total_bytes(last_alloc); // discard allocation count from compilation
-    jl_callptr_t invoke = jl_atomic_load_relaxed(&codeinst->invoke);
+    jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst->invoke);
     jl_value_t *res = invoke(F, args, nargs, codeinst);
     return verify_type(res);
 }
0 commit comments