Skip to content

Commit 2d14631

Browse files
lambdageekliveans
authored andcommitted
[mono] Fix deadlock in static constructor initialization (dotnet#93875)
* [mono] Fix deadlock in static constructor initialization If two threads (A and B) need to call the static constructors for 3 classes X, Y, Z in this order: Thread A: X, Z, Y Thread B: Y, Z, X where the cctor for X in thread A invokes the cctor for Z and for Y, and the cctor for Y in thread B invokes the cctor for Z and X, then we can get into a situation where A and B both start the cctors for X and Y (so they will be in the type_initialization_hash for those two classes) and then both will try to init Z. In that case it could be that A will be responsible for initializing Z and B will block. Then A could finish initializing Z but B may not have woken up yet (and so it will be in blocked_thread_hash waiting for Z). At that point A (who is at this point already need to init Y) may think that it can wait for B to finish initializing Y. That is we get to `mono_coop_cond_timedwait (&lock->cond, &lock->mutex, timeout_ms)` with "lock" being the lock for `Y`. But in fact thread B will not be able to complete initializing Y because it will attempt to init X next - but meanwhile we did not indicate that thread A is blocked. As a result in thread A the timed wait will eventually timeout. And in this case we need to go back to the top and now correctly detect that A is waiting for Y and B is waiting for X. (At that point there is a cctor deadlock and ECMA rules allow one of the threads to return without calling the cctor) The old code here used to do an infinite wait: while (!lock->done) mono_coop_cond_wait (&lock->cond, &lock->mutex) This cannot succeed because "lock" (in thread A it's the lock for Y) will not be signaled since B (who is supposed to init Y) will instead block on the cctor for X. Fixes dotnet#93778 * Add test case * remove prototyping log messages * disable mt test on wasm * code review feedback
1 parent 32e3c55 commit 2d14631

File tree

4 files changed

+243
-2
lines changed

4 files changed

+243
-2
lines changed

src/mono/mono/metadata/object.c

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -457,6 +457,7 @@ mono_runtime_class_init_full (MonoVTable *vtable, MonoError *error)
457457
* on this cond var.
458458
*/
459459

460+
retry_top:
460461
mono_type_initialization_lock ();
461462
/* double check... */
462463
if (vtable->initialized) {
@@ -506,6 +507,12 @@ mono_runtime_class_init_full (MonoVTable *vtable, MonoError *error)
506507
blocked = GUINT_TO_POINTER (MONO_NATIVE_THREAD_ID_TO_UINT (lock->initializing_tid));
507508
while ((pending_lock = (TypeInitializationLock*) g_hash_table_lookup (blocked_thread_hash, blocked))) {
508509
if (mono_native_thread_id_equals (pending_lock->initializing_tid, tid)) {
510+
if (mono_trace_is_traced (G_LOG_LEVEL_DEBUG, MONO_TRACE_TYPE)) {
511+
char* type_name = mono_type_full_name (m_class_get_byval_arg (klass));
512+
mono_trace (G_LOG_LEVEL_DEBUG, MONO_TRACE_TYPE, "Detected deadlock for class .cctor for %s from '%s'", type_name, m_class_get_image (klass)->name);
513+
g_free (type_name);
514+
}
515+
509516
if (!pending_lock->done) {
510517
mono_type_initialization_unlock ();
511518
goto return_true;
@@ -604,9 +611,49 @@ mono_runtime_class_init_full (MonoVTable *vtable, MonoError *error)
604611
} else {
605612
/* this just blocks until the initializing thread is done */
606613
mono_type_init_lock (lock);
607-
while (!lock->done)
608-
mono_coop_cond_wait (&lock->cond, &lock->mutex);
614+
if (!lock->done) {
615+
int timeout_ms = 500;
616+
int wait_result = mono_coop_cond_timedwait (&lock->cond, &lock->mutex, timeout_ms);
617+
if (wait_result == -1) {
618+
/* timed out - go around again from the beginning. If we got here
619+
* from the "is_blocked = FALSE" case, above (another thread was
620+
* blocked on the current thread, but on a lock that was already
621+
* done but it didn't get to wake up yet), then it might still be
622+
* the case that the current thread cannot proceed even if the other
623+
* thread got to wake up - there might be a new deadlock. We need
624+
* to re-evaluate.
625+
*
626+
* This can happen if two threads A and B need to call the cctors
627+
* for classes X and Y but in opposite orders, and also call a cctor
628+
* for a third class Z. (That is thread A wants to init: X, Z, Y;
629+
* thread B wants to init: Y, Z, X.) In that case, if B is waiting
630+
* for A to finish initializing Z, and A (the current thread )
631+
* already finished Z and wants to init Y. In A, control will come
632+
* here with "lock" being Y's lock. But we will time out because B
633+
* will see that A is responsible for initializing X and will also
634+
* block. So A is waiting for B to finish Y and B is waiting for A
635+
* to finish X. So the fact that A allowed B to wait for Z to
636+
* finish didn't actually let us make progress. Thread A must go
637+
* around to the top once more and try to init Y - and detect that
638+
* there is now a deadlock between X and Y.
639+
*/
640+
mono_type_init_unlock (lock);
641+
// clean up blocked thread hash and lock refcount.
642+
mono_type_initialization_lock ();
643+
g_hash_table_remove (blocked_thread_hash, GUINT_TO_POINTER (tid));
644+
gboolean deleted = unref_type_lock (lock);
645+
if (deleted)
646+
g_hash_table_remove (type_initialization_hash, vtable);
647+
mono_type_initialization_unlock ();
648+
goto retry_top;
649+
} else if (wait_result == 0) {
650+
/* Success: we were signaled that the other thread is done. Proceed */
651+
} else {
652+
g_assert_not_reached ();
653+
}
654+
}
609655
mono_type_init_unlock (lock);
656+
g_assert (lock->done);
610657
}
611658

612659
/* Do cleanup and setting vtable->initialized inside the global lock again */
Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System;
5+
using System.Runtime.CompilerServices;
6+
using System.Threading;
7+
8+
using Xunit;
9+
10+
// Regression test for https://github.com/dotnet/runtime/issues/93778
11+
namespace CircularCctorTwoThreadsThreeCC;
12+
13+
[Flags]
14+
public enum SlotConstants : int
15+
{
16+
ZInited = 1,
17+
YInitedFromX = 2,
18+
XInitedFromY = 4,
19+
20+
Thread1 = 1 << 16,
21+
Thread2 = 2 << 16,
22+
}
23+
24+
/// X and Y both try to use each other, and also both use Z.
25+
/// We expect to see exactly one thread initialize Z and
26+
/// either X inited from Y or Y inited from X.
27+
public class X
28+
{
29+
public static X Singleton = new();
30+
private X() {
31+
Z.Singleton.Ping();
32+
Y.Singleton?.Pong();
33+
}
34+
35+
public void Pong() => Coordinator.Note(SlotConstants.XInitedFromY);
36+
}
37+
38+
public class Y
39+
{
40+
public static Y Singleton = new();
41+
private Y() {
42+
Z.Singleton.Ping();
43+
X.Singleton?.Pong();
44+
}
45+
46+
public void Pong() => Coordinator.Note(SlotConstants.YInitedFromX);
47+
}
48+
49+
public class Z
50+
{
51+
public static Z Singleton = new();
52+
53+
private Z() {
54+
Coordinator.Note(SlotConstants.ZInited);
55+
}
56+
57+
public void Ping() { }
58+
59+
}
60+
61+
public class Coordinator
62+
{
63+
[ThreadStatic]
64+
private static SlotConstants t_threadTag;
65+
66+
private static int s_NextNote;
67+
private static readonly SlotConstants[] Notes = new SlotConstants[12];
68+
69+
private static SlotConstants DecorateWithThread(SlotConstants c)
70+
{
71+
return c | t_threadTag;
72+
}
73+
74+
public static void Note(SlotConstants s) {
75+
int idx = Interlocked.Increment(ref s_NextNote);
76+
idx--;
77+
Notes[idx] = DecorateWithThread (s);
78+
}
79+
80+
public static Coordinator CreateThread(bool xThenY, SlotConstants threadTag)
81+
{
82+
return new Coordinator(xThenY, threadTag);
83+
}
84+
85+
public readonly Thread Thread;
86+
private static readonly Barrier s_barrier = new (3);
87+
88+
private Coordinator(bool xThenY, SlotConstants threadTag)
89+
{
90+
var t = new Thread(() => {
91+
t_threadTag = threadTag;
92+
// Log("started");
93+
NextPhase();
94+
// Log("racing");
95+
DoConstructions(xThenY);
96+
NextPhase();
97+
// Log("done");
98+
});
99+
Thread = t;
100+
t.Start();
101+
}
102+
103+
public static void NextPhase() { s_barrier.SignalAndWait(); }
104+
105+
[MethodImpl(MethodImplOptions.NoInlining)]
106+
public static void DoConstructions(bool xThenY)
107+
{
108+
if (xThenY) {
109+
XCreate();
110+
} else {
111+
YCreate();
112+
}
113+
}
114+
115+
[MethodImpl(MethodImplOptions.NoInlining)]
116+
private static void XCreate()
117+
{
118+
var _ = X.Singleton;
119+
}
120+
121+
[MethodImpl(MethodImplOptions.NoInlining)]
122+
private static void YCreate()
123+
{
124+
var _ = Y.Singleton;
125+
}
126+
127+
public static void Log(string msg)
128+
{
129+
Console.WriteLine ($"{Thread.CurrentThread.ManagedThreadId}: {msg}");
130+
}
131+
132+
[Fact]
133+
public static void RunTestCase()
134+
{
135+
var c1 = CreateThread(xThenY: true, threadTag: SlotConstants.Thread1);
136+
var c2 = CreateThread(xThenY: false, threadTag: SlotConstants.Thread2);
137+
// created all threads
138+
NextPhase();
139+
// racing
140+
NextPhase();
141+
// done
142+
143+
// one second should be plenty for all these threads, but it's arbitrary
144+
int threadJoinTimeoutMS = 1000;
145+
var j1 = c1.Thread.Join(threadJoinTimeoutMS);
146+
var j2 = c2.Thread.Join(threadJoinTimeoutMS);
147+
Assert.True(j1);
148+
Assert.True(j2);
149+
// all joined
150+
151+
// exactly one thread inited Z
152+
Assert.Equal(1, CountNotes(SlotConstants.ZInited));
153+
// either X was inited or Y, not both.
154+
Assert.Equal(1, Count2Notes(SlotConstants.XInitedFromY, SlotConstants.YInitedFromX));
155+
}
156+
157+
private static int CountNotes(SlotConstants mask)
158+
{
159+
int found = 0;
160+
foreach (var note in Notes) {
161+
if ((note & mask) != (SlotConstants)0) {
162+
found++;
163+
}
164+
}
165+
return found;
166+
}
167+
168+
private static int Count2Notes(SlotConstants mask1, SlotConstants mask2)
169+
{
170+
int found = 0;
171+
foreach (var note in Notes) {
172+
if ((note & mask1) != (SlotConstants)0) {
173+
found++;
174+
}
175+
if ((note & mask2) != (SlotConstants)0) {
176+
found++;
177+
}
178+
}
179+
return found;
180+
}
181+
182+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
<PropertyGroup>
3+
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
4+
<CLRTestPriority>0</CLRTestPriority>
5+
</PropertyGroup>
6+
<ItemGroup>
7+
<Compile Include="CircularCctorTwoThreadsThreeCC.cs" />
8+
</ItemGroup>
9+
</Project>

src/tests/issues.targets

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3221,6 +3221,9 @@
32213221
<ExcludeList Include = "$(XunitTestBinBase)/tracing/eventcounter/pollingcounter/**">
32223222
<Issue>System.Threading.Thread.UnsafeStart not supported</Issue>
32233223
</ExcludeList>
3224+
<ExcludeList Include = "$(XunitTestBinBase)/Loader/classloader/TypeInitialization/CircularCctors/CircularCctorTwoThreadsThreeCC/**">
3225+
<Issue>System.Threading.Thread.ThrowIfNoThreadStart: PlatformNotSupportedException</Issue>
3226+
</ExcludeList>
32243227
<ExcludeList Include = "$(XunitTestBinBase)/tracing/eventcounter/gh53564/**">
32253228
<Issue>Could not load legacy Microsoft.Diagnostics.Tools.RuntimeClient</Issue>
32263229
</ExcludeList>

0 commit comments

Comments
 (0)