
[NativeAOT] Follow up changes for the suspension loop routine. #73741


Merged 4 commits on Aug 12, 2022
4 changes: 2 additions & 2 deletions src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp
@@ -95,7 +95,7 @@ GVAL_IMPL_INIT(PTR_VOID, g_RhpRethrow2Addr, PointerToRhpRethrow2);
StackFrameIterator::StackFrameIterator(Thread * pThreadToWalk, PInvokeTransitionFrame* pInitialTransitionFrame)
{
STRESS_LOG0(LF_STACKWALK, LL_INFO10000, "----Init---- [ GC ]\n");
ASSERT(!pThreadToWalk->DangerousCrossThreadIsHijacked());
ASSERT(!pThreadToWalk->IsHijacked());

if (pInitialTransitionFrame == INTERRUPTED_THREAD_MARKER)
{
@@ -1463,7 +1463,7 @@ void StackFrameIterator::NextInternal()
else
{
// if the thread is safe to walk, it better not have a hijack in place.
ASSERT((ThreadStore::GetCurrentThread() == m_pThread) || !m_pThread->DangerousCrossThreadIsHijacked());
ASSERT(!m_pThread->IsHijacked());

SetControlPC(dac_cast<PTR_VOID>(*(m_RegDisplay.GetAddrOfIP())));

61 changes: 18 additions & 43 deletions src/coreclr/nativeaot/Runtime/thread.cpp
@@ -77,6 +77,10 @@ void Thread::WaitForGC(PInvokeTransitionFrame* pTransitionFrame)
// set preemptive mode
VolatileStoreWithoutBarrier(&m_pTransitionFrame, pTransitionFrame);

#ifdef FEATURE_SUSPEND_REDIRECTION
ClearState(TSF_Redirected);
#endif //FEATURE_SUSPEND_REDIRECTION

RedhawkGCInterface::WaitForGCCompletion();

// must be in cooperative mode when checking the trap flag
@@ -587,6 +591,14 @@ void Thread::Hijack()
return;
}

#ifdef FEATURE_SUSPEND_REDIRECTION
// if the thread is redirected, leave it as-is.
if (IsStateSet(TSF_Redirected))
{
return;
}
#endif //FEATURE_SUSPEND_REDIRECTION

// PalHijack will call HijackCallback or make the target thread call it.
// It may also do nothing if the target thread is in an inconvenient state.
PalHijack(m_hPalThread, this);
@@ -806,6 +818,8 @@ bool Thread::Redirect()
if (!PalSetThreadContext(m_hPalThread, redirectionContext))
return false;

// the thread will now inevitably try to suspend
SetState(TSF_Redirected);
redirectionContext->SetIp(origIP);

STRESS_LOG2(LF_STACKWALK, LL_INFO10000, "InternalRedirect: TgtThread = %llx, IP = %p\n",
@@ -887,55 +901,16 @@ void Thread::UnhijackWorker()
m_uHijackedReturnValueFlags = 0;
}

// @TODO: it would be very, very nice if we did not have to bleed knowledge of hijacking
// and hijack state to other components in the runtime. For now, these are only used
// when getting EH info during exception dispatch. We should find a better way to encapsulate
// this.
bool Thread::IsHijacked()
{
// Note: this operation is only valid from the current thread. If one thread invokes
// this on another then it may be racing with other changes to the thread's hijack state.
ASSERT(ThreadStore::GetCurrentThread() == this);

return m_pvHijackedReturnAddress != NULL;
}
ASSERT(((ThreadStore::GetCurrentThread() == this) && IsCurrentThreadInCooperativeMode()) ||
ThreadStore::GetCurrentThread()->IsGCSpecial() ||
ThreadStore::GetCurrentThread() == ThreadStore::GetSuspendingThread()
);

//
// WARNING: This method must ONLY be called during stackwalks when we believe that all threads are
// synchronized and there is no other thread racing with us trying to apply hijacks.
//
bool Thread::DangerousCrossThreadIsHijacked()
{
// If we have a CachedTransitionFrame available, then we're in the proper state. Otherwise, this method
// was called from an improper state.
ASSERT(GetTransitionFrame() != NULL);
return m_pvHijackedReturnAddress != NULL;
}

void * Thread::GetHijackedReturnAddress()
{
// Note: this operation is only valid from the current thread. If one thread invokes
// this on another then it may be racing with other changes to the thread's hijack state.
ASSERT(IsHijacked());
ASSERT(ThreadStore::GetCurrentThread() == this);

return m_pvHijackedReturnAddress;
}

void * Thread::GetUnhijackedReturnAddress(void ** ppvReturnAddressLocation)
{
ASSERT(ThreadStore::GetCurrentThread() == this);

void * pvReturnAddress;
if (m_ppvHijackedReturnAddressLocation == ppvReturnAddressLocation)
pvReturnAddress = m_pvHijackedReturnAddress;
else
pvReturnAddress = *ppvReturnAddressLocation;

ASSERT(GetRuntimeInstance()->IsManaged(pvReturnAddress));
return pvReturnAddress;
}

void Thread::SetState(ThreadStateFlags flags)
{
PalInterlockedOr(&m_ThreadStateFlags, flags);
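Taken together, the thread.cpp changes above form a small handshake around redirection: Redirect() marks the thread once its context has been successfully retargeted, Hijack() skips threads that carry the mark, and WaitForGC() clears it when the thread parks itself for the GC. The following is an illustration only, a compact self-contained model of that handshake with the real method names but heavily simplified bodies; the runtime sets the flag with interlocked operations (see SetState above) and performs the actual work through PalSetThreadContext/PalHijack.

```cpp
#include <cassert>
#include <cstdint>

// Compact model of the redirection handshake added in this PR:
//   Redirect()  - marks the thread once its context has been retargeted
//   Hijack()    - skips marked threads (they will suspend on their own anyway)
//   WaitForGC() - clears the mark when the thread parks itself for the GC
// Bodies are stand-ins; the runtime uses interlocked flag updates and PAL calls.
constexpr uint32_t TSF_Redirected = 0x00000080;

struct ModelThread
{
    uint32_t m_ThreadStateFlags = 0;
    bool     wasHijacked        = false;

    bool Redirect(bool palSetThreadContextSucceeded)
    {
        if (!palSetThreadContextSucceeded)
            return false;
        m_ThreadStateFlags |= TSF_Redirected;   // the thread will now inevitably try to suspend
        return true;
    }

    void Hijack()
    {
        if (m_ThreadStateFlags & TSF_Redirected)
            return;                             // already guaranteed to suspend; skip the hijack
        wasHijacked = true;                     // PalHijack(m_hPalThread, this) in the real code
    }

    void WaitForGC()
    {
        m_ThreadStateFlags &= ~TSF_Redirected;  // the redirection has served its purpose
        // RedhawkGCInterface::WaitForGCCompletion() in the real code
    }
};

int main()
{
    ModelThread t;
    assert(t.Redirect(/* palSetThreadContextSucceeded */ true));
    t.Hijack();
    assert(!t.wasHijacked);   // redirected threads are not hijacked again
    t.WaitForGC();
    t.Hijack();
    assert(t.wasHijacked);    // once the flag is cleared, hijacking applies as before
}
```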
14 changes: 9 additions & 5 deletions src/coreclr/nativeaot/Runtime/thread.h
@@ -136,6 +136,12 @@ class Thread : private ThreadBuffer
#ifdef FEATURE_GC_STRESS
TSF_IsRandSeedSet = 0x00000040, // set to indicate the random number generator for GCStress was inited
#endif // FEATURE_GC_STRESS

#ifdef FEATURE_SUSPEND_REDIRECTION
TSF_Redirected = 0x00000080, // Set to indicate the thread is redirected and will inevitably
// suspend once resumed.
// If we see this flag, we skip hijacking as an optimization.
#endif //FEATURE_SUSPEND_REDIRECTION
};
private:

@@ -197,13 +203,11 @@ class Thread : private ThreadBuffer

void Hijack();
void Unhijack();
bool IsHijacked();

#ifdef FEATURE_GC_STRESS
static void HijackForGcStress(PAL_LIMITED_CONTEXT * pSuspendCtx);
#endif // FEATURE_GC_STRESS
bool IsHijacked();
void * GetHijackedReturnAddress();
void * GetUnhijackedReturnAddress(void** ppvReturnAddressLocation);
bool DangerousCrossThreadIsHijacked();

bool IsSuppressGcStressSet();
void SetSuppressGcStress();
@@ -230,7 +234,7 @@
#endif // DACCESS_COMPILE
#ifdef FEATURE_GC_STRESS
void SetRandomSeed(uint32_t seed);
uint32_t NextRand();
uint32_t NextRand();
bool IsRandInited();
#endif // FEATURE_GC_STRESS
PTR_ExInfo GetCurExInfo();
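The new TSF_Redirected value follows the existing bit-flag convention for m_ThreadStateFlags: each TSF_* constant occupies a distinct bit and is set atomically (SetState in thread.cpp above uses PalInterlockedOr). Below is a standalone sketch of that pattern using std::atomic in place of the PAL interlocked helpers; the ClearState and IsStateSet bodies are assumptions for illustration and are not part of this diff.

```cpp
#include <atomic>
#include <cassert>
#include <cstdint>

// Flag values mirrored from thread.h above; only the ones visible in this diff are listed.
enum ThreadStateFlags : uint32_t
{
    TSF_IsRandSeedSet = 0x00000040,   // FEATURE_GC_STRESS
    TSF_Redirected    = 0x00000080,   // FEATURE_SUSPEND_REDIRECTION
};

struct FlagWord
{
    std::atomic<uint32_t> m_ThreadStateFlags{0};

    // SetState corresponds to PalInterlockedOr(&m_ThreadStateFlags, flags) in the runtime.
    void SetState(uint32_t flags)         { m_ThreadStateFlags.fetch_or(flags); }
    // Assumed counterparts used by the new code paths.
    void ClearState(uint32_t flags)       { m_ThreadStateFlags.fetch_and(~flags); }
    bool IsStateSet(uint32_t flags) const { return (m_ThreadStateFlags.load() & flags) == flags; }
};

int main()
{
    FlagWord f;
    f.SetState(TSF_Redirected);
    assert(f.IsStateSet(TSF_Redirected));
    assert(!f.IsStateSet(TSF_IsRandSeedSet));  // other bits are unaffected
    f.ClearState(TSF_Redirected);
    assert(!f.IsStateSet(TSF_Redirected));
}
```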
92 changes: 64 additions & 28 deletions src/coreclr/nativeaot/Runtime/threadstore.cpp
@@ -196,6 +196,35 @@ void ThreadStore::UnlockThreadStore()
m_Lock.ReleaseReadLock();
}

// exponential spinwait with an approximate time limit for waiting in the microsecond range.
// when iteration == -1, only usecLimit is used
void SpinWait(int iteration, int usecLimit)
{
LARGE_INTEGER li;
PalQueryPerformanceCounter(&li);
int64_t startTicks = li.QuadPart;

PalQueryPerformanceFrequency(&li);
int64_t ticksPerSecond = li.QuadPart;
int64_t endTicks = startTicks + (usecLimit * ticksPerSecond) / 1000000;

int l = min((unsigned)iteration, 30);
for (int i = 0; i < l; i++)
{
for (int j = 0; j < (1 << i); j++)
{
System_YieldProcessor();
}

PalQueryPerformanceCounter(&li);
int64_t currentTicks = li.QuadPart;
if (currentTicks > endTicks)
{
break;
}
}
}

void ThreadStore::SuspendAllThreads(bool waitForGCEvent)
{
Thread * pThisThread = GetCurrentThreadIfAvailable();
@@ -216,50 +245,57 @@
// reason for this is that we essentially implement Dekker's algorithm, which requires write ordering.
PalFlushProcessWriteBuffers();

bool keepWaiting;
YieldProcessorNormalizationInfo normalizationInfo;
int waitCycles = 1;
do
int retries = 0;
int prevRemaining = 0;
int remaining = 0;
bool observeOnly = false;

while(true)
{
keepWaiting = false;
prevRemaining = remaining;
remaining = 0;

FOREACH_THREAD(pTargetThread)
{
if (pTargetThread == pThisThread)
continue;

if (!pTargetThread->CacheTransitionFrameForSuspend())
{
// We drive all threads to preemptive mode by hijacking them with return-address hijack.
keepWaiting = true;
pTargetThread->Hijack();
remaining++;
if (!observeOnly)
{
pTargetThread->Hijack();
}
}
}
END_FOREACH_THREAD

if (keepWaiting)
if (!remaining)
break;

// if we see progress or have just done a hijacking pass
// do not hijack in the next iteration
if (remaining < prevRemaining || !observeOnly)
{
if (PalSwitchToThread() == 0 && g_RhNumberOfProcessors > 1)
// 5 usec delay, then check for more progress
SpinWait(-1, 5);
observeOnly = true;
}
else
{
SpinWait(retries++, 100);
observeOnly = false;

// make sure our spinning is not starving other threads, but not too often;
// this can cause a 1-15 msec delay, depending on the OS, and that is a lot for something
// very rarely needed, since threads are supposed to be releasing their CPUs
if ((retries & 127) == 0)
{
// No threads are scheduled on this processor. Perhaps we're waiting for a thread
// that's scheduled on another processor. If so, let's give it a little time
// to make forward progress.
// Note that we do not call Sleep, because the minimum granularity of Sleep is much
// too long (we probably don't need a 15ms wait here). Instead, we'll just burn some
// cycles.
// @TODO: need tuning for spin
// @TODO: need tuning for this whole loop as well.
// we are likely too aggressive with interruptions, which may result in longer pauses.
YieldProcessorNormalizedForPreSkylakeCount(normalizationInfo, waitCycles);

// simplistic linear backoff for now
// we could be catching threads in restartable sequences such as LL/SC style interlocked on ARM64
// and forcing them to restart.
// if the interrupt mechanism is fast, eagerness could be hurting our overall progress.
waitCycles += 10000;
PalSwitchToThread();
}
}

} while (keepWaiting);
}

#if defined(TARGET_ARM) || defined(TARGET_ARM64)
// Flush the store buffers on all CPUs, to ensure that all changes made so far are seen
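For reference, here is a self-contained sketch of the retry strategy the rewritten loop implements: hijack on the first pass, then alternate a brief time-capped wait with "observe only" passes, back off with exponentially growing spins when no progress is seen, and yield the CPU only occasionally. TargetThread, TryCaptureTransitionFrame, and SuspendAllSketch are illustrative stand-ins for the runtime's Thread, CacheTransitionFrameForSuspend/Hijack, and SuspendAllThreads, and std::this_thread::yield stands in for System_YieldProcessor/PalSwitchToThread.

```cpp
#include <atomic>
#include <chrono>
#include <thread>
#include <vector>

// Time-capped exponential spin, mirroring the SpinWait helper above.
// iteration == -1 means "spin until roughly usecLimit microseconds have elapsed".
static void SpinWaitSketch(int iteration, int usecLimit)
{
    using clock = std::chrono::steady_clock;
    const auto deadline = clock::now() + std::chrono::microseconds(usecLimit);

    const int l = (iteration < 0 || iteration > 30) ? 30 : iteration;
    for (int i = 0; i < l && clock::now() < deadline; i++)
        for (int j = 0; j < (1 << i); j++)
            std::this_thread::yield();           // stand-in for System_YieldProcessor()
}

// Minimal stand-in for a thread that must be driven to preemptive mode.
struct TargetThread
{
    std::atomic<bool> atSafePoint{false};        // set by the target when it reaches a safe point
    std::atomic<bool> hijacked{false};

    bool TryCaptureTransitionFrame() { return atSafePoint.load(); }  // CacheTransitionFrameForSuspend analogue
    void Hijack()                    { hijacked.store(true); }       // return-address hijack analogue
};

void SuspendAllSketch(std::vector<TargetThread*>& threads)
{
    int retries = 0, prevRemaining = 0, remaining = 0;
    bool observeOnly = false;

    while (true)
    {
        prevRemaining = remaining;
        remaining = 0;

        for (TargetThread* t : threads)
        {
            if (!t->TryCaptureTransitionFrame())
            {
                remaining++;
                if (!observeOnly)
                    t->Hijack();                 // hijack only on non-observe passes
            }
        }

        if (remaining == 0)
            break;

        if (remaining < prevRemaining || !observeOnly)
        {
            SpinWaitSketch(-1, 5);               // 5 usec pause, then just re-observe
            observeOnly = true;
        }
        else
        {
            SpinWaitSketch(retries++, 100);      // no progress: back off and hijack again
            observeOnly = false;
            if ((retries & 127) == 0)
                std::this_thread::yield();       // rarely give the CPU away entirely
        }
    }
}

int main()
{
    TargetThread a, b;
    std::vector<TargetThread*> threads{&a, &b};

    // Simulate the targets reaching safe points shortly after suspension starts.
    std::thread sim([&] {
        std::this_thread::sleep_for(std::chrono::microseconds(200));
        a.atSafePoint.store(true);
        b.atSafePoint.store(true);
    });

    SuspendAllSketch(threads);   // returns once every target has been captured
    sim.join();
}
```

The real loop additionally flushes process write buffers around the scan (the Dekker-style ordering noted in the comments above), which this sketch omits.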