diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp
index fc2370a0759000..2fcceff6d0dd85 100644
--- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp
+++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp
@@ -95,7 +95,7 @@ GVAL_IMPL_INIT(PTR_VOID, g_RhpRethrow2Addr, PointerToRhpRethrow2);
 StackFrameIterator::StackFrameIterator(Thread * pThreadToWalk, PInvokeTransitionFrame* pInitialTransitionFrame)
 {
     STRESS_LOG0(LF_STACKWALK, LL_INFO10000, "----Init---- [ GC ]\n");
-    ASSERT(!pThreadToWalk->DangerousCrossThreadIsHijacked());
+    ASSERT(!pThreadToWalk->IsHijacked());
 
     if (pInitialTransitionFrame == INTERRUPTED_THREAD_MARKER)
     {
@@ -1463,7 +1463,7 @@ void StackFrameIterator::NextInternal()
         else
         {
             // if the thread is safe to walk, it better not have a hijack in place.
-            ASSERT((ThreadStore::GetCurrentThread() == m_pThread) || !m_pThread->DangerousCrossThreadIsHijacked());
+            ASSERT(!m_pThread->IsHijacked());
 
             SetControlPC(dac_cast<PTR_VOID>(*(m_RegDisplay.GetAddrOfIP())));
diff --git a/src/coreclr/nativeaot/Runtime/thread.cpp b/src/coreclr/nativeaot/Runtime/thread.cpp
index 91199d29a9202a..e3d214a6850cef 100644
--- a/src/coreclr/nativeaot/Runtime/thread.cpp
+++ b/src/coreclr/nativeaot/Runtime/thread.cpp
@@ -77,6 +77,10 @@ void Thread::WaitForGC(PInvokeTransitionFrame* pTransitionFrame)
         // set preemptive mode
         VolatileStoreWithoutBarrier(&m_pTransitionFrame, pTransitionFrame);
 
+#ifdef FEATURE_SUSPEND_REDIRECTION
+        ClearState(TSF_Redirected);
+#endif //FEATURE_SUSPEND_REDIRECTION
+
         RedhawkGCInterface::WaitForGCCompletion();
 
         // must be in cooperative mode when checking the trap flag
@@ -587,6 +591,14 @@ void Thread::Hijack()
         return;
     }
 
+#ifdef FEATURE_SUSPEND_REDIRECTION
+    // if the thread is redirected, leave it as-is.
+    if (IsStateSet(TSF_Redirected))
+    {
+        return;
+    }
+#endif //FEATURE_SUSPEND_REDIRECTION
+
     // PalHijack will call HijackCallback or make the target thread call it.
     // It may also do nothing if the target thread is in inconvenient state.
     PalHijack(m_hPalThread, this);
@@ -806,6 +818,8 @@ bool Thread::Redirect()
     if (!PalSetThreadContext(m_hPalThread, redirectionContext))
         return false;
 
+    // the thread will now inevitably try to suspend
+    SetState(TSF_Redirected);
     redirectionContext->SetIp(origIP);
 
     STRESS_LOG2(LF_STACKWALK, LL_INFO10000, "InternalRedirect: TgtThread = %llx, IP = %p\n",
@@ -887,55 +901,16 @@ void Thread::UnhijackWorker()
     m_uHijackedReturnValueFlags = 0;
 }
 
-// @TODO: it would be very, very nice if we did not have to bleed knowledge of hijacking
-// and hijack state to other components in the runtime. For now, these are only used
-// when getting EH info during exception dispatch. We should find a better way to encapsulate
-// this.
 bool Thread::IsHijacked()
 {
-    // Note: this operation is only valid from the current thread. If one thread invokes
-    // this on another then it may be racing with other changes to the thread's hijack state.
-    ASSERT(ThreadStore::GetCurrentThread() == this);
-
-    return m_pvHijackedReturnAddress != NULL;
-}
+    ASSERT(((ThreadStore::GetCurrentThread() == this) && IsCurrentThreadInCooperativeMode()) ||
+        ThreadStore::GetCurrentThread()->IsGCSpecial() ||
+        ThreadStore::GetCurrentThread() == ThreadStore::GetSuspendingThread()
+    );
 
-//
-// WARNING: This method must ONLY be called during stackwalks when we believe that all threads are
-// synchronized and there is no other thread racing with us trying to apply hijacks.
-//
-bool Thread::DangerousCrossThreadIsHijacked()
-{
-    // If we have a CachedTransitionFrame available, then we're in the proper state. Otherwise, this method
-    // was called from an improper state.
-    ASSERT(GetTransitionFrame() != NULL);
     return m_pvHijackedReturnAddress != NULL;
 }
 
-void * Thread::GetHijackedReturnAddress()
-{
-    // Note: this operation is only valid from the current thread. If one thread invokes
-    // this on another then it may be racing with other changes to the thread's hijack state.
-    ASSERT(IsHijacked());
-    ASSERT(ThreadStore::GetCurrentThread() == this);
-
-    return m_pvHijackedReturnAddress;
-}
-
-void * Thread::GetUnhijackedReturnAddress(void ** ppvReturnAddressLocation)
-{
-    ASSERT(ThreadStore::GetCurrentThread() == this);
-
-    void * pvReturnAddress;
-    if (m_ppvHijackedReturnAddressLocation == ppvReturnAddressLocation)
-        pvReturnAddress = m_pvHijackedReturnAddress;
-    else
-        pvReturnAddress = *ppvReturnAddressLocation;
-
-    ASSERT(GetRuntimeInstance()->IsManaged(pvReturnAddress));
-    return pvReturnAddress;
-}
-
 void Thread::SetState(ThreadStateFlags flags)
 {
     PalInterlockedOr(&m_ThreadStateFlags, flags);
diff --git a/src/coreclr/nativeaot/Runtime/thread.h b/src/coreclr/nativeaot/Runtime/thread.h
index 2c0881acf2fd16..eeebabf05ea8aa 100644
--- a/src/coreclr/nativeaot/Runtime/thread.h
+++ b/src/coreclr/nativeaot/Runtime/thread.h
@@ -136,6 +136,12 @@ class Thread : private ThreadBuffer
 #ifdef FEATURE_GC_STRESS
         TSF_IsRandSeedSet       = 0x00000040,       // set to indicate the random number generator for GCStress was inited
 #endif // FEATURE_GC_STRESS
+
+#ifdef FEATURE_SUSPEND_REDIRECTION
+        TSF_Redirected          = 0x00000080,       // Set to indicate the thread is redirected and will inevitably
+                                                    // suspend once resumed.
+                                                    // If we see this flag, we skip hijacking as an optimization.
+#endif //FEATURE_SUSPEND_REDIRECTION
     };
 
 private:
@@ -197,13 +203,11 @@ class Thread : private ThreadBuffer
 
     void                Hijack();
     void                Unhijack();
+    bool                IsHijacked();
+
 #ifdef FEATURE_GC_STRESS
     static void         HijackForGcStress(PAL_LIMITED_CONTEXT * pSuspendCtx);
 #endif // FEATURE_GC_STRESS
-    bool                IsHijacked();
-    void *              GetHijackedReturnAddress();
-    void *              GetUnhijackedReturnAddress(void** ppvReturnAddressLocation);
-    bool                DangerousCrossThreadIsHijacked();
 
     bool                IsSuppressGcStressSet();
     void                SetSuppressGcStress();
@@ -230,7 +234,7 @@ class Thread : private ThreadBuffer
 #endif // DACCESS_COMPILE
 #ifdef FEATURE_GC_STRESS
     void                SetRandomSeed(uint32_t seed);
-    uint32_t            NextRand();
+    uint32_t            NextRand();
     bool                IsRandInited();
 #endif // FEATURE_GC_STRESS
     PTR_ExInfo          GetCurExInfo();
diff --git a/src/coreclr/nativeaot/Runtime/threadstore.cpp b/src/coreclr/nativeaot/Runtime/threadstore.cpp
index 772787246bc8da..f928e83f4ba785 100644
--- a/src/coreclr/nativeaot/Runtime/threadstore.cpp
+++ b/src/coreclr/nativeaot/Runtime/threadstore.cpp
@@ -196,6 +196,35 @@ void ThreadStore::UnlockThreadStore()
     m_Lock.ReleaseReadLock();
 }
 
+// exponential spinwait with an approximate time limit for waiting in microsecond range.
+// when iteration == -1, only usecLimit is used
+void SpinWait(int iteration, int usecLimit)
+{
+    LARGE_INTEGER li;
+    PalQueryPerformanceCounter(&li);
+    int64_t startTicks = li.QuadPart;
+
+    PalQueryPerformanceFrequency(&li);
+    int64_t ticksPerSecond = li.QuadPart;
+    int64_t endTicks = startTicks + (usecLimit * ticksPerSecond) / 1000000;
+
+    int l = min((unsigned)iteration, 30);
+    for (int i = 0; i < l; i++)
+    {
+        for (int j = 0; j < (1 << i); j++)
+        {
+            System_YieldProcessor();
+        }
+
+        PalQueryPerformanceCounter(&li);
+        int64_t currentTicks = li.QuadPart;
+        if (currentTicks > endTicks)
+        {
+            break;
+        }
+    }
+}
+
 void ThreadStore::SuspendAllThreads(bool waitForGCEvent)
 {
     Thread * pThisThread = GetCurrentThreadIfAvailable();
@@ -216,12 +245,16 @@ void ThreadStore::SuspendAllThreads(bool waitForGCEvent)
     // reason for this is that we essentially implement Dekker's algorithm, which requires write ordering.
     PalFlushProcessWriteBuffers();
 
-    bool keepWaiting;
-    YieldProcessorNormalizationInfo normalizationInfo;
-    int waitCycles = 1;
-    do
+    int retries = 0;
+    int prevRemaining = 0;
+    int remaining = 0;
+    bool observeOnly = false;
+
+    while(true)
     {
-        keepWaiting = false;
+        prevRemaining = remaining;
+        remaining = 0;
+
         FOREACH_THREAD(pTargetThread)
         {
            if (pTargetThread == pThisThread)
@@ -229,37 +262,40 @@ void ThreadStore::SuspendAllThreads(bool waitForGCEvent)
 
             if (!pTargetThread->CacheTransitionFrameForSuspend())
             {
-                // We drive all threads to preemptive mode by hijacking them with return-address hijack.
-                keepWaiting = true;
-                pTargetThread->Hijack();
+                remaining++;
+                if (!observeOnly)
+                {
+                    pTargetThread->Hijack();
+                }
             }
         }
         END_FOREACH_THREAD
 
-        if (keepWaiting)
+        if (!remaining)
+            break;
+
+        // if we see progress or have just done a hijacking pass
+        // do not hijack in the next iteration
+        if (remaining < prevRemaining || !observeOnly)
         {
-            if (PalSwitchToThread() == 0 && g_RhNumberOfProcessors > 1)
+            // 5 usec delay, then check for more progress
+            SpinWait(-1, 5);
+            observeOnly = true;
+        }
+        else
+        {
+            SpinWait(retries++, 100);
+            observeOnly = false;
+
+            // make sure our spinning is not starving other threads, but not too often,
+            // this can cause a 1-15 msec delay, depending on OS, and that is a lot while
+            // very rarely needed, since threads are supposed to be releasing their CPUs
+            if ((retries & 127) == 0)
             {
-                // No threads are scheduled on this processor. Perhaps we're waiting for a thread
-                // that's scheduled on another processor. If so, let's give it a little time
-                // to make forward progress.
-                // Note that we do not call Sleep, because the minimum granularity of Sleep is much
-                // too long (we probably don't need a 15ms wait here). Instead, we'll just burn some
-                // cycles.
-                // @TODO: need tuning for spin
-                // @TODO: need tuning for this whole loop as well.
-                // we are likley too aggressive with interruptions which may result in longer pauses.
-                YieldProcessorNormalizedForPreSkylakeCount(normalizationInfo, waitCycles);
-
-                // simplistic linear backoff for now
-                // we could be catching threads in restartable sequences such as LL/SC style interlocked on ARM64
-                // and forcing them to restart.
-                // if interrupt mechanism is fast, eagerness could be hurting our overall progress.
-                waitCycles += 10000;
+                PalSwitchToThread();
             }
         }
-
-    } while (keepWaiting);
+    }
 
 #if defined(TARGET_ARM) || defined(TARGET_ARM64)
     // Flush the store buffers on all CPUs, to ensure that all changes made so far are seen
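
The new suspension loop alternates hijacking passes with observe-only passes: while the count of threads still in cooperative mode keeps shrinking, it waits about 5 microseconds between checks (SpinWait(-1, 5)); once an observe-only pass shows no progress, it waits with SpinWait(retries++, 100), an exponentially growing spin bounded by roughly 100 microseconds, before hijacking again. Below is a minimal sketch of that SpinWait backoff, assuming standard C++ with std::chrono in place of PalQueryPerformanceCounter/Frequency; the names spin_wait_approx and yield_processor are illustrative stand-ins, not part of the patch.

#include <algorithm>
#include <atomic>
#include <chrono>

// Stand-in for System_YieldProcessor(). On x86 the real helper issues a PAUSE
// instruction; here a compiler-only fence merely keeps the busy loop from
// being optimized away in this sketch.
static inline void yield_processor()
{
    std::atomic_signal_fence(std::memory_order_seq_cst);
}

// Spin in exponentially growing bursts (1, 2, 4, ... yields per pass), but
// stop once roughly usecLimit microseconds have elapsed. Passing
// iteration == -1 saturates the pass cap at 30, so only the time limit
// matters, mirroring SpinWait(-1, usec) in the patch.
static void spin_wait_approx(int iteration, int usecLimit)
{
    using clock = std::chrono::steady_clock;
    const auto deadline = clock::now() + std::chrono::microseconds(usecLimit);

    const int passes = (int)std::min((unsigned)iteration, 30u);
    for (int i = 0; i < passes; i++)
    {
        for (int j = 0; j < (1 << i); j++)
        {
            yield_processor();
        }

        if (clock::now() > deadline)
        {
            break;
        }
    }
}

As in the patch, the occasional PalSwitchToThread() (every 128th retry) remains the only point in the loop where the suspending thread yields its CPU to the scheduler; everything else is bounded busy-waiting.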