-
Notifications
You must be signed in to change notification settings - Fork 5.3k
Open
Labels
Milestone
Description
We observe increase in average GC time of .NET 6 (6.0-preview7 and v6.0.0) in comparison to .NET Core 3.1 (3.1.3 and v3.1.20). This slowdown is observed only on arm/arm64, there's no slowdown on x64.
test:
using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
namespace gctest
{
class Program
{
static void Main(string[] args)
{
Console.WriteLine("Hello World!");
RandomArray();
}
static List<Object> cacheObj = new List<Object>();
static private void RandomArray()
{
while (true)
{
Random r = new Random();
if (r.Next(100) == 0)
{
cacheObj = null;
cacheObj = new List<Object>();
}
else
{
byte[] buffer = new byte[r.Next(400000) + 1000];
for (int i = 0; i < buffer.Length; ++i)
{
buffer[i] = (byte)r.Next();
}
cacheObj.Add(buffer);
}
}
}
}
}1) v3.1.20 arm64 (rpi4, ubuntu 20.10)
coreclr: 63ecd17b41a826d8fbc77b238e8490df7bc92af4, v3.1.20 tag
corefx: c2011c5350eba4c83554205d06b9ed97c3fccf05, v3.1.20 tag
Patch to calculate time:
diff --git a/src/gc/gcee.cpp b/src/gc/gcee.cpp
index c75ba9bb238..eecfd37c729 100644
--- a/src/gc/gcee.cpp
+++ b/src/gc/gcee.cpp
@@ -20,6 +20,8 @@ uint32_t g_percentTimeInGCSinceLastGC = 0;
size_t g_GenerationSizes[NUMBERGENERATIONS];
size_t g_GenerationPromotedSizes[NUMBERGENERATIONS];
+uint64_t GetHighPrecisionTimeStamp();
+
void GCHeap::UpdatePreGCCounters()
{
#ifdef MULTIPLE_HEAPS
@@ -86,10 +88,16 @@ void GCHeap::UpdatePreGCCounters()
uint64_t rangeReserved = static_cast<uint64_t>(rangeEndReserved - rangeStart);
FIRE_EVENT(GCGenerationRange, generation, rangeStart, range, rangeReserved);
}, nullptr);
+
+ uint64_t end = GetHighPrecisionTimeStamp();
+ printf("GCHeap::UpdatePreGCCounters end -> %llu\n", (long long unsigned) end);
}
void GCHeap::UpdatePostGCCounters()
{
+ uint64_t start = GetHighPrecisionTimeStamp();
+ printf("GCHeap::UpdatePostGCCounters start -> %llu\n", (long long unsigned) start);
+
totalSurvivedSize = gc_heap::get_total_survived_size();
//
@@ -268,8 +276,6 @@ size_t GCHeap::GetLastGCDuration(int generation)
return dd_gc_elapsed_time (hp->dynamic_data_of (generation));
}
-size_t GetHighPrecisionTimeStamp();
-
size_t GCHeap::GetNow()
{
return GetHighPrecisionTimeStamp();Also, small change is needed to support clang10 build (simply add clang10 option to build.sh).
How to build:
# coreclr
ROOTFS_DIR=`pwd`/rootfs/arm64 ./build.sh -portablebuild=false -clang10 -cross -arm64 -Release -skipcrossgen
# corefx
ROOTFS_DIR=`pwd/rootfs/arm64 src/Native/build-native.sh -cross -clang10 -Release -arm64
ROOTFS_DIR=`pwd/rootfs/arm64 ./build.sh -c Release /p:BinPlaceNETCoreAppPackage=true /p:BuildNative=false /p:UseSharedCompilation=falseResult:
Count GC: 3385
Total GC time: 13.116 seconds
Average GC time (post start - pre end): 0.0038747415066469723 seconds
Max GC time: 0.054 seconds
2) v6.0.0 arm64 (rpi4, ubuntu 20.10)
4822e3c, v6.0.0 tag
Patch to calculate time:
diff --git a/src/coreclr/gc/gcee.cpp b/src/coreclr/gc/gcee.cpp
index afc5d3a8dd2..3b24911d130 100644
--- a/src/coreclr/gc/gcee.cpp
+++ b/src/coreclr/gc/gcee.cpp
@@ -19,6 +19,8 @@ uint32_t g_percentTimeInGCSinceLastGC = 0;
size_t g_GenerationSizes[total_generation_count];
size_t g_GenerationPromotedSizes[total_generation_count];
+uint64_t GetHighPrecisionTimeStamp();
+
void GCHeap::UpdatePreGCCounters()
{
#ifdef MULTIPLE_HEAPS
@@ -54,6 +56,9 @@ void GCHeap::UpdatePreGCCounters()
FIRE_EVENT(GCStart_V2, count, depth, reason, static_cast<uint32_t>(type));
ReportGenerationBounds();
+
+ uint64_t end = GetHighPrecisionTimeStamp();
+ printf("GCHeap::UpdatePreGCCounters end -> %llu\n", (long long unsigned) end);
}
void GCHeap::ReportGenerationBounds()
@@ -71,6 +76,9 @@ void GCHeap::ReportGenerationBounds()
void GCHeap::UpdatePostGCCounters()
{
+ uint64_t start = GetHighPrecisionTimeStamp();
+ printf("GCHeap::UpdatePostGCCounters start -> %llu\n", (long long unsigned) start);
+
totalSurvivedSize = gc_heap::get_total_survived_size();
//
@@ -243,8 +251,6 @@ size_t GCHeap::GetLastGCDuration(int generation)
return (size_t)(dd_gc_elapsed_time (hp->dynamic_data_of (generation)) / 1000);
}
-uint64_t GetHighPrecisionTimeStamp();
-
size_t GCHeap::GetNow()
{
return (size_t)(GetHighPrecisionTimeStamp() / 1000);How to build:
ROOTFS_DIR=`pwd`/.tools/rootfs/arm64 ./build.sh --portablebuild false --cross --clang10 --arch arm64 --runtimeConfiguration Release --librariesConfiguration Release --subset clr+libs.native /p:EnableSourceLink=false
ROOTFS_DIR=`pwd`/.tools/rootfs/arm64 ./build.sh --cross --clang10 --arch arm64 --runtimeConfiguration Release --librariesConfiguration Release --subset libs.ref+libs.src+libs.pretest+libs.packages+libs.tests --testscope all /p:EnableSourceLink=falseResult:
Count GC: 3386
Total GC time: 13.850371 seconds
Average GC time (post start - pre end): 0.004090481689308919 seconds
Max GC time: 0.061811 seconds
In this case there's 5.7% slowdown, we also observed 9% slowdown on arm64 rpi4. On armel slowdown in average GC time grows up to 10-20%. Can this be related to introduction of pinned object heap in .NET 5?
LeszekKalibrate
Metadata
Metadata
Assignees
Labels
Type
Projects
Status
No status