Skip to content

Commit d391085

Browse files
authored
Update stub decoding for .NET 8 for disassemblers (#2416)
* Update stub decoding for .NET 8 for disassemblers. The call counting stub, stub precode, and fixup precode were modified in .NET 8 to use a larger size for the interleaved code/data blocks. The stub decoders in the Intel and Arm64 disassemblers need to be updated to take that into account.
1 parent e0c667f commit d391085

File tree

2 files changed

+105
-59
lines changed

2 files changed

+105
-59
lines changed

src/BenchmarkDotNet/Disassemblers/Arm64Disassembler.cs

Lines changed: 56 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -141,45 +141,63 @@ public void Feed(Arm64Instruction instruction)
141141

142142
internal class Arm64Disassembler : ClrMdV2Disassembler
143143
{
144-
// See dotnet/runtime src/coreclr/vm/arm64/thunktemplates.asm/.S for the stub code
145-
// ldr x9, DATA_SLOT(CallCountingStub, RemainingCallCountCell)
146-
// ldrh w10, [x9]
147-
// subs w10, w10, #0x1
148-
private static byte[] callCountingStubTemplate = new byte[12] { 0x09, 0x00, 0x00, 0x58, 0x2a, 0x01, 0x40, 0x79, 0x4a, 0x05, 0x00, 0x71 };
149-
// ldr x10, DATA_SLOT(StubPrecode, Target)
150-
// ldr x12, DATA_SLOT(StubPrecode, MethodDesc)
151-
// br x10
152-
private static byte[] stubPrecodeTemplate = new byte[12] { 0x4a, 0x00, 0x00, 0x58, 0xec, 0x00, 0x00, 0x58, 0x40, 0x01, 0x1f, 0xd6 };
153-
// ldr x11, DATA_SLOT(FixupPrecode, Target)
154-
// br x11
155-
// ldr x12, DATA_SLOT(FixupPrecode, MethodDesc)
156-
private static byte[] fixupPrecodeTemplate = new byte[12] { 0x0b, 0x00, 0x00, 0x58, 0x60, 0x01, 0x1f, 0xd6, 0x0c, 0x00, 0x00, 0x58 };
157-
158-
static Arm64Disassembler()
144+
internal sealed class RuntimeSpecificData
159145
{
160-
// The stubs code depends on the current OS memory page size, so we need to update the templates to reflect that
161-
int pageSizeShifted = Environment.SystemPageSize / 32;
162-
// Calculate the ldr x9, #offset instruction with offset based on the page size
163-
callCountingStubTemplate[1] = (byte)(pageSizeShifted & 0xff);
164-
callCountingStubTemplate[2] = (byte)(pageSizeShifted >> 8);
146+
// See dotnet/runtime src/coreclr/vm/arm64/thunktemplates.asm/.S for the stub code
147+
// ldr x9, DATA_SLOT(CallCountingStub, RemainingCallCountCell)
148+
// ldrh w10, [x9]
149+
// subs w10, w10, #0x1
150+
internal readonly byte[] callCountingStubTemplate = new byte[12] { 0x09, 0x00, 0x00, 0x58, 0x2a, 0x01, 0x40, 0x79, 0x4a, 0x05, 0x00, 0x71 };
151+
// ldr x10, DATA_SLOT(StubPrecode, Target)
152+
// ldr x12, DATA_SLOT(StubPrecode, MethodDesc)
153+
// br x10
154+
internal readonly byte[] stubPrecodeTemplate = new byte[12] { 0x4a, 0x00, 0x00, 0x58, 0xec, 0x00, 0x00, 0x58, 0x40, 0x01, 0x1f, 0xd6 };
155+
// ldr x11, DATA_SLOT(FixupPrecode, Target)
156+
// br x11
157+
// ldr x12, DATA_SLOT(FixupPrecode, MethodDesc)
158+
internal readonly byte[] fixupPrecodeTemplate = new byte[12] { 0x0b, 0x00, 0x00, 0x58, 0x60, 0x01, 0x1f, 0xd6, 0x0c, 0x00, 0x00, 0x58 };
159+
internal readonly ulong stubPageSize;
160+
161+
internal RuntimeSpecificData(State state)
162+
{
163+
stubPageSize = (ulong)Environment.SystemPageSize;
164+
if (state.RuntimeVersion.Major >= 8)
165+
{
166+
// In .NET 8, the stub page size was changed to min 16kB
167+
stubPageSize = Math.Max(stubPageSize, 16384);
168+
}
169+
170+
// The stubs code depends on the current OS memory page size, so we need to update the templates to reflect that
171+
ulong pageSizeShifted = stubPageSize / 32;
172+
// Calculate the ldr x9, #offset instruction with offset based on the page size
173+
callCountingStubTemplate[1] = (byte)(pageSizeShifted & 0xff);
174+
callCountingStubTemplate[2] = (byte)(pageSizeShifted >> 8);
165175

166-
// Calculate the ldr x10, #offset instruction with offset based on the page size
167-
stubPrecodeTemplate[1] = (byte)(pageSizeShifted & 0xff);
168-
stubPrecodeTemplate[2] = (byte)(pageSizeShifted >> 8);
169-
// Calculate the ldr x12, #offset instruction with offset based on the page size
170-
stubPrecodeTemplate[5] = (byte)((pageSizeShifted - 1) & 0xff);
171-
stubPrecodeTemplate[6] = (byte)((pageSizeShifted - 1) >> 8);
176+
// Calculate the ldr x10, #offset instruction with offset based on the page size
177+
stubPrecodeTemplate[1] = (byte)(pageSizeShifted & 0xff);
178+
stubPrecodeTemplate[2] = (byte)(pageSizeShifted >> 8);
179+
// Calculate the ldr x12, #offset instruction with offset based on the page size
180+
stubPrecodeTemplate[5] = (byte)((pageSizeShifted - 1) & 0xff);
181+
stubPrecodeTemplate[6] = (byte)((pageSizeShifted - 1) >> 8);
172182

173-
// Calculate the ldr x11, #offset instruction with offset based on the page size
174-
fixupPrecodeTemplate[1] = (byte)(pageSizeShifted & 0xff);
175-
fixupPrecodeTemplate[2] = (byte)(pageSizeShifted >> 8);
176-
// Calculate the ldr x12, #offset instruction with offset based on the page size
177-
fixupPrecodeTemplate[9] = (byte)(pageSizeShifted & 0xff);
178-
fixupPrecodeTemplate[10] = (byte)(pageSizeShifted >> 8);
183+
// Calculate the ldr x11, #offset instruction with offset based on the page size
184+
fixupPrecodeTemplate[1] = (byte)(pageSizeShifted & 0xff);
185+
fixupPrecodeTemplate[2] = (byte)(pageSizeShifted >> 8);
186+
// Calculate the ldr x12, #offset instruction with offset based on the page size
187+
fixupPrecodeTemplate[9] = (byte)(pageSizeShifted & 0xff);
188+
fixupPrecodeTemplate[10] = (byte)(pageSizeShifted >> 8);
189+
}
179190
}
180191

192+
private static readonly Dictionary<Version, RuntimeSpecificData> runtimeSpecificData = new ();
193+
181194
protected override IEnumerable<Asm> Decode(byte[] code, ulong startAddress, State state, int depth, ClrMethod currentMethod, DisassemblySyntax syntax)
182195
{
196+
if (!runtimeSpecificData.TryGetValue(state.RuntimeVersion, out RuntimeSpecificData data))
197+
{
198+
runtimeSpecificData.Add(state.RuntimeVersion, data = new RuntimeSpecificData(state));
199+
}
200+
183201
const Arm64DisassembleMode disassembleMode = Arm64DisassembleMode.Arm;
184202
using (CapstoneArm64Disassembler disassembler = CapstoneDisassembler.CreateArm64Disassembler(disassembleMode))
185203
{
@@ -210,21 +228,21 @@ protected override IEnumerable<Asm> Decode(byte[] code, ulong startAddress, Stat
210228

211229
if (state.Runtime.DataTarget.DataReader.Read(address, buffer) == buffer.Length)
212230
{
213-
if (buffer.SequenceEqual(callCountingStubTemplate))
231+
if (buffer.SequenceEqual(data.callCountingStubTemplate))
214232
{
215233
const ulong TargetMethodAddressSlotOffset = 8;
216-
address = state.Runtime.DataTarget.DataReader.ReadPointer(address + (ulong)Environment.SystemPageSize + TargetMethodAddressSlotOffset);
234+
address = state.Runtime.DataTarget.DataReader.ReadPointer(address + data.stubPageSize + TargetMethodAddressSlotOffset);
217235
}
218-
else if (buffer.SequenceEqual(stubPrecodeTemplate))
236+
else if (buffer.SequenceEqual(data.stubPrecodeTemplate))
219237
{
220238
const ulong MethodDescSlotOffset = 0;
221-
address = state.Runtime.DataTarget.DataReader.ReadPointer(address + (ulong)Environment.SystemPageSize + MethodDescSlotOffset);
239+
address = state.Runtime.DataTarget.DataReader.ReadPointer(address + data.stubPageSize + MethodDescSlotOffset);
222240
isPrestubMD = true;
223241
}
224-
else if (buffer.SequenceEqual(fixupPrecodeTemplate))
242+
else if (buffer.SequenceEqual(data.fixupPrecodeTemplate))
225243
{
226244
const ulong MethodDescSlotOffset = 8;
227-
address = state.Runtime.DataTarget.DataReader.ReadPointer(address + (ulong)Environment.SystemPageSize + MethodDescSlotOffset);
245+
address = state.Runtime.DataTarget.DataReader.ReadPointer(address + data.stubPageSize + MethodDescSlotOffset);
228246
isPrestubMD = true;
229247
}
230248
}

src/BenchmarkDotNet/Disassemblers/IntelDisassembler.cs

Lines changed: 49 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -9,24 +9,52 @@ namespace BenchmarkDotNet.Disassemblers
99
{
1010
internal class IntelDisassembler : ClrMdV2Disassembler
1111
{
12-
// See dotnet/runtime src/coreclr/vm/amd64/thunktemplates.asm/.S for the stub code
13-
// mov rax,QWORD PTR [rip + DATA_SLOT(CallCountingStub, RemainingCallCountCell)]
14-
// dec WORD PTR [rax]
15-
// je LOCAL_LABEL(CountReachedZero)
16-
// jmp QWORD PTR [rip + DATA_SLOT(CallCountingStub, TargetForMethod)]
17-
// LOCAL_LABEL(CountReachedZero):
18-
// jmp QWORD PTR [rip + DATA_SLOT(CallCountingStub, TargetForThresholdReached)]
19-
private static byte[] callCountingStubTemplate = new byte[10] { 0x48, 0x8b, 0x05, 0xf9, 0x0f, 0x00, 0x00, 0x66, 0xff, 0x08 };
20-
// mov r10, [rip + DATA_SLOT(StubPrecode, MethodDesc)]
21-
// jmp [rip + DATA_SLOT(StubPrecode, Target)]
22-
private static byte[] stubPrecodeTemplate = new byte[13] { 0x4c, 0x8b, 0x15, 0xf9, 0x0f, 0x00, 0x00, 0xff, 0x25, 0xfb, 0x0f, 0x00, 0x00 };
23-
// jmp [rip + DATA_SLOT(FixupPrecode, Target)]
24-
// mov r10, [rip + DATA_SLOT(FixupPrecode, MethodDesc)]
25-
// jmp [rip + DATA_SLOT(FixupPrecode, PrecodeFixupThunk)]
26-
private static byte[] fixupPrecodeTemplate = new byte[19] { 0xff, 0x25, 0xfa, 0x0f, 0x00, 0x00, 0x4c, 0x8b, 0x15, 0xfb, 0x0f, 0x00, 0x00, 0xff, 0x25, 0xfd, 0x0f, 0x00, 0x00 };
12+
internal sealed class RuntimeSpecificData
13+
{
14+
// See dotnet/runtime src/coreclr/vm/amd64/thunktemplates.asm/.S for the stub code
15+
// mov rax,QWORD PTR [rip + DATA_SLOT(CallCountingStub, RemainingCallCountCell)]
16+
// dec WORD PTR [rax]
17+
// je LOCAL_LABEL(CountReachedZero)
18+
// jmp QWORD PTR [rip + DATA_SLOT(CallCountingStub, TargetForMethod)]
19+
// LOCAL_LABEL(CountReachedZero):
20+
// jmp QWORD PTR [rip + DATA_SLOT(CallCountingStub, TargetForThresholdReached)]
21+
internal readonly byte[] callCountingStubTemplate = new byte[10] { 0x48, 0x8b, 0x05, 0xf9, 0x0f, 0x00, 0x00, 0x66, 0xff, 0x08 };
22+
// mov r10, [rip + DATA_SLOT(StubPrecode, MethodDesc)]
23+
// jmp [rip + DATA_SLOT(StubPrecode, Target)]
24+
internal readonly byte[] stubPrecodeTemplate = new byte[13] { 0x4c, 0x8b, 0x15, 0xf9, 0x0f, 0x00, 0x00, 0xff, 0x25, 0xfb, 0x0f, 0x00, 0x00 };
25+
// jmp [rip + DATA_SLOT(FixupPrecode, Target)]
26+
// mov r10, [rip + DATA_SLOT(FixupPrecode, MethodDesc)]
27+
// jmp [rip + DATA_SLOT(FixupPrecode, PrecodeFixupThunk)]
28+
internal readonly byte[] fixupPrecodeTemplate = new byte[19] { 0xff, 0x25, 0xfa, 0x0f, 0x00, 0x00, 0x4c, 0x8b, 0x15, 0xfb, 0x0f, 0x00, 0x00, 0xff, 0x25, 0xfd, 0x0f, 0x00, 0x00 };
29+
internal readonly ulong stubPageSize;
30+
31+
internal RuntimeSpecificData(State state)
32+
{
33+
stubPageSize = (ulong)Environment.SystemPageSize;
34+
if (state.RuntimeVersion.Major >= 8)
35+
{
36+
// In .NET 8, the stub page size was changed to 16kB
37+
stubPageSize = 16384;
38+
// Update the templates so that the offsets are correct
39+
callCountingStubTemplate[4] = 0x3f;
40+
stubPrecodeTemplate[4] = 0x3f;
41+
stubPrecodeTemplate[10] = 0x3f;
42+
fixupPrecodeTemplate[3] = 0x3f;
43+
fixupPrecodeTemplate[10] = 0x3f;
44+
fixupPrecodeTemplate[16] = 0x3f;
45+
}
46+
}
47+
}
48+
49+
private static readonly Dictionary<Version, RuntimeSpecificData> runtimeSpecificData = new ();
2750

2851
protected override IEnumerable<Asm> Decode(byte[] code, ulong startAddress, State state, int depth, ClrMethod currentMethod, DisassemblySyntax syntax)
2952
{
53+
if (!runtimeSpecificData.TryGetValue(state.RuntimeVersion, out RuntimeSpecificData data))
54+
{
55+
runtimeSpecificData.Add(state.RuntimeVersion, data = new RuntimeSpecificData(state));
56+
}
57+
3058
var reader = new ByteArrayCodeReader(code);
3159
var decoder = Decoder.Create(state.Runtime.DataTarget.DataReader.PointerSize * 8, reader);
3260
decoder.IP = startAddress;
@@ -53,27 +81,27 @@ protected override IEnumerable<Asm> Decode(byte[] code, ulong startAddress, Stat
5381

5482
FlushCachedDataIfNeeded(state.Runtime.DataTarget.DataReader, address, buffer);
5583

56-
if (state.Runtime.DataTarget.DataReader.Read(address, buffer) == buffer.Length && buffer.SequenceEqual(callCountingStubTemplate))
84+
if (state.Runtime.DataTarget.DataReader.Read(address, buffer) == buffer.Length && buffer.SequenceEqual(data.callCountingStubTemplate))
5785
{
5886
const ulong TargetMethodAddressSlotOffset = 8;
59-
address = state.Runtime.DataTarget.DataReader.ReadPointer(address + (ulong)Environment.SystemPageSize + TargetMethodAddressSlotOffset);
87+
address = state.Runtime.DataTarget.DataReader.ReadPointer(address + data.stubPageSize + TargetMethodAddressSlotOffset);
6088
}
6189
else
6290
{
6391
buffer = new byte[13];
64-
if (state.Runtime.DataTarget.DataReader.Read(address, buffer) == buffer.Length && buffer.SequenceEqual(stubPrecodeTemplate))
92+
if (state.Runtime.DataTarget.DataReader.Read(address, buffer) == buffer.Length && buffer.SequenceEqual(data.stubPrecodeTemplate))
6593
{
6694
const ulong MethodDescSlotOffset = 0;
67-
address = state.Runtime.DataTarget.DataReader.ReadPointer(address + (ulong)Environment.SystemPageSize + MethodDescSlotOffset);
95+
address = state.Runtime.DataTarget.DataReader.ReadPointer(address + data.stubPageSize + MethodDescSlotOffset);
6896
isPrestubMD = true;
6997
}
7098
else
7199
{
72100
buffer = new byte[19];
73-
if (state.Runtime.DataTarget.DataReader.Read(address, buffer) == buffer.Length && buffer.SequenceEqual(fixupPrecodeTemplate))
101+
if (state.Runtime.DataTarget.DataReader.Read(address, buffer) == buffer.Length && buffer.SequenceEqual(data.fixupPrecodeTemplate))
74102
{
75103
const ulong MethodDescSlotOffset = 8;
76-
address = state.Runtime.DataTarget.DataReader.ReadPointer(address + (ulong)Environment.SystemPageSize + MethodDescSlotOffset);
104+
address = state.Runtime.DataTarget.DataReader.ReadPointer(address + data.stubPageSize + MethodDescSlotOffset);
77105
isPrestubMD = true;
78106
}
79107

0 commit comments

Comments
 (0)