Skip to content

Commit b303ffe

Browse files
Support Arm64 "constructed" constants in SuperPMI asm diffs (dotnet#76616)
* Support Arm64 "constructed" constants in SuperPMI asm diffs

  SuperPMI asm diffs tries to ignore constants that can change between multiple replays, such as addresses that the replay engine must generate and not simply hand back from the collected data. Often, addresses have associated relocations generated during replay. SuperPMI can use these relocations to adjust the constants to allow two replays to match.

  However, there are cases on Arm64 where an address both doesn't report a relocation and is "constructed" using multiple `mov`/`movk` instructions. One case is the `allocPgoInstrumentationBySchema()` API which returns a pointer to a PGO data buffer. An address within this buffer is constructed via a sequence such as:

  ```
  mov  x0, #63408
  movk x0, #23602, lsl #16
  movk x0, #606, lsl #32
  ```

  When SuperPMI replays this API, it constructs a new buffer and returns that pointer, which is used to construct various actual addresses that are generated as "constructed" constants, shown above.

  This change "de-constructs" the constants and looks them up in the replay address map. If base and diff match the mapped constants, there is no asm diff.

* Fix 32-bit build

  I don't think we fully support 64-bit replay on 32-bit host, but this fix at least makes it possible for this case.

* Support more general mov/movk sequence

  Allow JIT1 and JIT2 to have a different sequence of mov/movk[/movk[/movk]] that map to the same address in the address map. That is, the replay constant might require a different set of instructions (e.g., if a `movk` is missing because its constant is zero).
1 parent dd96637 commit b303ffe

File tree

3 files changed

+201
-16
lines changed

3 files changed

+201
-16
lines changed

src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,3 +335,46 @@ void PutThumb2BlRel24(UINT16* p, INT32 imm24)
335335
p[0] = Opcode0;
336336
p[1] = Opcode1;
337337
}
338+
339+
// GetArm64MovConstant / GetArm64MovkConstant: Decode arm64 mov / movk instructions, e.g.:
340+
// d29ff600 mov x0, #65456
341+
// f2ab8640 movk x0, #23602, lsl #16
342+
// f2c04bc0 movk x0, #606, lsl #32
343+
//
344+
// This is used in the NearDiffer to determine if a sequence of mov/movk is actually an address.
345+
//
346+
// Return `true` if the instruction pointed to by `p` is a mov/movk, `false` otherwise.
347+
// If true, fill out the target register in `*pReg`, constant in `*pCon`, and (for movk) shift value in `*pShift`.
348+
349+
//
// GetArm64MovConstant: Decode an Arm64 64-bit `mov Xd, #imm16` instruction (MOVZ with no shift), e.g.:
//      d29ff600      mov     x0, #65456
//
// Arguments:
//    p    - pointer to the 32-bit instruction word to examine (only read, never written)
//    pReg - [out] destination register number, taken from bits [4:0]
//    pCon - [out] 16-bit immediate, taken from bits [20:5]
//
// Return Value:
//    `true` if `*p` matches the pattern, `false` otherwise. The out parameters are written
//    only when the function returns `true`.
//
// Notes:
//    Only the 64-bit, unshifted form is recognized: bits [31:21] must equal those of 0xd2800000,
//    so a MOVZ with a non-zero `lsl` (or the 32-bit form) is reported as "not a mov".
//
bool GetArm64MovConstant(UINT32* p, unsigned* pReg, unsigned* pCon)
{
    const UINT32 opcodeMask = 0xffe00000; // bits [31:21]: everything except imm16 and Rd
    const UINT32 movzOpcode = 0xd2800000; // 64-bit MOVZ, shift field zero

    const UINT32 instr = *p;
    if ((instr & opcodeMask) != movzOpcode)
    {
        return false;
    }

    *pReg = instr & 0x1f;
    *pCon = (instr >> 5) & 0xffff;
    return true;
}
361+
362+
//
// GetArm64MovkConstant: Decode an Arm64 64-bit `movk Xd, #imm16, lsl #shift` instruction, e.g.:
//      f2ab8640      movk    x0, #23602, lsl #16
//      f2c04bc0      movk    x0, #606, lsl #32
//
// Arguments:
//    p      - pointer to the 32-bit instruction word to examine (only read, never written)
//    pReg   - [out] destination register number, taken from bits [4:0]
//    pCon   - [out] 16-bit immediate, taken from bits [20:5]
//    pShift - [out] left-shift amount in bits (0, 16, 32 or 48), i.e. bits [22:21] times 16
//
// Return Value:
//    `true` if `*p` matches the pattern, `false` otherwise. The out parameters are written
//    only when the function returns `true`.
//
// Notes:
//    Only bits [31:23] are compared (against 0xf2800000), so every shift value is accepted.
//
bool GetArm64MovkConstant(UINT32* p, unsigned* pReg, unsigned* pCon, unsigned* pShift)
{
    const UINT32 opcodeMask = 0xff800000; // bits [31:23]: shift, imm16 and Rd are free
    const UINT32 movkOpcode = 0xf2800000; // 64-bit MOVK

    const UINT32 instr = *p;
    if ((instr & opcodeMask) != movkOpcode)
    {
        return false;
    }

    *pReg   = instr & 0x1f;
    *pCon   = (instr >> 5) & 0xffff;
    *pShift = ((instr >> 21) & 0x3) * 16;
    return true;
}
375+
376+
// PutArm64MovkConstant: set the constant field in an Arm64 `movk` instruction
377+
//
// PutArm64MovkConstant: Overwrite the 16-bit immediate field (bits [20:5]) of the instruction at `p`.
//
// Arguments:
//    p   - pointer to the 32-bit instruction word to patch in place
//    con - new immediate; only the low 16 bits are used
//
// Notes:
//    All bits outside [20:5] (opcode, shift, destination register) are preserved.
//    The instruction is not checked to actually be a `movk`; the caller is responsible for that.
//
void PutArm64MovkConstant(UINT32* p, unsigned con)
{
    const UINT32 imm16Mask = 0xffffu << 5;

    const UINT32 cleared = *p & ~imm16Mask;
    const UINT32 imm16   = (con & 0xffffu) << 5;
    *p                   = cleared | imm16;
}

src/coreclr/tools/superpmi/superpmi-shared/spmiutil.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,11 @@ void PutArm64Rel12(UINT32* pCode, INT32 imm12);
8282
void PutThumb2Mov32(UINT16* p, UINT32 imm32);
8383
void PutThumb2BlRel24(UINT16* p, INT32 imm24);
8484

85+
// Decode an Arm64 `mov` / `movk` instruction at `p`. Return `true` and fill the out parameters
// (target register, 16-bit constant and, for `movk`, the shift amount in bits) if it matches.
bool GetArm64MovConstant(UINT32* p, unsigned* pReg, unsigned* pCon);
bool GetArm64MovkConstant(UINT32* p, unsigned* pReg, unsigned* pCon, unsigned* pShift);

// Replace the 16-bit constant field of the Arm64 `movk` instruction at `p` with `con`.
void PutArm64MovkConstant(UINT32* p, unsigned con);
89+
8590
template <typename T, int size>
8691
inline constexpr unsigned ArrLen(T (&)[size])
8792
{

src/coreclr/tools/superpmi/superpmi/neardiffer.cpp

Lines changed: 153 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -298,22 +298,24 @@ struct DiffData
298298
CompileResult* cr2;
299299

300300
// Details of the first block
301-
size_t blocksize1;
302-
size_t datablock1;
303-
size_t datablockSize1;
304-
size_t originalBlock1;
305-
size_t originalDataBlock1;
306-
size_t otherCodeBlock1;
307-
size_t otherCodeBlockSize1;
301+
unsigned char* block1;
302+
size_t blocksize1;
303+
unsigned char* datablock1;
304+
size_t datablockSize1;
305+
size_t originalBlock1;
306+
size_t originalDataBlock1;
307+
size_t otherCodeBlock1;
308+
size_t otherCodeBlockSize1;
308309

309310
// Details of the second block
310-
size_t blocksize2;
311-
size_t datablock2;
312-
size_t datablockSize2;
313-
size_t originalBlock2;
314-
size_t originalDataBlock2;
315-
size_t otherCodeBlock2;
316-
size_t otherCodeBlockSize2;
311+
unsigned char* block2;
312+
size_t blocksize2;
313+
unsigned char* datablock2;
314+
size_t datablockSize2;
315+
size_t originalBlock2;
316+
size_t originalDataBlock2;
317+
size_t otherCodeBlock2;
318+
size_t otherCodeBlockSize2;
317319
};
318320

319321
//
@@ -330,6 +332,7 @@ bool NearDiffer::compareOffsets(
330332
return true;
331333
}
332334

335+
const SPMI_TARGET_ARCHITECTURE targetArch = GetSpmiTargetArchitecture();
333336
const DiffData* data = (const DiffData*)payload;
334337
size_t ip1 = data->originalBlock1 + blockOffset;
335338
size_t ip2 = data->originalBlock2 + blockOffset;
@@ -435,6 +438,140 @@ bool NearDiffer::compareOffsets(
435438
if ((mapped1 == mapped2) && (mapped1 != (size_t)-1))
436439
return true;
437440

441+
// There are some cases on arm64 where we generate multiple instruction register construction of addresses
442+
// but we don't have a relocation for them (so they aren't handled by `applyRelocs`). One case is
443+
// allocPgoInstrumentationBySchema(), which returns an address the JIT writes into the code stream
444+
// (used to store dynamic PGO probe data).
445+
//
446+
// The instruction sequence is something like this:
447+
// mov x0, #63408
448+
// movk x0, #23602, lsl #16
449+
// movk x0, #606, lsl #32
450+
//
451+
// Here, we try to match this sequence and look it up in the address map.
452+
//
453+
// Since the mov/movk sequence is specific to the replay address constant, we don't assume the baseline
454+
// and diff have the same number of instructions (e.g., it's possible to skip a `movk` if it is zero).
455+
//
456+
// Some version of this logic might apply to ARM as well.
457+
//
458+
if (targetArch == SPMI_TARGET_ARCHITECTURE_ARM64)
459+
{
460+
bool movk2_1 = false, movk3_1 = false;
461+
bool movk2_2 = false, movk3_2 = false;
462+
463+
unsigned reg1_1 = 0, reg2_1, reg3_1, reg4_1;
464+
unsigned reg1_2 = 0, reg2_2, reg3_2, reg4_2;
465+
unsigned con1_1, con2_1, con3_1, con4_1;
466+
unsigned con1_2, con2_2, con3_2, con4_2;
467+
unsigned shift2_1, shift3_1, shift4_1;
468+
unsigned shift2_2, shift3_2, shift4_2;
469+
470+
UINT32* iaddr1 = (UINT32*)(data->block1 + blockOffset);
471+
UINT32* iaddr2 = (UINT32*)(data->block2 + blockOffset);
472+
UINT32* iaddr1end = (UINT32*)(data->block1 + data->blocksize1);
473+
UINT32* iaddr2end = (UINT32*)(data->block2 + data->blocksize2);
474+
475+
DWORDLONG addr1 = 0;
476+
DWORDLONG addr2 = 0;
477+
478+
// Look for a mov/movk address pattern in code stream 1.
479+
480+
if ((iaddr1 < iaddr1end) &&
481+
GetArm64MovConstant(iaddr1, &reg1_1, &con1_1))
482+
{
483+
// We assume the address requires at least 1 'movk' instruction.
484+
if ((iaddr1 + 1 < iaddr1end) &&
485+
GetArm64MovkConstant(iaddr1 + 1, &reg2_1, &con2_1, &shift2_1) &&
486+
(reg1_1 == reg2_1))
487+
{
488+
addr1 = (DWORDLONG)con1_1 + ((DWORDLONG)con2_1 << shift2_1);
489+
490+
if ((iaddr1 + 2 < iaddr1end) &&
491+
GetArm64MovkConstant(iaddr1 + 2, &reg3_1, &con3_1, &shift3_1) &&
492+
(reg1_1 == reg3_1))
493+
{
494+
movk2_1 = true;
495+
addr1 += (DWORDLONG)con3_1 << shift3_1;
496+
497+
if ((iaddr1 + 3 < iaddr1end) &&
498+
GetArm64MovkConstant(iaddr1 + 3, &reg4_1, &con4_1, &shift4_1) &&
499+
(reg1_1 == reg4_1))
500+
{
501+
movk3_1 = true;
502+
addr1 += (DWORDLONG)con4_1 << shift4_1;
503+
}
504+
}
505+
}
506+
}
507+
508+
// Look for a mov/movk address pattern in code stream 2.
509+
510+
if ((iaddr2 < iaddr2end) &&
511+
GetArm64MovConstant(iaddr2, &reg1_2, &con1_2))
512+
{
513+
// We assume the address requires at least 1 'movk' instruction.
514+
if ((iaddr2 + 1 < iaddr2end) &&
515+
GetArm64MovkConstant(iaddr2 + 1, &reg2_2, &con2_2, &shift2_2) &&
516+
(reg1_2 == reg2_2))
517+
{
518+
addr2 = (DWORDLONG)con1_2 + ((DWORDLONG)con2_2 << shift2_2);
519+
520+
if ((iaddr2 + 2 < iaddr2end) &&
521+
GetArm64MovkConstant(iaddr2 + 2, &reg3_2, &con3_2, &shift3_2) &&
522+
(reg1_2 == reg3_2))
523+
{
524+
movk2_2 = true;
525+
addr2 += (DWORDLONG)con3_2 << shift3_2;
526+
527+
if ((iaddr2 + 3 < iaddr2end) &&
528+
GetArm64MovkConstant(iaddr2 + 3, &reg4_2, &con4_2, &shift4_2) &&
529+
(reg1_2 == reg4_2))
530+
{
531+
movk3_2 = true;
532+
addr2 += (DWORDLONG)con4_2 << shift4_2;
533+
}
534+
}
535+
}
536+
}
537+
538+
// Check the constants. We don't need to check 'addr1 == addr2' because if that were
539+
// true we wouldn't have gotten here.
540+
//
541+
// Note: when replaying on a 32-bit platform, we must have
542+
// movk2_1 == movk2_2 == movk3_1 == movk3_2 == false
543+
544+
if ((addr1 != 0) && (addr2 != 0) && (reg1_1 == reg1_2))
545+
{
546+
DWORDLONG mapped1 = (DWORDLONG)data->cr1->searchAddressMap((void*)addr1);
547+
DWORDLONG mapped2 = (DWORDLONG)data->cr2->searchAddressMap((void*)addr2);
548+
if ((mapped1 == mapped2) && (mapped1 != (DWORDLONG)-1))
549+
{
550+
// Now, zero out the constants in the `movk` instructions so when the disassembler
551+
// gets to them, they compare equal.
552+
PutArm64MovkConstant(iaddr1 + 1, 0);
553+
PutArm64MovkConstant(iaddr2 + 1, 0);
554+
if (movk2_1)
555+
{
556+
PutArm64MovkConstant(iaddr1 + 2, 0);
557+
}
558+
if (movk2_2)
559+
{
560+
PutArm64MovkConstant(iaddr2 + 2, 0);
561+
}
562+
if (movk3_1)
563+
{
564+
PutArm64MovkConstant(iaddr1 + 3, 0);
565+
}
566+
if (movk3_2)
567+
{
568+
PutArm64MovkConstant(iaddr2 + 3, 0);
569+
}
570+
return true;
571+
}
572+
}
573+
}
574+
438575
return false;
439576
}
440577

@@ -513,11 +650,11 @@ bool NearDiffer::compareCodeSection(MethodContext* mc,
513650
cr2,
514651

515652
// Details of the first block
516-
(size_t)blocksize1, (size_t)datablock1, (size_t)datablockSize1, (size_t)originalBlock1,
653+
block1, (size_t)blocksize1, datablock1, (size_t)datablockSize1, (size_t)originalBlock1,
517654
(size_t)originalDataBlock1, (size_t)otherCodeBlock1, (size_t)otherCodeBlockSize1,
518655

519656
// Details of the second block
520-
(size_t)blocksize2, (size_t)datablock2, (size_t)datablockSize2, (size_t)originalBlock2,
657+
block2, (size_t)blocksize2, datablock2, (size_t)datablockSize2, (size_t)originalBlock2,
521658
(size_t)originalDataBlock2, (size_t)otherCodeBlock2, (size_t)otherCodeBlockSize2};
522659

523660
#ifdef USE_COREDISTOOLS

0 commit comments

Comments
 (0)