Skip to content

Commit 058f83b

Browse files
Align arm64 data section as requested (#71044)
* Align arm64 data section as requested

  Currently, the data section alignment request is ignored unless it is 8. Since the minimum is 4, this effectively means that 16-byte SIMD16 data alignment requests are ignored. This is likely because this code was written before arm64 supported SIMD, and was never revised.

  Cases of SIMD loads of constant data lead to larger alignment padding of the data section. This is somewhat mitigated by #71043, which fixes a bug with overallocation and overalignment of SIMD8 data loads.

* Additional fixes

  1. On arm64/LA64, if asking for a data alignment greater than code alignment, we need to increase the requested code alignment since the code section is where this data will live. This isn't viewable in SPMI diffs, but it does increase the alignment of some functions from 8-byte to 16-byte code alignment.
  2. Assert that the data section is at least 4-byte aligned (this is the default in our code, and alignment only increases).
  3. Simplify the code setting the alignment flags for allocMem.

* Formatting + disable alignment asserts

  It looks like the buffer pointer passed back from crossgen2 doesn't meet the alignment request. Perhaps it does in the final image, but not in the buffer the JIT fills in? Maybe the asserts could be used for JIT-time but not AOT (when the buffer address is the final location of the code/data)?
1 parent e40236a commit 058f83b

File tree

1 file changed

+56
-15
lines changed

1 file changed

+56
-15
lines changed

src/coreclr/jit/emit.cpp

Lines changed: 56 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6260,7 +6260,13 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
62606260

62616261
coldCodeBlock = nullptr;
62626262

6263-
CorJitAllocMemFlag allocMemFlag = CORJIT_ALLOCMEM_DEFAULT_CODE_ALIGN;
6263+
// This restricts the data alignment to: 4, 8, 16, or 32 bytes
6264+
// Alignments greater than 32 would require VM support in ICorJitInfo::allocMem
6265+
uint32_t dataAlignment = emitConsDsc.alignment;
6266+
assert((dataSection::MIN_DATA_ALIGN <= dataAlignment) && (dataAlignment <= dataSection::MAX_DATA_ALIGN) &&
6267+
isPow2(dataAlignment));
6268+
6269+
uint32_t codeAlignment = TARGET_POINTER_SIZE;
62646270

62656271
#ifdef TARGET_X86
62666272
//
@@ -6280,14 +6286,14 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
62806286
const weight_t scenarioHotWeight = 256.0;
62816287
if (emitComp->fgCalledCount > (scenarioHotWeight * emitComp->fgProfileRunsCount()))
62826288
{
6283-
allocMemFlag = CORJIT_ALLOCMEM_FLG_16BYTE_ALIGN;
6289+
codeAlignment = 16;
62846290
}
62856291
}
62866292
else
62876293
{
62886294
if (emitTotalHotCodeSize <= 16)
62896295
{
6290-
allocMemFlag = CORJIT_ALLOCMEM_FLG_16BYTE_ALIGN;
6296+
codeAlignment = 16;
62916297
}
62926298
}
62936299
#endif
@@ -6299,23 +6305,44 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
62996305
if (emitComp->opts.OptimizationEnabled() && !emitComp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) &&
63006306
(emitTotalHotCodeSize > 16) && emitComp->fgHasLoops)
63016307
{
6302-
allocMemFlag = CORJIT_ALLOCMEM_FLG_32BYTE_ALIGN;
6308+
codeAlignment = 32;
63036309
}
63046310
#endif
63056311

6306-
// This restricts the emitConsDsc.alignment to: 1, 2, 4, 8, 16, or 32 bytes
6307-
// Alignments greater than 32 would require VM support in ICorJitInfo::allocMem
6308-
assert(isPow2(emitConsDsc.alignment) && (emitConsDsc.alignment <= 32));
6312+
#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
6313+
// For arm64/LoongArch64, we're going to put the data in the code section. So make sure the code section has
6314+
// adequate alignment.
6315+
if (emitConsDsc.dsdOffs > 0)
6316+
{
6317+
codeAlignment = max(codeAlignment, dataAlignment);
6318+
}
6319+
#endif
6320+
6321+
// Note that we don't support forcing code alignment of 8 bytes on 32-bit platforms; an omission?
6322+
assert((TARGET_POINTER_SIZE <= codeAlignment) && (codeAlignment <= 32) && isPow2(codeAlignment));
63096323

6310-
if (emitConsDsc.alignment == 16)
6324+
CorJitAllocMemFlag allocMemFlagCodeAlign = CORJIT_ALLOCMEM_DEFAULT_CODE_ALIGN;
6325+
if (codeAlignment == 32)
63116326
{
6312-
allocMemFlag = static_cast<CorJitAllocMemFlag>(allocMemFlag | CORJIT_ALLOCMEM_FLG_RODATA_16BYTE_ALIGN);
6327+
allocMemFlagCodeAlign = CORJIT_ALLOCMEM_FLG_32BYTE_ALIGN;
63136328
}
6314-
else if (emitConsDsc.alignment == 32)
6329+
else if (codeAlignment == 16)
63156330
{
6316-
allocMemFlag = static_cast<CorJitAllocMemFlag>(allocMemFlag | CORJIT_ALLOCMEM_FLG_RODATA_32BYTE_ALIGN);
6331+
allocMemFlagCodeAlign = CORJIT_ALLOCMEM_FLG_16BYTE_ALIGN;
63176332
}
63186333

6334+
CorJitAllocMemFlag allocMemFlagDataAlign = static_cast<CorJitAllocMemFlag>(0);
6335+
if (dataAlignment == 16)
6336+
{
6337+
allocMemFlagDataAlign = CORJIT_ALLOCMEM_FLG_RODATA_16BYTE_ALIGN;
6338+
}
6339+
else if (dataAlignment == 32)
6340+
{
6341+
allocMemFlagDataAlign = CORJIT_ALLOCMEM_FLG_RODATA_32BYTE_ALIGN;
6342+
}
6343+
6344+
CorJitAllocMemFlag allocMemFlag = static_cast<CorJitAllocMemFlag>(allocMemFlagCodeAlign | allocMemFlagDataAlign);
6345+
63196346
AllocMemArgs args;
63206347
memset(&args, 0, sizeof(args));
63216348

@@ -6330,11 +6357,9 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
63306357
}
63316358

63326359
UNATIVE_OFFSET roDataAlignmentDelta = 0;
6333-
if (emitConsDsc.dsdOffs && (emitConsDsc.alignment == TARGET_POINTER_SIZE))
6360+
if (emitConsDsc.dsdOffs > 0)
63346361
{
6335-
UNATIVE_OFFSET roDataAlignment = TARGET_POINTER_SIZE; // 8 Byte align by default.
6336-
roDataAlignmentDelta = (UNATIVE_OFFSET)ALIGN_UP(emitTotalHotCodeSize, roDataAlignment) - emitTotalHotCodeSize;
6337-
assert((roDataAlignmentDelta == 0) || (roDataAlignmentDelta == 4));
6362+
roDataAlignmentDelta = AlignmentPad(emitTotalHotCodeSize, dataAlignment);
63386363
}
63396364

63406365
args.hotCodeSize = emitTotalHotCodeSize + roDataAlignmentDelta + emitConsDsc.dsdOffs;
@@ -6377,6 +6402,22 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
63776402
{
63786403
assert(((size_t)codeBlock & 31) == 0);
63796404
}
6405+
#if 0
6406+
// TODO: we should be able to assert the following, but it appears crossgen2 doesn't respect them,
6407+
// or maybe it respects them in the written image but not in the buffer pointer given to the JIT.
6408+
if ((allocMemFlag & CORJIT_ALLOCMEM_FLG_16BYTE_ALIGN) != 0)
6409+
{
6410+
assert(((size_t)codeBlock & 15) == 0);
6411+
}
6412+
if ((allocMemFlag & CORJIT_ALLOCMEM_FLG_RODATA_32BYTE_ALIGN) != 0)
6413+
{
6414+
assert(((size_t)consBlock & 31) == 0);
6415+
}
6416+
if ((allocMemFlag & CORJIT_ALLOCMEM_FLG_RODATA_16BYTE_ALIGN) != 0)
6417+
{
6418+
assert(((size_t)consBlock & 15) == 0);
6419+
}
6420+
#endif // 0
63806421
#endif
63816422

63826423
// if (emitConsDsc.dsdOffs)

0 commit comments

Comments
 (0)