Skip to content

Commit f49460b

Browse files
pratikasharigcbot
authored and committed
Replace weak edges with 4GRF alignment
Replace weak edges with 4GRF alignment for variables belonging to Default64Bit augmentation bucket.
1 parent ad2d24e commit f49460b

13 files changed

+272
-144
lines changed

visa/G4_Opcode.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,8 @@ enum class BankAlign {
6262
Odd = 3, // old align
6363
Even2GRF = 4, // 2-GRF even align 1100
6464
Odd2GRF = 5, // 2-GRF old align, 0011
65-
Align_NUM = 6 // Num of alignment
65+
QuadGRF = 6, // 4-GRF align
66+
Align_NUM = 7 // Num of alignment
6667
};
6768

6869
// An instruction's execution width

visa/GraphColor.cpp

Lines changed: 102 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -2650,9 +2650,12 @@ void GlobalRA::updateSubRegAlignment(G4_SubReg_Align subAlign) {
26502650
}
26512651
}
26522652

2653-
bool GlobalRA::evenAlignNeeded(G4_Declare *dcl) {
2653+
int GlobalRA::getAlignFromAugBucket(G4_Declare *dcl) {
26542654
if (GlobalRA::useGenericAugAlign(builder.getPlatformGeneration())) {
2655-
// Return true if even alignment is needed
2655+
// Return 0 if no special alignment is needed
2656+
// Return 2 if even alignment is needed
2657+
// Return 4 if quad alignment is needed
2658+
26562659
// Even align needed if for given SIMD size and elem type,
26572660
// a complete def uses between 1-2 GRFs.
26582661
auto kernelSimdSizeToUse = kernel.getSimdSizeWithSlicing();
@@ -2670,14 +2673,41 @@ bool GlobalRA::evenAlignNeeded(G4_Declare *dcl) {
26702673
topdclAugMask == AugmentationMasks::Default64Bit)
26712674
elemSizeToUse = 8;
26722675

2673-
if ( // Even align if size is between 1-2 GRFs, for >2GRF sizes use weak
2674-
// edges
2675-
(elemSizeToUse * kernelSimdSizeToUse) >
2676-
(unsigned)kernel.numEltPerGRF<Type_UB>() &&
2677-
(elemSizeToUse * kernelSimdSizeToUse) <=
2678-
(unsigned)(2 * kernel.numEltPerGRF<Type_UB>()) &&
2679-
!(!builder.canReadR0() && dcl == kernel.fg.builder->getBuiltinR0())) {
2680-
return true;
2676+
auto totalByteSize = elemSizeToUse * kernelSimdSizeToUse;
2677+
auto bucketSpans2GRFs = [&]() {
2678+
return totalByteSize > (unsigned)kernel.numEltPerGRF<Type_UB>() &&
2679+
totalByteSize <= (unsigned)(2 * kernel.numEltPerGRF<Type_UB>());
2680+
};
2681+
2682+
if (!(!builder.canReadR0() && dcl == kernel.fg.builder->getBuiltinR0())) {
2683+
if (use4GRFAlign) {
2684+
// The only time it's safe to do 2GRF align is when augmentation
2685+
// bucket is known to be Default32Bit, otherwise we need to align
2686+
// 4GRF. It isn't enough to simply check elemSize * GRF size to
2687+
// decide alignment.
2688+
if (topdclAugMask == AugmentationMasks::Default32Bit) {
2689+
if (bucketSpans2GRFs())
2690+
return 2;
2691+
} else if (topdclAugMask == AugmentationMasks::Default64Bit) {
2692+
if (bucketSpans2GRFs())
2693+
// :df SIMD16
2694+
return 2;
2695+
2696+
// :df SIMD32
2697+
return 4;
2698+
} else {
2699+
// Local RA will take this path as augmentation buckets are set
2700+
// to Undetermined. Although this is conservative, hybrid RA
2701+
// will run augmentation and compute buckets to fill in "holes".
2702+
// For example, mov (32|M0) V10<2>:f should use 4GRF alignment as
2703+
// it's Default64Bit variable, although elem size is :f.
2704+
return 4;
2705+
}
2706+
} else {
2707+
// Even align if size is between 1-2 GRFs; >2GRF sizes rely on weak edges.
2708+
if (bucketSpans2GRFs())
2709+
return 2;
2710+
}
26812711
}
26822712
}
26832713
} else {
@@ -2693,21 +2723,28 @@ bool GlobalRA::evenAlignNeeded(G4_Declare *dcl) {
26932723
topdcl->getByteSize() >= kernel.numEltPerGRF<Type_UB>() &&
26942724
!(!builder.canReadR0() &&
26952725
dcl == kernel.fg.builder->getBuiltinR0())) {
2696-
return true;
2726+
return 2;
26972727
}
26982728
}
26992729
}
27002730
}
27012731

2702-
return false;
2732+
return 0;
27032733
}
27042734

2705-
// This function can be invoked before local RA or after augmentation.
2706-
void GlobalRA::evenAlign() {
2707-
// Update alignment of all GRF declares to align
2735+
void GlobalRA::augAlign() {
2736+
// Update alignment of all GRF declares based on
2737+
// augmentation bucket and platform.
27082738
for (auto dcl : kernel.Declares) {
27092739
if (dcl->getRegFile() & G4_GRF) {
2710-
if (evenAlignNeeded(dcl)) {
2740+
unsigned int align = getAlignFromAugBucket(dcl);
2741+
if (align == 4) {
2742+
if (!isQuadAligned(dcl)) {
2743+
incRA.evenAlignUpdate(dcl);
2744+
}
2745+
forceQuadAlign(dcl);
2746+
}
2747+
else if (align == 2) {
27112748
if (!isEvenAligned(dcl)) {
27122749
incRA.evenAlignUpdate(dcl);
27132750
}
@@ -3471,8 +3508,8 @@ bool Augmentation::markNonDefaultMaskDef() {
34713508

34723509
bool checkLRAAlign = false;
34733510
if (liveAnalysis.livenessClass(G4_GRF)) {
3474-
if ((GlobalRA::useGenericAugAlign(kernel.getPlatformGeneration()) &&
3475-
gra.evenAlignNeeded(dcl)))
3511+
if (GlobalRA::useGenericAugAlign(kernel.getPlatformGeneration()) &&
3512+
gra.getAlignFromAugBucket(dcl) > 0)
34763513
checkLRAAlign = true;
34773514
else if (gra.getAugmentationMask(dcl) ==
34783515
AugmentationMasks::Default32Bit &&
@@ -3485,10 +3522,22 @@ bool Augmentation::markNonDefaultMaskDef() {
34853522
if (dclLR) {
34863523
int s;
34873524
auto phyReg = dclLR->getPhyReg(s);
3488-
if (phyReg && phyReg->asGreg()->getRegNum() % 2 != 0) {
3489-
// If LRA assignment is not 2GRF aligned for then
3525+
unsigned int maxAlign = 2;
3526+
if (gra.use4GRFAlign && gra.getAugmentationMask(dcl) == AugmentationMasks::Default64Bit) {
3527+
maxAlign = 4;
3528+
}
3529+
if (phyReg && phyReg->asGreg()->getRegNum() % maxAlign != 0) {
3530+
// If LRA assignment is not aligned as expected then
34903531
// mark it as non-default. GRA candidates cannot fully
34913532
// overlap with such ranges. Partial overlap is illegal.
3533+
3534+
// TODO: There's a bug here. This branch should execute only if
3535+
// dclLR->getAssigned() == true. If this is false, then
3536+
// dclLR->getPhyReg() is invalid. Once this is fixed, we can
3537+
// re-enable following assert.
3538+
//
3539+
//vISA_ASSERT(!gra.use4GRFAlign,
3540+
// "expecting LRA allocation to be aligned");
34923541
gra.setAugmentationMask(dcl, AugmentationMasks::NonDefault);
34933542
nonDefaultMaskDefFound = true;
34943543
}
@@ -4195,6 +4244,8 @@ bool Interference::isStrongEdgeBetween(const G4_Declare *dcl1,
41954244

41964245
bool Augmentation::weakEdgeNeeded(AugmentationMasks defaultDclMask,
41974246
AugmentationMasks newDclMask) {
4247+
if (gra.use4GRFAlign)
4248+
return false;
41984249
if (useGenericAugAlign) {
41994250
// Weak edge needed in case #GRF exceeds 2
42004251
if (newDclMask == AugmentationMasks::Default64Bit)
@@ -4746,9 +4797,9 @@ void Augmentation::augmentIntfGraph() {
47464797
// to 2GRF except for NoMask variables
47474798
VISA_DEBUG_VERBOSE(std::cout
47484799
<< "Kernel size is SIMD" << kernel.getSimdSize()
4749-
<< " so updating all GRFs to be 2GRF aligned"
4800+
<< " so updating all GRFs to aug align"
47504801
<< "\n");
4751-
gra.evenAlign();
4802+
gra.augAlign();
47524803
}
47534804
gra.updateSubRegAlignment(kernel.getGRFAlign());
47544805
}
@@ -5054,6 +5105,7 @@ void GraphColor::computeDegreeForGRF() {
50545105
// consider weak edges in degree computation
50555106
auto *weakEdges = intf.getCompatibleSparseIntf(lrs[i]->getDcl());
50565107
if (weakEdges) {
5108+
vISA_ASSERT(!gra.use4GRFAlign, "not expecting weak edges");
50575109
for (auto weakNeighbor : *weakEdges) {
50585110
if (!weakNeighbor->getRegVar()->isRegAllocPartaker())
50595111
continue;
@@ -5367,16 +5419,22 @@ void GraphColor::relaxNeighborDegreeGRF(LiveRange *lr) {
53675419
if (!(lr->getIsPseudoNode()) && !(lr->getIsPartialDcl())) {
53685420
unsigned lr_id = lr->getVar()->getId();
53695421
bool lr2EvenAlign = gra.isEvenAligned(lr->getDcl());
5422+
unsigned int lr2AugAlign = gra.getAugAlign(lr->getDcl());
53705423
unsigned lr2_nreg = lr->getNumRegNeeded();
53715424

53725425
// relax degree between 2 nodes
53735426
auto relaxDegree = [&](LiveRange *lr1) {
53745427
if (lr1->getActive() && !lr1->getIsPseudoNode() &&
53755428
!(lr1->getIsPartialDcl())) {
5376-
bool lr1EvenAlign = gra.isEvenAligned(lr1->getDcl());
53775429
unsigned lr1_nreg = lr1->getNumRegNeeded();
5378-
unsigned w =
5379-
edgeWeightGRF(lr1EvenAlign, lr2EvenAlign, lr1_nreg, lr2_nreg);
5430+
unsigned w = 0;
5431+
if (gra.use4GRFAlign) {
5432+
unsigned int lr1AugAlign = gra.getAugAlign(lr1->getDcl());
5433+
w = edgeWeightWith4GRF(lr1AugAlign, lr2AugAlign, lr1_nreg, lr2_nreg);
5434+
} else {
5435+
bool lr1EvenAlign = gra.isEvenAligned(lr1->getDcl());
5436+
w = edgeWeightGRF(lr1EvenAlign, lr2EvenAlign, lr1_nreg, lr2_nreg);
5437+
}
53805438
VISA_DEBUG_VERBOSE({
53815439
std::cout << "\t relax ";
53825440
lr1->dump();
@@ -5782,9 +5840,15 @@ bool GraphColor::assignColors(ColorHeuristic colorHeuristicGRF,
57825840
if (!failed_alloc) {
57835841
// When evenAlignNeeded is true, it is binding for correctness
57845842
bool evenAlignNeeded = gra.isEvenAligned(lrVar->getDeclare());
5785-
BankAlign align = evenAlignNeeded ? BankAlign::Even : BankAlign::Either;
5786-
if (allocFromBanks) {
5843+
bool quadAlignNeeded = gra.isQuadAligned(lrVar->getDeclare());
5844+
BankAlign align = BankAlign::Either;
5845+
if (quadAlignNeeded)
5846+
align = BankAlign::QuadGRF;
5847+
else if (evenAlignNeeded)
5848+
align = BankAlign::Even;
57875849

5850+
if (allocFromBanks) {
5851+
vISA_ASSERT(align != BankAlign::QuadGRF, "unexpected value");
57885852
if (!isHybrid && oneGRFBankDivision &&
57895853
(!evenAlignNeeded ||
57905854
builder.getPlatformGeneration() == PlatformGen::GEN9)) {
@@ -10876,12 +10940,20 @@ void GlobalRA::insertRestoreAddr(G4_BB *bb) {
1087610940
// correctness.
1087710941
//
1087810942
unsigned GraphColor::edgeWeightGRF(const LiveRange *lr1, const LiveRange *lr2) {
10879-
bool lr1EvenAlign = gra.isEvenAligned(lr1->getDcl());
10880-
bool lr2EvenAlign = gra.isEvenAligned(lr2->getDcl());
1088110943
unsigned lr1_nreg = lr1->getNumRegNeeded();
1088210944
unsigned lr2_nreg = lr2->getNumRegNeeded();
1088310945

10884-
return edgeWeightGRF(lr1EvenAlign, lr2EvenAlign, lr1_nreg, lr2_nreg);
10946+
if (gra.use4GRFAlign) {
10947+
auto lr1Align = gra.getAugAlign(lr1->getDcl());
10948+
auto lr2Align = gra.getAugAlign(lr2->getDcl());
10949+
10950+
return edgeWeightWith4GRF(lr1Align, lr2Align, lr1_nreg, lr2_nreg);
10951+
} else {
10952+
bool lr1EvenAlign = gra.isEvenAligned(lr1->getDcl());
10953+
bool lr2EvenAlign = gra.isEvenAligned(lr2->getDcl());
10954+
10955+
return edgeWeightGRF(lr1EvenAlign, lr2EvenAlign, lr1_nreg, lr2_nreg);
10956+
}
1088510957
}
1088610958

1088710959
unsigned GraphColor::edgeWeightARF(const LiveRange *lr1, const LiveRange *lr2) {

visa/GraphColor.h

Lines changed: 76 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -778,6 +778,8 @@ class Interference {
778778
return nullptr;
779779
}
780780

781+
// Number of entries currently stored in compatibleSparseIntf.
size_t numVarsWithWeakEdges() const {
  return compatibleSparseIntf.size();
}
782+
781783
void init() {
782784
if (useDenseMatrix()) {
783785
auto N = (size_t)rowSize * (size_t)maxId;
@@ -895,15 +897,46 @@ class GraphColor {
895897
return lr1_nreg + lr2_nreg - 1;
896898
}
897899

898-
if (!lr2EvenAlign) {
899-
unsigned sum = lr1_nreg + lr2_nreg;
900+
unsigned sum = lr1_nreg + lr2_nreg;
901+
if (!lr2EvenAlign)
900902
return sum + 1 - ((sum) % 2);
901-
} else if (lr2EvenAlign) {
902-
return lr1_nreg + lr2_nreg - 1 + (lr1_nreg % 2) + (lr2_nreg % 2);
903-
} else {
904-
vISA_ASSERT_UNREACHABLE("should be unreachable");
905-
return 0;
903+
904+
return sum - 1 + (lr1_nreg % 2) + (lr2_nreg % 2);
905+
}
906+
907+
static unsigned edgeWeightWith4GRF(int lr1Align, int lr2Align,
908+
unsigned lr1_nreg, unsigned lr2_nreg) {
909+
if (lr1Align < 4 && lr2Align < 4)
910+
return edgeWeightGRF(lr1Align % 2, lr2Align % 2, lr1_nreg, lr2_nreg);
911+
912+
if (lr2Align == 4) {
913+
if (lr1Align < 2)
914+
return lr1_nreg + lr2_nreg - 1;
915+
if (lr1Align == 2) {
916+
// if (lr2_nreg % 2 == 0) -- lr2 size is even
917+
// return lr2_nreg + lr1_nreg;
918+
// if (lr2_nreg % 2 == 1) -- lr2 size is odd
919+
// return lr2_nreg + lr1_nreg + 1;
920+
921+
return lr1_nreg + lr2_nreg + (lr2_nreg % 2);
922+
} else if (lr1Align == 4) {
923+
if (lr2_nreg % 4 == 0)
924+
// lr2 size is multiple of 4
925+
return lr1_nreg + lr2_nreg;
926+
927+
// if lr2_nreg % 4 == 1 -- lr2 size is 1 + (4*n)
928+
// return lr1_nreg + lr2_nreg + 3;
929+
// if lr2_nreg % 2 == 0 -- lr2 size is 2 + (4*n)
930+
// return lr2_nreg + lr1_nreg + 2;
931+
// if lr2_nreg % 4 == 3 -- lr2 size is 3 + (4*n)
932+
// return lr2_nreg + lr1_nreg + 1;
933+
934+
return lr1_nreg + lr2_nreg + 4 - (lr2_nreg % 4);
935+
}
906936
}
937+
938+
vISA_ASSERT(lr1Align == 4, "unexpected condition");
939+
return edgeWeightWith4GRF(lr2Align, lr1Align, lr2_nreg, lr1_nreg);
907940
}
908941

909942
void computeDegreeForGRF();
@@ -985,7 +1018,7 @@ struct RAVarInfo {
9851018
unsigned subOff = 0;
9861019
std::vector<BundleConflict> bundleConflicts;
9871020
G4_SubReg_Align subAlign = G4_SubReg_Align::Any;
988-
bool isEvenAlign = false;
1021+
int augAlignInGRF = 0;
9891022
AugmentationMasks augMask = AugmentationMasks::Undetermined;
9901023
};
9911024

@@ -1110,6 +1143,8 @@ class GlobalRA {
11101143
// The pre assigned forbidden register bits for different kinds
11111144
ForbiddenRegs fbdRegs;
11121145

1146+
const bool use4GRFAlign = false;
1147+
11131148
private:
11141149
template <class REGION_TYPE>
11151150
static unsigned getRegionDisp(REGION_TYPE *region, const IR_Builder &irb);
@@ -1572,12 +1607,35 @@ class GlobalRA {
15721607
return true;
15731608
}
15741609

1575-
bool isEvenAligned(const G4_Declare *dcl) const {
1576-
return getVar(dcl).isEvenAlign;
1610+
bool isQuadAligned(const G4_Declare *dcl) const {
1611+
auto augAlign = getAugAlign(dcl);
1612+
return augAlign == 4;
1613+
}
1614+
1615+
bool isEvenAligned(const G4_Declare* dcl) const {
1616+
auto augAlign = getAugAlign(dcl);
1617+
return augAlign > 0 && augAlign % 2 == 0;
1618+
}
1619+
1620+
int getAugAlign(const G4_Declare *dcl) const {
1621+
return getVar(dcl).augAlignInGRF;
1622+
}
1623+
1624+
// Records a 4-GRF augmentation alignment for dcl.
void forceQuadAlign(const G4_Declare *dcl) {
  setAugAlign(dcl, 4);
}
1625+
1626+
// Clears the augmentation alignment of dcl back to 0.
void resetAlign(const G4_Declare *dcl) {
  setAugAlign(dcl, 0);
}
1627+
1628+
// Due to legacy usage, this method takes a boolean that, when set,
1629+
// causes alignment to be set to Even (2). When boolean flag is
1630+
// reset, it also resets alignment to Either (0).
1631+
void setEvenAligned(const G4_Declare *dcl, bool align) {
1632+
setAugAlign(dcl, align ? 2 : 0);
15771633
}
15781634

1579-
void setEvenAligned(const G4_Declare *dcl, bool e) {
1580-
allocVar(dcl).isEvenAlign = e;
1635+
// Stores `align` (in GRFs) as the augmentation alignment of dcl.
// Values above 2 are accepted only when use4GRFAlign is set, and values
// above 4 are not supported; both conditions are checked via vISA_ASSERT.
void setAugAlign(const G4_Declare *dcl, int align) {
  vISA_ASSERT(align <= 2 || use4GRFAlign, "unexpected alignment");
  vISA_ASSERT(align <= 4, "unsupported alignment");

  auto &info = allocVar(dcl);
  info.augAlignInGRF = align;
}
15821640

15831641
BankAlign getBankAlign(const G4_Declare *) const;
@@ -1592,7 +1650,8 @@ class GlobalRA {
15921650
useLscForNonStackCallSpillFill(
15931651
k.fg.builder->useLscForNonStackSpillFill()),
15941652
useLscForScatterSpill(k.fg.builder->supportsLSC() &&
1595-
k.fg.builder->getOption(vISA_scatterSpill)) {
1653+
k.fg.builder->getOption(vISA_scatterSpill)),
1654+
use4GRFAlign(k.fg.builder->supports4GRFAlign()) {
15961655
vars.resize(k.Declares.size());
15971656

15981657
if (kernel.getOptions()->getOption(vISA_VerifyAugmentation)) {
@@ -1616,8 +1675,9 @@ class GlobalRA {
16161675
static uint32_t getRefCount(int loopNestLevel);
16171676
void updateSubRegAlignment(G4_SubReg_Align subAlign);
16181677
bool isChannelSliced();
1619-
void evenAlign();
1620-
bool evenAlignNeeded(G4_Declare *);
1678+
// Used by LRA/GRA/hybrid RA
1679+
void augAlign();
1680+
int getAlignFromAugBucket(G4_Declare *);
16211681
void getBankAlignment(LiveRange *lr, BankAlign &align);
16221682
void printLiveIntervals();
16231683
void reportUndefinedUses(LivenessAnalysis &liveAnalysis, G4_BB *bb,
@@ -1702,7 +1762,7 @@ class GlobalRA {
17021762
}
17031763

17041764
// Copies both alignment attributes of src onto dst: first the augmentation
// alignment in GRFs, then the sub-register alignment.
void copyAlignment(G4_Declare *dst, G4_Declare *src) {
  const int srcAugAlign = getAugAlign(src);
  setAugAlign(dst, srcAugAlign);

  const auto srcSubAlign = getSubRegAlign(src);
  setSubRegAlign(dst, srcSubAlign);
}
17081768

0 commit comments

Comments
 (0)