@@ -31,6 +31,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
31
// ===----------------------------------------------------------------------===//
32
32
33
33
#include " GenXLowerAggrCopies.h"
34
+ #include " GenX.h"
34
35
#include " llvm/Analysis/TargetTransformInfo.h"
35
36
#include " llvm/CodeGen/StackProtector.h"
36
37
#include " llvm/IR/Constants.h"
@@ -47,10 +48,13 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
47
48
#include " llvm/Support/Debug.h"
48
49
#include " llvm/Transforms/Utils/BasicBlockUtils.h"
49
50
#include " llvm/Transforms/Utils/LowerMemIntrinsics.h"
50
- #include " Probe/Assertion.h"
51
51
52
+ #include " Probe/Assertion.h"
52
53
#include " llvmWrapper/IR/DerivedTypes.h"
53
54
55
+ #include < tuple>
56
+ #include < vector>
57
+
54
58
#define DEBUG_TYPE " GENX_LOWERAGGRCOPIES"
55
59
56
60
using namespace llvm ;
@@ -91,6 +95,98 @@ struct GenXLowerAggrCopies : public FunctionPass {
91
95
92
96
char GenXLowerAggrCopies::ID = 0 ;
93
97
98
namespace {
// One contiguous piece of a memory region that is filled by a single store.
// Within this file Offset is used as a GEP index and Width as a vector element
// count, so both are expressed in elements of the stored value type.
struct SliceInfo {
  int Offset; // index of the first element covered by the slice
  int Width;  // number of elements covered by the slice
};
} // namespace
104
+
105
+ static std::vector<SliceInfo> getLegalLengths (int TotalLength) {
106
+ std::vector<SliceInfo> Slices;
107
+ for (int Offset = 0 ; TotalLength;) {
108
+ int Width = PowerOf2Floor (TotalLength);
109
+ Slices.push_back ({Offset, Width});
110
+ Offset += Width;
111
+ TotalLength -= Width;
112
+ }
113
+ return std::move (Slices);
114
+ }
115
+
116
+ // Original memset intrinsic fills memory with 8-bit values.
117
+ // This function checks whether bigger type can be used (e.g. storing by 32-bit
118
+ // values).
119
+ // Desired type size is provided with \p CoalescedTySize parametr and given in
120
+ // bytes.
121
+ static bool memSetCanBeCoalesced (MemSetInst &MemSet, int CoalescedTySize) {
122
+ auto OrigLength = cast<ConstantInt>(MemSet.getLength ())->getSExtValue ();
123
+ IGC_ASSERT_MESSAGE (MemSet.getValue ()->getType ()->getScalarSizeInBits () ==
124
+ genx::ByteBits,
125
+ " memset is expected to store by bytes" );
126
+ IGC_ASSERT_MESSAGE (CoalescedTySize >= 1 && isPowerOf2_32 (CoalescedTySize),
127
+ " wrong argument: invalid CoalescedTySize" );
128
+ return OrigLength % CoalescedTySize == 0 &&
129
+ static_cast <int >(MemSet.getDestAlignment ()) >= CoalescedTySize;
130
+ }
131
+
132
+ // Original memset intrinsic fills memory with 8-bit values.
133
+ // This function checks whether bigger type can be used (e.g. storing by 32-bit
134
+ // values).
135
+ // New coalesced value and corresponding base address and length are returned
136
+ // respectively.
137
+ // New instructions may be inserted before the \p MemSet to produce these new
138
+ // values.
139
+ static std::tuple<Value &, Value &, int >
140
+ defineOptimalValueAndLength (MemSetInst &MemSet) {
141
+ auto OrigLength = cast<ConstantInt>(MemSet.getLength ())->getSExtValue ();
142
+ Value &OrigSetVal = *MemSet.getValue ();
143
+ Value &OrigBaseAddr = *MemSet.getRawDest ();
144
+
145
+ // Because DWord is better than Byte and causes minimal problems.
146
+ // OWord can be better but but it requires more code.
147
+ constexpr int CoalescedTySize = genx::DWordBytes;
148
+ if (!memSetCanBeCoalesced (MemSet, CoalescedTySize))
149
+ return {OrigSetVal, OrigBaseAddr, OrigLength};
150
+
151
+ IRBuilder<> IRB{&MemSet};
152
+ auto *PreNewSetVal = IRB.CreateVectorSplat (
153
+ CoalescedTySize, &OrigSetVal, OrigSetVal.getName () + " .pre.coalesce" );
154
+ auto *NewSetVal = IRB.CreateBitCast (PreNewSetVal, IRB.getInt32Ty (),
155
+ OrigSetVal.getName () + " .coalesce" );
156
+ auto DstAS = cast<PointerType>(OrigBaseAddr.getType ())->getAddressSpace ();
157
+ auto *NewBaseAddr =
158
+ IRB.CreateBitCast (&OrigBaseAddr, IRB.getInt32Ty ()->getPointerTo (DstAS),
159
+ OrigBaseAddr.getName () + " .align" );
160
+ return {*NewSetVal, *NewBaseAddr, OrigLength / CoalescedTySize};
161
+ }
162
+
163
+ // Fills memory section/slice defined by \p Slice and \p BaseAddr parameters
164
+ // with \p SetVal values. Memory is filled by a vector store instruction.
165
+ static void setMemorySliceWithVecStore (SliceInfo Slice, Value &SetVal,
166
+ Value &BaseAddr,
167
+ Instruction *InsertionPt) {
168
+ IGC_ASSERT_MESSAGE (
169
+ InsertionPt,
170
+ " wrong argument: insertion point must be a valid instruction" );
171
+ IGC_ASSERT_MESSAGE (Slice.Offset >= 0 && isPowerOf2_32 (Slice.Width ),
172
+ " illegal slice is provided" );
173
+ IGC_ASSERT_MESSAGE (SetVal.getType () ==
174
+ BaseAddr.getType ()->getPointerElementType (),
175
+ " value and pointer types must correspond" );
176
+
177
+ auto *VecTy = IGCLLVM::FixedVectorType::get (SetVal.getType (), Slice.Width );
178
+ IRBuilder<> IRB (InsertionPt);
179
+ Value *WriteOut = IRB.CreateVectorSplat (Slice.Width , &SetVal);
180
+ auto *DstAddr = &BaseAddr;
181
+ if (Slice.Offset != 0 )
182
+ DstAddr = IRB.CreateGEP (BaseAddr.getType ()->getPointerElementType (),
183
+ &BaseAddr, IRB.getInt32 (Slice.Offset ),
184
+ BaseAddr.getName () + " .addr.offset" );
185
+ auto DstAS = cast<PointerType>(DstAddr->getType ())->getAddressSpace ();
186
+ auto *StoreVecPtr = IRB.CreateBitCast (DstAddr, VecTy->getPointerTo (DstAS));
187
+ IRB.CreateStore (WriteOut, StoreVecPtr);
188
+ }
189
+
94
190
bool GenXLowerAggrCopies::runOnFunction (Function &F) {
95
191
SmallVector<MemIntrinsic *, 4 > MemCalls;
96
192
@@ -135,26 +231,14 @@ bool GenXLowerAggrCopies::runOnFunction(Function &F) {
135
231
} else {
136
232
expandMemMoveAsLoop (Memmove);
137
233
}
138
- } else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) {
234
+ } else if (MemSetInst *MemSet = dyn_cast<MemSetInst>(MemCall)) {
139
235
if (doLinearExpand) {
140
- llvm::Value *SetVal = Memset->getValue ();
141
- llvm::Value *LenVal = Memset->getLength ();
142
- IGC_ASSERT (isa<Constant>(LenVal));
143
- IGC_ASSERT (SetVal->getType ()->getScalarSizeInBits () == 8 );
144
- auto Len = (unsigned )cast<ConstantInt>(LenVal)->getZExtValue ();
145
- auto VecTy = IGCLLVM::FixedVectorType::get (SetVal->getType (), Len);
146
- Value *WriteOut = UndefValue::get (VecTy);
147
- IRBuilder<> IRB (Memset);
148
- for (unsigned i = 0 ; i < Len; ++i) {
149
- WriteOut = IRB.CreateInsertElement (WriteOut, SetVal, IRB.getInt32 (i));
150
- }
151
- auto DstAddr = Memset->getRawDest ();
152
- unsigned dstAS = cast<PointerType>(DstAddr->getType ())->getAddressSpace ();
153
- auto StorePtrV =
154
- IRB.CreateBitCast (DstAddr, VecTy->getPointerTo (dstAS));
155
- IRB.CreateStore (WriteOut, StorePtrV);
236
+ auto &&[SetVal, BaseAddr, Len] = defineOptimalValueAndLength (*MemSet);
237
+ std::vector<SliceInfo> LegalLengths = getLegalLengths (Len);
238
+ for (SliceInfo Slice : LegalLengths)
239
+ setMemorySliceWithVecStore (Slice, SetVal, BaseAddr, MemSet);
156
240
} else {
157
- expandMemSetAsLoop (Memset );
241
+ expandMemSetAsLoop (MemSet );
158
242
}
159
243
}
160
244
MemCall->eraseFromParent ();
0 commit comments