@@ -31,6 +31,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
3131// ===----------------------------------------------------------------------===//
3232
3333#include " GenXLowerAggrCopies.h"
34+ #include " GenX.h"
3435#include " llvm/Analysis/TargetTransformInfo.h"
3536#include " llvm/CodeGen/StackProtector.h"
3637#include " llvm/IR/Constants.h"
@@ -47,10 +48,13 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
4748#include " llvm/Support/Debug.h"
4849#include " llvm/Transforms/Utils/BasicBlockUtils.h"
4950#include " llvm/Transforms/Utils/LowerMemIntrinsics.h"
50- #include " Probe/Assertion.h"
5151
52+ #include " Probe/Assertion.h"
5253#include " llvmWrapper/IR/DerivedTypes.h"
5354
55+ #include < tuple>
56+ #include < vector>
57+
5458#define DEBUG_TYPE " GENX_LOWERAGGRCOPIES"
5559
5660using namespace llvm ;
@@ -91,6 +95,98 @@ struct GenXLowerAggrCopies : public FunctionPass {
9195
9296char GenXLowerAggrCopies::ID = 0 ;
9397
namespace {
// One contiguous piece of a memory region that is filled by a single vector
// store. Both fields are counted in elements of the stored value type: Offset
// is used as a GEP index from the base address and Width as the number of
// vector elements.
struct SliceInfo {
  // Index of the first element of the slice relative to the base address.
  int Offset;
  // Number of elements covered by the slice.
  int Width;
};
} // namespace
104+
105+ static std::vector<SliceInfo> getLegalLengths (int TotalLength) {
106+ std::vector<SliceInfo> Slices;
107+ for (int Offset = 0 ; TotalLength;) {
108+ int Width = PowerOf2Floor (TotalLength);
109+ Slices.push_back ({Offset, Width});
110+ Offset += Width;
111+ TotalLength -= Width;
112+ }
113+ return std::move (Slices);
114+ }
115+
116+ // Original memset intrinsic fills memory with 8-bit values.
117+ // This function checks whether bigger type can be used (e.g. storing by 32-bit
118+ // values).
119+ // Desired type size is provided with \p CoalescedTySize parametr and given in
120+ // bytes.
121+ static bool memSetCanBeCoalesced (MemSetInst &MemSet, int CoalescedTySize) {
122+ auto OrigLength = cast<ConstantInt>(MemSet.getLength ())->getSExtValue ();
123+ IGC_ASSERT_MESSAGE (MemSet.getValue ()->getType ()->getScalarSizeInBits () ==
124+ genx::ByteBits,
125+ " memset is expected to store by bytes" );
126+ IGC_ASSERT_MESSAGE (CoalescedTySize >= 1 && isPowerOf2_32 (CoalescedTySize),
127+ " wrong argument: invalid CoalescedTySize" );
128+ return OrigLength % CoalescedTySize == 0 &&
129+ static_cast <int >(MemSet.getDestAlignment ()) >= CoalescedTySize;
130+ }
131+
132+ // Original memset intrinsic fills memory with 8-bit values.
133+ // This function checks whether bigger type can be used (e.g. storing by 32-bit
134+ // values).
135+ // New coalesced value and corresponding base address and length are returned
136+ // respectively.
137+ // New instructions may be inserted before the \p MemSet to produce these new
138+ // values.
139+ static std::tuple<Value &, Value &, int >
140+ defineOptimalValueAndLength (MemSetInst &MemSet) {
141+ auto OrigLength = cast<ConstantInt>(MemSet.getLength ())->getSExtValue ();
142+ Value &OrigSetVal = *MemSet.getValue ();
143+ Value &OrigBaseAddr = *MemSet.getRawDest ();
144+
145+ // Because DWord is better than Byte and causes minimal problems.
146+ // OWord can be better but but it requires more code.
147+ constexpr int CoalescedTySize = genx::DWordBytes;
148+ if (!memSetCanBeCoalesced (MemSet, CoalescedTySize))
149+ return {OrigSetVal, OrigBaseAddr, OrigLength};
150+
151+ IRBuilder<> IRB{&MemSet};
152+ auto *PreNewSetVal = IRB.CreateVectorSplat (
153+ CoalescedTySize, &OrigSetVal, OrigSetVal.getName () + " .pre.coalesce" );
154+ auto *NewSetVal = IRB.CreateBitCast (PreNewSetVal, IRB.getInt32Ty (),
155+ OrigSetVal.getName () + " .coalesce" );
156+ auto DstAS = cast<PointerType>(OrigBaseAddr.getType ())->getAddressSpace ();
157+ auto *NewBaseAddr =
158+ IRB.CreateBitCast (&OrigBaseAddr, IRB.getInt32Ty ()->getPointerTo (DstAS),
159+ OrigBaseAddr.getName () + " .align" );
160+ return {*NewSetVal, *NewBaseAddr, OrigLength / CoalescedTySize};
161+ }
162+
163+ // Fills memory section/slice defined by \p Slice and \p BaseAddr parameters
164+ // with \p SetVal values. Memory is filled by a vector store instruction.
165+ static void setMemorySliceWithVecStore (SliceInfo Slice, Value &SetVal,
166+ Value &BaseAddr,
167+ Instruction *InsertionPt) {
168+ IGC_ASSERT_MESSAGE (
169+ InsertionPt,
170+ " wrong argument: insertion point must be a valid instruction" );
171+ IGC_ASSERT_MESSAGE (Slice.Offset >= 0 && isPowerOf2_32 (Slice.Width ),
172+ " illegal slice is provided" );
173+ IGC_ASSERT_MESSAGE (SetVal.getType () ==
174+ BaseAddr.getType ()->getPointerElementType (),
175+ " value and pointer types must correspond" );
176+
177+ auto *VecTy = IGCLLVM::FixedVectorType::get (SetVal.getType (), Slice.Width );
178+ IRBuilder<> IRB (InsertionPt);
179+ Value *WriteOut = IRB.CreateVectorSplat (Slice.Width , &SetVal);
180+ auto *DstAddr = &BaseAddr;
181+ if (Slice.Offset != 0 )
182+ DstAddr = IRB.CreateGEP (BaseAddr.getType ()->getPointerElementType (),
183+ &BaseAddr, IRB.getInt32 (Slice.Offset ),
184+ BaseAddr.getName () + " .addr.offset" );
185+ auto DstAS = cast<PointerType>(DstAddr->getType ())->getAddressSpace ();
186+ auto *StoreVecPtr = IRB.CreateBitCast (DstAddr, VecTy->getPointerTo (DstAS));
187+ IRB.CreateStore (WriteOut, StoreVecPtr);
188+ }
189+
94190bool GenXLowerAggrCopies::runOnFunction (Function &F) {
95191 SmallVector<MemIntrinsic *, 4 > MemCalls;
96192
@@ -135,26 +231,14 @@ bool GenXLowerAggrCopies::runOnFunction(Function &F) {
135231 } else {
136232 expandMemMoveAsLoop (Memmove);
137233 }
138- } else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) {
234+ } else if (MemSetInst *MemSet = dyn_cast<MemSetInst>(MemCall)) {
139235 if (doLinearExpand) {
140- llvm::Value *SetVal = Memset->getValue ();
141- llvm::Value *LenVal = Memset->getLength ();
142- IGC_ASSERT (isa<Constant>(LenVal));
143- IGC_ASSERT (SetVal->getType ()->getScalarSizeInBits () == 8 );
144- auto Len = (unsigned )cast<ConstantInt>(LenVal)->getZExtValue ();
145- auto VecTy = IGCLLVM::FixedVectorType::get (SetVal->getType (), Len);
146- Value *WriteOut = UndefValue::get (VecTy);
147- IRBuilder<> IRB (Memset);
148- for (unsigned i = 0 ; i < Len; ++i) {
149- WriteOut = IRB.CreateInsertElement (WriteOut, SetVal, IRB.getInt32 (i));
150- }
151- auto DstAddr = Memset->getRawDest ();
152- unsigned dstAS = cast<PointerType>(DstAddr->getType ())->getAddressSpace ();
153- auto StorePtrV =
154- IRB.CreateBitCast (DstAddr, VecTy->getPointerTo (dstAS));
155- IRB.CreateStore (WriteOut, StorePtrV);
236+ auto &&[SetVal, BaseAddr, Len] = defineOptimalValueAndLength (*MemSet);
237+ std::vector<SliceInfo> LegalLengths = getLegalLengths (Len);
238+ for (SliceInfo Slice : LegalLengths)
239+ setMemorySliceWithVecStore (Slice, SetVal, BaseAddr, MemSet);
156240 } else {
157- expandMemSetAsLoop (Memset );
241+ expandMemSetAsLoop (MemSet );
158242 }
159243 }
160244 MemCall->eraseFromParent ();
0 commit comments