diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index eacc67cd5a475..bbdbbef9c54ab 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -149,6 +149,11 @@ static cl::opt UseLIRCodeSizeHeurs( "with -Os/-Oz"), cl::init(true), cl::Hidden); +static cl::opt ForceMemsetPatternIntrinsic( + "loop-idiom-force-memset-pattern-intrinsic", + cl::desc("Use memset.pattern intrinsic whenever possible"), cl::init(false), + cl::Hidden); + namespace { class LoopIdiomRecognize { @@ -322,10 +327,15 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) { L->getHeader()->getParent()->hasOptSize() && UseLIRCodeSizeHeurs; HasMemset = TLI->has(LibFunc_memset); + // TODO: Unconditionally enable use of the memset pattern intrinsic (or at + // least, opt-in via target hook) once we are confident it will never result + // in worse codegen than without. For now, use it only when the target + // supports the memset_pattern16 libcall, or when that is overridden by the + // loop-idiom-force-memset-pattern-intrinsic command-line option. HasMemsetPattern = TLI->has(LibFunc_memset_pattern16); HasMemcpy = TLI->has(LibFunc_memcpy); - if (HasMemset || HasMemsetPattern || HasMemcpy) + if (HasMemset || HasMemsetPattern || ForceMemsetPatternIntrinsic || HasMemcpy) if (SE->hasLoopInvariantBackedgeTakenCount(L)) return runOnCountableLoop(); @@ -411,14 +421,12 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) { if (Size > 16) return nullptr; - // If the constant is exactly 16 bytes, just use it. - if (Size == 16) - return C; + // For now, don't handle types that aren't ints, floats, or pointers. + if (!isa(C) && !isa(C) && + !isa(C->getType())) + return nullptr; - // Otherwise, we'll use an array of the constants. 
- unsigned ArraySize = 16 / Size; - ArrayType *AT = ArrayType::get(V->getType(), ArraySize); - return ConstantArray::get(AT, std::vector(ArraySize, C)); + return C; } LoopIdiomRecognize::LegalStoreKind @@ -482,8 +490,9 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) { // It looks like we can use SplatValue. return LegalStoreKind::Memset; } - if (!UnorderedAtomic && HasMemsetPattern && !DisableLIRP::Memset && - // Don't create memset_pattern16s with address spaces. + if (!UnorderedAtomic && (HasMemsetPattern || ForceMemsetPatternIntrinsic) && + !DisableLIRP::Memset && + // Don't create memset.pattern intrinsic calls with address spaces. StorePtr->getType()->getPointerAddressSpace() == 0 && getMemSetPatternValue(StoredVal, DL)) { // It looks like we can use PatternValue! @@ -1083,53 +1092,92 @@ bool LoopIdiomRecognize::processLoopStridedStore( return Changed; // Okay, everything looks good, insert the memset. + // MemsetArg is the number of bytes for the memset libcall, and the number + // of pattern repetitions if the memset.pattern intrinsic is being used. + Value *MemsetArg; + std::optional BytesWritten; + + if (PatternValue && (HasMemsetPattern || ForceMemsetPatternIntrinsic)) { + const SCEV *TripCountS = + SE->getTripCountFromExitCount(BECount, IntIdxTy, CurLoop); + if (!Expander.isSafeToExpand(TripCountS)) + return Changed; + const SCEVConstant *ConstStoreSize = dyn_cast(StoreSizeSCEV); + if (!ConstStoreSize) + return Changed; + Value *TripCount = Expander.expandCodeFor(TripCountS, IntIdxTy, + Preheader->getTerminator()); + uint64_t PatternRepsPerTrip = + (ConstStoreSize->getValue()->getZExtValue() * 8) / + DL->getTypeSizeInBits(PatternValue->getType()); + // If ConstStoreSize is not equal to the width of PatternValue, then + // MemsetArg is TripCount * (ConstStoreSize/PatternValueWidth). Otherwise, + // MemsetArg is just TripCount. + MemsetArg = + PatternRepsPerTrip == 1 + ? 
TripCount + : Builder.CreateMul(TripCount, + Builder.getIntN(IntIdxTy->getIntegerBitWidth(), + PatternRepsPerTrip)); + if (auto *CI = dyn_cast(TripCount)) + BytesWritten = + CI->getZExtValue() * ConstStoreSize->getValue()->getZExtValue(); + } else { + const SCEV *NumBytesS = + getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop, DL, SE); - const SCEV *NumBytesS = - getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop, DL, SE); - - // TODO: ideally we should still be able to generate memset if SCEV expander - // is taught to generate the dependencies at the latest point. - if (!Expander.isSafeToExpand(NumBytesS)) - return Changed; - - Value *NumBytes = - Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator()); + // TODO: ideally we should still be able to generate memset if SCEV expander + // is taught to generate the dependencies at the latest point. + if (!Expander.isSafeToExpand(NumBytesS)) + return Changed; + MemsetArg = + Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator()); + if (auto *CI = dyn_cast(MemsetArg)) + BytesWritten = CI->getZExtValue(); + } + assert(MemsetArg && "MemsetArg should have been set"); - if (!SplatValue && !isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16)) + if (!SplatValue && !(ForceMemsetPatternIntrinsic || + isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16))) return Changed; AAMDNodes AATags = TheStore->getAAMetadata(); for (Instruction *Store : Stores) AATags = AATags.merge(Store->getAAMetadata()); - if (auto CI = dyn_cast(NumBytes)) - AATags = AATags.extendTo(CI->getZExtValue()); + if (BytesWritten) + AATags = AATags.extendTo(BytesWritten.value()); else AATags = AATags.extendTo(-1); CallInst *NewCall; if (SplatValue) { NewCall = Builder.CreateMemSet( - BasePtr, SplatValue, NumBytes, MaybeAlign(StoreAlignment), + BasePtr, SplatValue, MemsetArg, MaybeAlign(StoreAlignment), /*isVolatile=*/false, AATags.TBAA, AATags.Scope, AATags.NoAlias); } else { - assert (isLibFuncEmittable(M, TLI, 
LibFunc_memset_pattern16)); - // Everything is emitted in default address space - Type *Int8PtrTy = DestInt8PtrTy; - - StringRef FuncName = "memset_pattern16"; - FunctionCallee MSP = getOrInsertLibFunc(M, *TLI, LibFunc_memset_pattern16, - Builder.getVoidTy(), Int8PtrTy, Int8PtrTy, IntIdxTy); - inferNonMandatoryLibFuncAttrs(M, FuncName, *TLI); - - // Otherwise we should form a memset_pattern16. PatternValue is known to be - // an constant array of 16-bytes. Plop the value into a mergable global. - GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true, - GlobalValue::PrivateLinkage, - PatternValue, ".memset_pattern"); - GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // Ok to merge these. - GV->setAlignment(Align(16)); - Value *PatternPtr = GV; - NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes}); + assert(ForceMemsetPatternIntrinsic || + isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16)); + assert(isa(StoreSizeSCEV) && "Expected constant store size"); + + Value *PatternArg; + IntegerType *PatternArgTy = + Builder.getIntNTy(DL->getTypeSizeInBits(PatternValue->getType())); + + if (isa(PatternValue)) + PatternArg = PatternValue; + else if (isa(PatternValue)) + PatternArg = Builder.CreateBitCast(PatternValue, PatternArgTy); + else if (isa(PatternValue->getType())) + PatternArg = Builder.CreatePtrToInt(PatternValue, PatternArgTy); + else + report_fatal_error("Unexpected PatternValue type"); + + NewCall = Builder.CreateIntrinsic(Intrinsic::experimental_memset_pattern, + {DestInt8PtrTy, PatternArgTy, IntIdxTy}, + {BasePtr, PatternArg, MemsetArg, + ConstantInt::getFalse(M->getContext())}); + if (StoreAlignment) + cast(NewCall)->setDestAlignment(*StoreAlignment); // Set the TBAA info if present. 
if (AATags.TBAA) @@ -1419,7 +1467,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( AAMDNodes AATags = TheLoad->getAAMetadata(); AAMDNodes StoreAATags = TheStore->getAAMetadata(); AATags = AATags.merge(StoreAATags); - if (auto CI = dyn_cast(NumBytes)) + if (auto *CI = dyn_cast(NumBytes)) AATags = AATags.extendTo(CI->getZExtValue()); else AATags = AATags.extendTo(-1); diff --git a/llvm/test/Transforms/LoopIdiom/RISCV/memset-pattern.ll b/llvm/test/Transforms/LoopIdiom/RISCV/memset-pattern.ll new file mode 100644 index 0000000000000..fbe858199d190 --- /dev/null +++ b/llvm/test/Transforms/LoopIdiom/RISCV/memset-pattern.ll @@ -0,0 +1,49 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals +; RUN: opt -passes=loop-idiom -mtriple=riscv64 < %s -S | FileCheck %s +; RUN: opt -passes=loop-idiom -mtriple=riscv64 -loop-idiom-force-memset-pattern-intrinsic < %s -S \ +; RUN: | FileCheck -check-prefix=CHECK-INTRIN %s + +define dso_local void @double_memset(ptr nocapture %p) { +; CHECK-LABEL: @double_memset( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[I_07:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds double, ptr [[P:%.*]], i64 [[I_07]] +; CHECK-NEXT: store double 3.141590e+00, ptr [[PTR1]], align 1 +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_07]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 16 +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] +; +; CHECK-INTRIN-LABEL: @double_memset( +; CHECK-INTRIN-NEXT: entry: +; CHECK-INTRIN-NEXT: call void @llvm.experimental.memset.pattern.p0.i64.i64(ptr align 1 [[P:%.*]], i64 4614256650576692846, i64 16, i1 false) +; CHECK-INTRIN-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-INTRIN: for.cond.cleanup: +; CHECK-INTRIN-NEXT: ret void +; CHECK-INTRIN: 
for.body: +; CHECK-INTRIN-NEXT: [[I_07:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-INTRIN-NEXT: [[PTR1:%.*]] = getelementptr inbounds double, ptr [[P]], i64 [[I_07]] +; CHECK-INTRIN-NEXT: [[INC]] = add nuw nsw i64 [[I_07]], 1 +; CHECK-INTRIN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 16 +; CHECK-INTRIN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] +; +entry: + br label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %ptr1 = getelementptr inbounds double, ptr %p, i64 %i.07 + store double 3.14159e+00, ptr %ptr1, align 1 + %inc = add nuw nsw i64 %i.07, 1 + %exitcond.not = icmp eq i64 %inc, 16 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} +;. +; CHECK-INTRIN: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } +;. diff --git a/llvm/test/Transforms/LoopIdiom/basic.ll b/llvm/test/Transforms/LoopIdiom/basic.ll index 45af783026575..95b4ee729b40d 100644 --- a/llvm/test/Transforms/LoopIdiom/basic.ll +++ b/llvm/test/Transforms/LoopIdiom/basic.ll @@ -7,8 +7,6 @@ target triple = "x86_64-apple-darwin10.0.0" ;. ; CHECK: @G = global i32 5 ; CHECK: @g_50 = global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16 -; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 1, i32 1, i32 1, i32 1], align 16 -; CHECK: @.memset_pattern.1 = private unnamed_addr constant [2 x ptr] [ptr @G, ptr @G], align 16 ;. 
define void @test1(ptr %Base, i64 %Size) nounwind ssp { ; CHECK-LABEL: @test1( @@ -533,7 +531,7 @@ for.end13: ; preds = %for.inc10 define void @test11_pattern(ptr nocapture %P) nounwind ssp { ; CHECK-LABEL: @test11_pattern( ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @memset_pattern16(ptr [[P:%.*]], ptr @.memset_pattern, i64 40000) +; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr align 4 [[P:%.*]], i32 1, i64 10000, i1 false) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ] @@ -596,7 +594,7 @@ for.end: ; preds = %for.body define void @test13_pattern(ptr nocapture %P) nounwind ssp { ; CHECK-LABEL: @test13_pattern( ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @memset_pattern16(ptr [[P:%.*]], ptr @.memset_pattern.1, i64 80000) +; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i64.i64(ptr align 4 [[P:%.*]], i64 ptrtoint (ptr @G to i64), i64 10000, i1 false) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopIdiom/memset-pattern-tbaa.ll b/llvm/test/Transforms/LoopIdiom/memset-pattern-tbaa.ll index 9bbc732f8d07d..62e72c45da1ee 100644 --- a/llvm/test/Transforms/LoopIdiom/memset-pattern-tbaa.ll +++ b/llvm/test/Transforms/LoopIdiom/memset-pattern-tbaa.ll @@ -6,15 +6,10 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 target triple = "x86_64-apple-darwin10.0.0" -;. -; CHECK: @.memset_pattern = private unnamed_addr constant [2 x double] [double 3.141590e+00, double 3.141590e+00], align 16 -; CHECK: @.memset_pattern.1 = private unnamed_addr constant [2 x double] [double 3.141590e+00, double 3.141590e+00], align 16 -; CHECK: @.memset_pattern.2 = private unnamed_addr constant [2 x double] [double 3.141590e+00, double 3.141590e+00], align 16 -;. 
define dso_local void @double_memset(ptr nocapture %p) { ; CHECK-LABEL: @double_memset( ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @memset_pattern16(ptr [[P:%.*]], ptr @.memset_pattern, i64 128), !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i64.i64(ptr align 1 [[P:%.*]], i64 4614256650576692846, i64 16, i1 false), !tbaa [[TBAA0:![0-9]+]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void @@ -44,7 +39,7 @@ for.body: define dso_local void @struct_memset(ptr nocapture %p) { ; CHECK-LABEL: @struct_memset( ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @memset_pattern16(ptr [[P:%.*]], ptr @.memset_pattern.1, i64 128), !tbaa [[TBAA4:![0-9]+]] +; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i64.i64(ptr align 1 [[P:%.*]], i64 4614256650576692846, i64 16, i1 false), !tbaa [[TBAA4:![0-9]+]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void @@ -73,8 +68,7 @@ for.body: define dso_local void @var_memset(ptr nocapture %p, i64 %len) { ; CHECK-LABEL: @var_memset( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = shl nuw i64 [[LEN:%.*]], 3 -; CHECK-NEXT: call void @memset_pattern16(ptr [[P:%.*]], ptr @.memset_pattern.2, i64 [[TMP0]]) +; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i64.i64(ptr align 1 [[P:%.*]], i64 4614256650576692846, i64 [[TMP0:%.*]], i1 false) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void @@ -82,7 +76,7 @@ define dso_local void @var_memset(ptr nocapture %p, i64 %len) { ; CHECK-NEXT: [[I_07:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds double, ptr [[P]], i64 [[I_07]] ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_07]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[LEN]] +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[TMP0]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label 
[[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] ; entry: @@ -116,7 +110,7 @@ for.body: !21 = !{!22, !20, i64 0} !22 = !{!"B", !20, i64 0} ;. -; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } ;. ; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} ; CHECK: [[META1]] = !{!"double", [[META2:![0-9]+]], i64 0} diff --git a/llvm/test/Transforms/LoopIdiom/struct_pattern.ll b/llvm/test/Transforms/LoopIdiom/struct_pattern.ll index b4ce766fdd73b..4b33f30b59f80 100644 --- a/llvm/test/Transforms/LoopIdiom/struct_pattern.ll +++ b/llvm/test/Transforms/LoopIdiom/struct_pattern.ll @@ -16,11 +16,6 @@ target triple = "x86_64-apple-darwin10.0.0" ;} -;. -; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16 -; CHECK: @.memset_pattern.1 = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16 -; CHECK: @.memset_pattern.2 = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16 -;. 
define void @bar1(ptr %f, i32 %n) nounwind ssp { ; CHECK-LABEL: @bar1( ; CHECK-NEXT: entry: @@ -28,8 +23,8 @@ define void @bar1(ptr %f, i32 %n) nounwind ssp { ; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]] ; CHECK: for.body.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3 -; CHECK-NEXT: call void @memset_pattern16(ptr [[F:%.*]], ptr @.memset_pattern, i64 [[TMP1]]) +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr align 4 [[F:%.*]], i32 2, i64 [[TMP1]], i1 false) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -82,8 +77,8 @@ define void @bar2(ptr %f, i32 %n) nounwind ssp { ; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]] ; CHECK: for.body.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3 -; CHECK-NEXT: call void @memset_pattern16(ptr [[F:%.*]], ptr @.memset_pattern.1, i64 [[TMP1]]) +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr align 4 [[F:%.*]], i32 2, i64 [[TMP1]], i1 false) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -142,7 +137,8 @@ define void @bar3(ptr nocapture %f, i32 %n) nounwind ssp { ; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 3 ; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP1]], [[TMP4]] ; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[F:%.*]], i64 [[TMP5]] -; CHECK-NEXT: call void @memset_pattern16(ptr [[UGLYGEP]], ptr @.memset_pattern.2, i64 [[TMP1]]) +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP0]], 2 +; CHECK-NEXT: call 
void @llvm.experimental.memset.pattern.p0.i32.i64(ptr align 4 [[UGLYGEP]], i32 2, i64 [[TMP7]], i1 false) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -292,5 +288,5 @@ for.end: ; preds = %for.end.loopexit, % } ;. ; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind ssp } -; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } ;. diff --git a/llvm/test/Transforms/LoopIdiom/unroll-custom-dl.ll b/llvm/test/Transforms/LoopIdiom/unroll-custom-dl.ll index bca922094eebb..42db236f1d6b9 100644 --- a/llvm/test/Transforms/LoopIdiom/unroll-custom-dl.ll +++ b/llvm/test/Transforms/LoopIdiom/unroll-custom-dl.ll @@ -11,9 +11,6 @@ target triple = "x86_64-apple-darwin10.0.0" ; f[i+1] = 0; ; } ;} -;. -; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16 -;. 
define void @test(ptr %f, i32 %n) nounwind ssp { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: @@ -81,9 +78,9 @@ define void @test_pattern(ptr %f, i32 %n) nounwind ssp { ; CHECK: for.body.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[MUL]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 8 -; CHECK-NEXT: call void @memset_pattern16(ptr [[F:%.*]], ptr @.memset_pattern, i32 [[TMP3]]) +; CHECK-NEXT: [[TMP2:%.*]] = add nuw i32 [[TMP1]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 2 +; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i32.i32(ptr align 4 [[F:%.*]], i32 2, i32 [[TMP3]], i1 false) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -126,5 +123,4 @@ for.end: ; preds = %for.end.loopexit, % ;. ; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind ssp } ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } -; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } ;. diff --git a/llvm/test/Transforms/LoopIdiom/unroll.ll b/llvm/test/Transforms/LoopIdiom/unroll.ll index 621082b9eeb6f..63a960e4f3b7c 100644 --- a/llvm/test/Transforms/LoopIdiom/unroll.ll +++ b/llvm/test/Transforms/LoopIdiom/unroll.ll @@ -11,9 +11,6 @@ target triple = "x86_64-apple-darwin10.0.0" ; f[i+1] = 0; ; } ;} -;. -; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16 -;. 
define void @test(ptr %f, i32 %n) nounwind ssp { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: @@ -84,9 +81,9 @@ define void @test_pattern(ptr %f, i32 %n) nounwind ssp { ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[MUL]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[TMP0]], -1 ; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 3 -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP3]], 8 -; CHECK-NEXT: call void @memset_pattern16(ptr [[F:%.*]], ptr @.memset_pattern, i64 [[TMP4]]) +; CHECK-NEXT: [[TMP3:%.*]] = add nuw i64 [[TMP2]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 +; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr align 4 [[F:%.*]], i32 2, i64 [[TMP4]], i1 false) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -130,5 +127,4 @@ for.end: ; preds = %for.end.loopexit, % ;. ; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind ssp } ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } -; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } ;.