Skip to content

[LoopIdiom] Select llvm.experimental.memset.pattern intrinsic rather than memset_pattern16 libcall #126736

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 91 additions & 43 deletions llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,11 @@ static cl::opt<bool> UseLIRCodeSizeHeurs(
"with -Os/-Oz"),
cl::init(true), cl::Hidden);

// Hidden command-line override, off by default. Elsewhere in this file the
// pattern-memset path is gated on the target providing the memset_pattern16
// libcall; each of those checks also accepts this flag, so setting it lets the
// pass take that path on targets without the libcall.
static cl::opt<bool> ForceMemsetPatternIntrinsic(
"loop-idiom-force-memset-pattern-intrinsic",
cl::desc("Use memset.pattern intrinsic whenever possible"), cl::init(false),
cl::Hidden);

namespace {

class LoopIdiomRecognize {
Expand Down Expand Up @@ -322,10 +327,15 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
L->getHeader()->getParent()->hasOptSize() && UseLIRCodeSizeHeurs;

HasMemset = TLI->has(LibFunc_memset);
// TODO: Unconditionally enable use of the memset pattern intrinsic (or at
// least, opt-in via target hook) once we are confident it will never result
// in worse codegen than without. For now, use it only when the target
// supports the memset_pattern16 libcall (unless this is overridden by a
// command line option).
HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
HasMemcpy = TLI->has(LibFunc_memcpy);

if (HasMemset || HasMemsetPattern || HasMemcpy)
if (HasMemset || HasMemsetPattern || ForceMemsetPatternIntrinsic || HasMemcpy)
if (SE->hasLoopInvariantBackedgeTakenCount(L))
return runOnCountableLoop();

Expand Down Expand Up @@ -411,14 +421,12 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
if (Size > 16)
return nullptr;

// If the constant is exactly 16 bytes, just use it.
if (Size == 16)
return C;
// For now, don't handle types that aren't int, floats, or pointers.
if (!isa<ConstantInt>(C) && !isa<ConstantFP>(C) &&
!isa<PointerType>(C->getType()))
return nullptr;

// Otherwise, we'll use an array of the constants.
unsigned ArraySize = 16 / Size;
ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
return C;
}

LoopIdiomRecognize::LegalStoreKind
Expand Down Expand Up @@ -482,8 +490,9 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
// It looks like we can use SplatValue.
return LegalStoreKind::Memset;
}
if (!UnorderedAtomic && HasMemsetPattern && !DisableLIRP::Memset &&
// Don't create memset_pattern16s with address spaces.
if (!UnorderedAtomic && (HasMemsetPattern || ForceMemsetPatternIntrinsic) &&
!DisableLIRP::Memset &&
// Don't create memset.pattern intrinsic calls with address spaces.
StorePtr->getType()->getPointerAddressSpace() == 0 &&
getMemSetPatternValue(StoredVal, DL)) {
// It looks like we can use PatternValue!
Expand Down Expand Up @@ -1083,53 +1092,92 @@ bool LoopIdiomRecognize::processLoopStridedStore(
return Changed;

// Okay, everything looks good, insert the memset.
// MemsetArg is the number of bytes for the memset libcall, and the number
// of pattern repetitions if the memset.pattern intrinsic is being used.
Value *MemsetArg;
std::optional<int64_t> BytesWritten;

if (PatternValue && (HasMemsetPattern || ForceMemsetPatternIntrinsic)) {
const SCEV *TripCountS =
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a bunch of complexity in this section of the change that I'm not getting. Since we need the NumBytes (in the original code) for the AAInfo, why not just leave that alone? Isn't count (for memset.pattern intrinsic) just the trip count of the loop by definition?

SE->getTripCountFromExitCount(BECount, IntIdxTy, CurLoop);
if (!Expander.isSafeToExpand(TripCountS))
return Changed;
const SCEVConstant *ConstStoreSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);
if (!ConstStoreSize)
return Changed;
Value *TripCount = Expander.expandCodeFor(TripCountS, IntIdxTy,
Preheader->getTerminator());
uint64_t PatternRepsPerTrip =
(ConstStoreSize->getValue()->getZExtValue() * 8) /
DL->getTypeSizeInBits(PatternValue->getType());
// If ConstStoreSize is not equal to the width of PatternValue, then
// MemsetArg is TripCount * (ConstStoreSize/PatternValueWidth). Else
// MemSetArg is just TripCount.
MemsetArg =
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this just a weird way of computing NumBytes?

PatternRepsPerTrip == 1
? TripCount
: Builder.CreateMul(TripCount,
Builder.getIntN(IntIdxTy->getIntegerBitWidth(),
PatternRepsPerTrip));
if (auto *CI = dyn_cast<ConstantInt>(TripCount))
BytesWritten =
CI->getZExtValue() * ConstStoreSize->getValue()->getZExtValue();
} else {
const SCEV *NumBytesS =
getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop, DL, SE);

const SCEV *NumBytesS =
getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop, DL, SE);

// TODO: ideally we should still be able to generate memset if SCEV expander
// is taught to generate the dependencies at the latest point.
if (!Expander.isSafeToExpand(NumBytesS))
return Changed;

Value *NumBytes =
Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
// TODO: ideally we should still be able to generate memset if SCEV expander
// is taught to generate the dependencies at the latest point.
if (!Expander.isSafeToExpand(NumBytesS))
return Changed;
MemsetArg =
Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
if (auto *CI = dyn_cast<ConstantInt>(MemsetArg))
BytesWritten = CI->getZExtValue();
}
assert(MemsetArg && "MemsetArg should have been set");

if (!SplatValue && !isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16))
if (!SplatValue && !(ForceMemsetPatternIntrinsic ||
isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16)))
return Changed;

AAMDNodes AATags = TheStore->getAAMetadata();
for (Instruction *Store : Stores)
AATags = AATags.merge(Store->getAAMetadata());
if (auto CI = dyn_cast<ConstantInt>(NumBytes))
AATags = AATags.extendTo(CI->getZExtValue());
if (BytesWritten)
AATags = AATags.extendTo(BytesWritten.value());
else
AATags = AATags.extendTo(-1);

CallInst *NewCall;
if (SplatValue) {
NewCall = Builder.CreateMemSet(
BasePtr, SplatValue, NumBytes, MaybeAlign(StoreAlignment),
BasePtr, SplatValue, MemsetArg, MaybeAlign(StoreAlignment),
/*isVolatile=*/false, AATags.TBAA, AATags.Scope, AATags.NoAlias);
} else {
assert (isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16));
// Everything is emitted in default address space
Type *Int8PtrTy = DestInt8PtrTy;

StringRef FuncName = "memset_pattern16";
FunctionCallee MSP = getOrInsertLibFunc(M, *TLI, LibFunc_memset_pattern16,
Builder.getVoidTy(), Int8PtrTy, Int8PtrTy, IntIdxTy);
inferNonMandatoryLibFuncAttrs(M, FuncName, *TLI);

// Otherwise we should form a memset_pattern16. PatternValue is known to be
// an constant array of 16-bytes. Plop the value into a mergable global.
GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
GlobalValue::PrivateLinkage,
PatternValue, ".memset_pattern");
GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // Ok to merge these.
GV->setAlignment(Align(16));
Value *PatternPtr = GV;
NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes});
assert(ForceMemsetPatternIntrinsic ||
isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16));
assert(isa<SCEVConstant>(StoreSizeSCEV) && "Expected constant store size");

Value *PatternArg;
IntegerType *PatternArgTy =
Builder.getIntNTy(DL->getTypeSizeInBits(PatternValue->getType()));

if (isa<ConstantInt>(PatternValue))
PatternArg = PatternValue;
else if (isa<ConstantFP>(PatternValue))
PatternArg = Builder.CreateBitCast(PatternValue, PatternArgTy);
else if (isa<PointerType>(PatternValue->getType()))
PatternArg = Builder.CreatePtrToInt(PatternValue, PatternArgTy);
else
report_fatal_error("Unexpected PatternValue type");

NewCall = Builder.CreateIntrinsic(Intrinsic::experimental_memset_pattern,
{DestInt8PtrTy, PatternArgTy, IntIdxTy},
{BasePtr, PatternArg, MemsetArg,
ConstantInt::getFalse(M->getContext())});
if (StoreAlignment)
cast<MemSetPatternInst>(NewCall)->setDestAlignment(*StoreAlignment);

// Set the TBAA info if present.
if (AATags.TBAA)
Expand Down Expand Up @@ -1419,7 +1467,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
AAMDNodes AATags = TheLoad->getAAMetadata();
AAMDNodes StoreAATags = TheStore->getAAMetadata();
AATags = AATags.merge(StoreAATags);
if (auto CI = dyn_cast<ConstantInt>(NumBytes))
if (auto *CI = dyn_cast<ConstantInt>(NumBytes))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

precommit this please

AATags = AATags.extendTo(CI->getZExtValue());
else
AATags = AATags.extendTo(-1);
Expand Down
49 changes: 49 additions & 0 deletions llvm/test/Transforms/LoopIdiom/RISCV/memset-pattern.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt -passes=loop-idiom -mtriple=riscv64 < %s -S | FileCheck %s
; RUN: opt -passes=loop-idiom -mtriple=riscv64 -loop-idiom-force-memset-pattern-intrinsic < %s -S \
; RUN: | FileCheck -check-prefix=CHECK-INTRIN %s

; A 16-iteration loop that stores the constant double 3.14159 to consecutive
; elements of %p. Per the autogenerated assertions below, the default
; invocation leaves the loop and its store in place, while the invocation with
; -loop-idiom-force-memset-pattern-intrinsic removes the store from the loop
; and emits one llvm.experimental.memset.pattern call with a count of 16 in
; the entry block.
define dso_local void @double_memset(ptr nocapture %p) {
; CHECK-LABEL: @double_memset(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
; CHECK: for.body:
; CHECK-NEXT: [[I_07:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds double, ptr [[P:%.*]], i64 [[I_07]]
; CHECK-NEXT: store double 3.141590e+00, ptr [[PTR1]], align 1
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_07]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 16
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
;
; CHECK-INTRIN-LABEL: @double_memset(
; CHECK-INTRIN-NEXT: entry:
; CHECK-INTRIN-NEXT: call void @llvm.experimental.memset.pattern.p0.i64.i64(ptr align 1 [[P:%.*]], i64 4614256650576692846, i64 16, i1 false)
; CHECK-INTRIN-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-INTRIN: for.cond.cleanup:
; CHECK-INTRIN-NEXT: ret void
; CHECK-INTRIN: for.body:
; CHECK-INTRIN-NEXT: [[I_07:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-INTRIN-NEXT: [[PTR1:%.*]] = getelementptr inbounds double, ptr [[P]], i64 [[I_07]]
; CHECK-INTRIN-NEXT: [[INC]] = add nuw nsw i64 [[I_07]], 1
; CHECK-INTRIN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 16
; CHECK-INTRIN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
;
entry:
br label %for.body

for.cond.cleanup:
ret void

for.body:
%i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
%ptr1 = getelementptr inbounds double, ptr %p, i64 %i.07
store double 3.14159e+00, ptr %ptr1, align 1
%inc = add nuw nsw i64 %i.07, 1
%exitcond.not = icmp eq i64 %inc, 16
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}
;.
; CHECK-INTRIN: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
;.
6 changes: 2 additions & 4 deletions llvm/test/Transforms/LoopIdiom/basic.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@ target triple = "x86_64-apple-darwin10.0.0"
;.
; CHECK: @G = global i32 5
; CHECK: @g_50 = global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16
; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 1, i32 1, i32 1, i32 1], align 16
; CHECK: @.memset_pattern.1 = private unnamed_addr constant [2 x ptr] [ptr @G, ptr @G], align 16
;.
define void @test1(ptr %Base, i64 %Size) nounwind ssp {
; CHECK-LABEL: @test1(
Expand Down Expand Up @@ -533,7 +531,7 @@ for.end13: ; preds = %for.inc10
define void @test11_pattern(ptr nocapture %P) nounwind ssp {
; CHECK-LABEL: @test11_pattern(
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @memset_pattern16(ptr [[P:%.*]], ptr @.memset_pattern, i64 40000)
; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr align 4 [[P:%.*]], i32 1, i64 10000, i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
Expand Down Expand Up @@ -596,7 +594,7 @@ for.end: ; preds = %for.body
define void @test13_pattern(ptr nocapture %P) nounwind ssp {
; CHECK-LABEL: @test13_pattern(
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @memset_pattern16(ptr [[P:%.*]], ptr @.memset_pattern.1, i64 80000)
; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i64.i64(ptr align 4 [[P:%.*]], i64 ptrtoint (ptr @G to i64), i64 10000, i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
Expand Down
16 changes: 5 additions & 11 deletions llvm/test/Transforms/LoopIdiom/memset-pattern-tbaa.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,10 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-darwin10.0.0"


;.
; CHECK: @.memset_pattern = private unnamed_addr constant [2 x double] [double 3.141590e+00, double 3.141590e+00], align 16
; CHECK: @.memset_pattern.1 = private unnamed_addr constant [2 x double] [double 3.141590e+00, double 3.141590e+00], align 16
; CHECK: @.memset_pattern.2 = private unnamed_addr constant [2 x double] [double 3.141590e+00, double 3.141590e+00], align 16
;.
define dso_local void @double_memset(ptr nocapture %p) {
; CHECK-LABEL: @double_memset(
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @memset_pattern16(ptr [[P:%.*]], ptr @.memset_pattern, i64 128), !tbaa [[TBAA0:![0-9]+]]
; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i64.i64(ptr align 1 [[P:%.*]], i64 4614256650576692846, i64 16, i1 false), !tbaa [[TBAA0:![0-9]+]]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
Expand Down Expand Up @@ -44,7 +39,7 @@ for.body:
define dso_local void @struct_memset(ptr nocapture %p) {
; CHECK-LABEL: @struct_memset(
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @memset_pattern16(ptr [[P:%.*]], ptr @.memset_pattern.1, i64 128), !tbaa [[TBAA4:![0-9]+]]
; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i64.i64(ptr align 1 [[P:%.*]], i64 4614256650576692846, i64 16, i1 false), !tbaa [[TBAA4:![0-9]+]]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
Expand Down Expand Up @@ -73,16 +68,15 @@ for.body:
define dso_local void @var_memset(ptr nocapture %p, i64 %len) {
; CHECK-LABEL: @var_memset(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = shl nuw i64 [[LEN:%.*]], 3
; CHECK-NEXT: call void @memset_pattern16(ptr [[P:%.*]], ptr @.memset_pattern.2, i64 [[TMP0]])
; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i64.i64(ptr align 1 [[P:%.*]], i64 4614256650576692846, i64 [[TMP0:%.*]], i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
; CHECK: for.body:
; CHECK-NEXT: [[I_07:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds double, ptr [[P]], i64 [[I_07]]
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_07]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[LEN]]
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[TMP0]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
;
entry:
Expand Down Expand Up @@ -116,7 +110,7 @@ for.body:
!21 = !{!22, !20, i64 0}
!22 = !{!"B", !20, i64 0}
;.
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
;.
; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
; CHECK: [[META1]] = !{!"double", [[META2:![0-9]+]], i64 0}
Expand Down
18 changes: 7 additions & 11 deletions llvm/test/Transforms/LoopIdiom/struct_pattern.ll
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,15 @@ target triple = "x86_64-apple-darwin10.0.0"
;}


;.
; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
; CHECK: @.memset_pattern.1 = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
; CHECK: @.memset_pattern.2 = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
;.
define void @bar1(ptr %f, i32 %n) nounwind ssp {
; CHECK-LABEL: @bar1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[N:%.*]], 0
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
; CHECK-NEXT: call void @memset_pattern16(ptr [[F:%.*]], ptr @.memset_pattern, i64 [[TMP1]])
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr align 4 [[F:%.*]], i32 2, i64 [[TMP1]], i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
Expand Down Expand Up @@ -82,8 +77,8 @@ define void @bar2(ptr %f, i32 %n) nounwind ssp {
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
; CHECK-NEXT: call void @memset_pattern16(ptr [[F:%.*]], ptr @.memset_pattern.1, i64 [[TMP1]])
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr align 4 [[F:%.*]], i32 2, i64 [[TMP1]], i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
Expand Down Expand Up @@ -142,7 +137,8 @@ define void @bar3(ptr nocapture %f, i32 %n) nounwind ssp {
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 3
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP1]], [[TMP4]]
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[F:%.*]], i64 [[TMP5]]
; CHECK-NEXT: call void @memset_pattern16(ptr [[UGLYGEP]], ptr @.memset_pattern.2, i64 [[TMP1]])
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP0]], 2
; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr align 4 [[UGLYGEP]], i32 2, i64 [[TMP7]], i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
Expand Down Expand Up @@ -292,5 +288,5 @@ for.end: ; preds = %for.end.loopexit, %
}
;.
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind ssp }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
;.
Loading
Loading