Skip to content

Commit 19e6d54

Browse files
committed
[LV] Re-use existing compare if possible for diff checks.
SCEV simplifying the subtraction may result in redundant compares that are all OR'd together. Keep track of the generated operands in SeenCompares, with the key being the pair of operands for the compare. If we alrady generated the same compare previously, skip it.
1 parent 0d9f557 commit 19e6d54

File tree

2 files changed

+14
-85
lines changed

2 files changed

+14
-85
lines changed

llvm/lib/Transforms/Utils/LoopUtils.cpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1787,17 +1787,28 @@ Value *llvm::addDiffRuntimeChecks(
17871787
// Our instructions might fold to a constant.
17881788
Value *MemoryRuntimeCheck = nullptr;
17891789

1790+
auto &SE = *Expander.getSE();
1791+
// Map to keep track of created compares, The key is the pair of operands for
1792+
// the compare, to allow detecting and re-using redundant compares.
1793+
DenseMap<std::pair<Value *, Value *>, Value *> SeenCompares;
17901794
for (const auto &C : Checks) {
17911795
Type *Ty = C.SinkStart->getType();
17921796
// Compute VF * IC * AccessSize.
17931797
auto *VFTimesUFTimesSize =
17941798
ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()),
17951799
ConstantInt::get(Ty, IC * C.AccessSize));
1796-
auto &SE = *Expander.getSE();
17971800
Value *Diff = Expander.expandCodeFor(
17981801
SE.getMinusSCEV(C.SinkStart, C.SrcStart), Ty, Loc);
1799-
Value *IsConflict =
1802+
1803+
// Check if the same compare has already been created earlier. In that case,
1804+
// there is no need to check it again.
1805+
Value *IsConflict = SeenCompares.lookup({Diff, VFTimesUFTimesSize});
1806+
if (IsConflict)
1807+
continue;
1808+
1809+
IsConflict =
18001810
ChkBuilder.CreateICmpULT(Diff, VFTimesUFTimesSize, "diff.check");
1811+
SeenCompares.insert({{Diff, VFTimesUFTimesSize}, IsConflict});
18011812
if (C.NeedsFreeze)
18021813
IsConflict =
18031814
ChkBuilder.CreateFreeze(IsConflict, IsConflict->getName() + ".fr");

llvm/test/Transforms/LoopVectorize/runtime-checks-difference-simplifications.ll

Lines changed: 1 addition & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -51,89 +51,7 @@ define void @test_large_number_of_group(ptr %dst, i64 %off, i64 %N) {
5151
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[OFF]], 88
5252
; CHECK-NEXT: [[DIFF_CHECK18:%.*]] = icmp ult i64 [[TMP9]], 32
5353
; CHECK-NEXT: [[CONFLICT_RDX19:%.*]] = or i1 [[CONFLICT_RDX17]], [[DIFF_CHECK18]]
54-
; CHECK-NEXT: [[DIFF_CHECK20:%.*]] = icmp ult i64 [[OFF_MUL_8]], 32
55-
; CHECK-NEXT: [[CONFLICT_RDX21:%.*]] = or i1 [[CONFLICT_RDX19]], [[DIFF_CHECK20]]
56-
; CHECK-NEXT: [[DIFF_CHECK22:%.*]] = icmp ult i64 [[TMP0]], 32
57-
; CHECK-NEXT: [[CONFLICT_RDX23:%.*]] = or i1 [[CONFLICT_RDX21]], [[DIFF_CHECK22]]
58-
; CHECK-NEXT: [[DIFF_CHECK24:%.*]] = icmp ult i64 [[TMP1]], 32
59-
; CHECK-NEXT: [[CONFLICT_RDX25:%.*]] = or i1 [[CONFLICT_RDX23]], [[DIFF_CHECK24]]
60-
; CHECK-NEXT: [[DIFF_CHECK26:%.*]] = icmp ult i64 [[TMP2]], 32
61-
; CHECK-NEXT: [[CONFLICT_RDX27:%.*]] = or i1 [[CONFLICT_RDX25]], [[DIFF_CHECK26]]
62-
; CHECK-NEXT: [[DIFF_CHECK28:%.*]] = icmp ult i64 [[TMP3]], 32
63-
; CHECK-NEXT: [[CONFLICT_RDX29:%.*]] = or i1 [[CONFLICT_RDX27]], [[DIFF_CHECK28]]
64-
; CHECK-NEXT: [[DIFF_CHECK30:%.*]] = icmp ult i64 [[TMP4]], 32
65-
; CHECK-NEXT: [[CONFLICT_RDX31:%.*]] = or i1 [[CONFLICT_RDX29]], [[DIFF_CHECK30]]
66-
; CHECK-NEXT: [[DIFF_CHECK32:%.*]] = icmp ult i64 [[TMP5]], 32
67-
; CHECK-NEXT: [[CONFLICT_RDX33:%.*]] = or i1 [[CONFLICT_RDX31]], [[DIFF_CHECK32]]
68-
; CHECK-NEXT: [[DIFF_CHECK34:%.*]] = icmp ult i64 [[TMP6]], 32
69-
; CHECK-NEXT: [[CONFLICT_RDX35:%.*]] = or i1 [[CONFLICT_RDX33]], [[DIFF_CHECK34]]
70-
; CHECK-NEXT: [[DIFF_CHECK36:%.*]] = icmp ult i64 [[TMP7]], 32
71-
; CHECK-NEXT: [[CONFLICT_RDX37:%.*]] = or i1 [[CONFLICT_RDX35]], [[DIFF_CHECK36]]
72-
; CHECK-NEXT: [[DIFF_CHECK38:%.*]] = icmp ult i64 [[TMP8]], 32
73-
; CHECK-NEXT: [[CONFLICT_RDX39:%.*]] = or i1 [[CONFLICT_RDX37]], [[DIFF_CHECK38]]
74-
; CHECK-NEXT: [[DIFF_CHECK40:%.*]] = icmp ult i64 [[OFF_MUL_8]], 32
75-
; CHECK-NEXT: [[CONFLICT_RDX41:%.*]] = or i1 [[CONFLICT_RDX39]], [[DIFF_CHECK40]]
76-
; CHECK-NEXT: [[DIFF_CHECK42:%.*]] = icmp ult i64 [[TMP0]], 32
77-
; CHECK-NEXT: [[CONFLICT_RDX43:%.*]] = or i1 [[CONFLICT_RDX41]], [[DIFF_CHECK42]]
78-
; CHECK-NEXT: [[DIFF_CHECK44:%.*]] = icmp ult i64 [[TMP1]], 32
79-
; CHECK-NEXT: [[CONFLICT_RDX45:%.*]] = or i1 [[CONFLICT_RDX43]], [[DIFF_CHECK44]]
80-
; CHECK-NEXT: [[DIFF_CHECK46:%.*]] = icmp ult i64 [[TMP2]], 32
81-
; CHECK-NEXT: [[CONFLICT_RDX47:%.*]] = or i1 [[CONFLICT_RDX45]], [[DIFF_CHECK46]]
82-
; CHECK-NEXT: [[DIFF_CHECK48:%.*]] = icmp ult i64 [[TMP3]], 32
83-
; CHECK-NEXT: [[CONFLICT_RDX49:%.*]] = or i1 [[CONFLICT_RDX47]], [[DIFF_CHECK48]]
84-
; CHECK-NEXT: [[DIFF_CHECK50:%.*]] = icmp ult i64 [[TMP4]], 32
85-
; CHECK-NEXT: [[CONFLICT_RDX51:%.*]] = or i1 [[CONFLICT_RDX49]], [[DIFF_CHECK50]]
86-
; CHECK-NEXT: [[DIFF_CHECK52:%.*]] = icmp ult i64 [[TMP5]], 32
87-
; CHECK-NEXT: [[CONFLICT_RDX53:%.*]] = or i1 [[CONFLICT_RDX51]], [[DIFF_CHECK52]]
88-
; CHECK-NEXT: [[DIFF_CHECK54:%.*]] = icmp ult i64 [[TMP6]], 32
89-
; CHECK-NEXT: [[CONFLICT_RDX55:%.*]] = or i1 [[CONFLICT_RDX53]], [[DIFF_CHECK54]]
90-
; CHECK-NEXT: [[DIFF_CHECK56:%.*]] = icmp ult i64 [[TMP7]], 32
91-
; CHECK-NEXT: [[CONFLICT_RDX57:%.*]] = or i1 [[CONFLICT_RDX55]], [[DIFF_CHECK56]]
92-
; CHECK-NEXT: [[DIFF_CHECK58:%.*]] = icmp ult i64 [[OFF_MUL_8]], 32
93-
; CHECK-NEXT: [[CONFLICT_RDX59:%.*]] = or i1 [[CONFLICT_RDX57]], [[DIFF_CHECK58]]
94-
; CHECK-NEXT: [[DIFF_CHECK60:%.*]] = icmp ult i64 [[TMP0]], 32
95-
; CHECK-NEXT: [[CONFLICT_RDX61:%.*]] = or i1 [[CONFLICT_RDX59]], [[DIFF_CHECK60]]
96-
; CHECK-NEXT: [[DIFF_CHECK62:%.*]] = icmp ult i64 [[TMP1]], 32
97-
; CHECK-NEXT: [[CONFLICT_RDX63:%.*]] = or i1 [[CONFLICT_RDX61]], [[DIFF_CHECK62]]
98-
; CHECK-NEXT: [[DIFF_CHECK64:%.*]] = icmp ult i64 [[TMP2]], 32
99-
; CHECK-NEXT: [[CONFLICT_RDX65:%.*]] = or i1 [[CONFLICT_RDX63]], [[DIFF_CHECK64]]
100-
; CHECK-NEXT: [[DIFF_CHECK66:%.*]] = icmp ult i64 [[TMP3]], 32
101-
; CHECK-NEXT: [[CONFLICT_RDX67:%.*]] = or i1 [[CONFLICT_RDX65]], [[DIFF_CHECK66]]
102-
; CHECK-NEXT: [[DIFF_CHECK68:%.*]] = icmp ult i64 [[TMP4]], 32
103-
; CHECK-NEXT: [[CONFLICT_RDX69:%.*]] = or i1 [[CONFLICT_RDX67]], [[DIFF_CHECK68]]
104-
; CHECK-NEXT: [[DIFF_CHECK70:%.*]] = icmp ult i64 [[TMP5]], 32
105-
; CHECK-NEXT: [[CONFLICT_RDX71:%.*]] = or i1 [[CONFLICT_RDX69]], [[DIFF_CHECK70]]
106-
; CHECK-NEXT: [[DIFF_CHECK72:%.*]] = icmp ult i64 [[TMP6]], 32
107-
; CHECK-NEXT: [[CONFLICT_RDX73:%.*]] = or i1 [[CONFLICT_RDX71]], [[DIFF_CHECK72]]
108-
; CHECK-NEXT: [[DIFF_CHECK74:%.*]] = icmp ult i64 [[OFF_MUL_8]], 32
109-
; CHECK-NEXT: [[DIFF_CHECK75:%.*]] = icmp ult i64 [[TMP0]], 32
110-
; CHECK-NEXT: [[DIFF_CHECK76:%.*]] = icmp ult i64 [[TMP1]], 32
111-
; CHECK-NEXT: [[DIFF_CHECK77:%.*]] = icmp ult i64 [[TMP2]], 32
112-
; CHECK-NEXT: [[DIFF_CHECK78:%.*]] = icmp ult i64 [[TMP3]], 32
113-
; CHECK-NEXT: [[DIFF_CHECK79:%.*]] = icmp ult i64 [[TMP4]], 32
114-
; CHECK-NEXT: [[DIFF_CHECK80:%.*]] = icmp ult i64 [[TMP5]], 32
115-
; CHECK-NEXT: [[DIFF_CHECK81:%.*]] = icmp ult i64 [[OFF_MUL_8]], 32
116-
; CHECK-NEXT: [[DIFF_CHECK82:%.*]] = icmp ult i64 [[TMP0]], 32
117-
; CHECK-NEXT: [[DIFF_CHECK83:%.*]] = icmp ult i64 [[TMP1]], 32
118-
; CHECK-NEXT: [[DIFF_CHECK84:%.*]] = icmp ult i64 [[TMP2]], 32
119-
; CHECK-NEXT: [[DIFF_CHECK85:%.*]] = icmp ult i64 [[TMP3]], 32
120-
; CHECK-NEXT: [[DIFF_CHECK86:%.*]] = icmp ult i64 [[TMP4]], 32
121-
; CHECK-NEXT: [[DIFF_CHECK87:%.*]] = icmp ult i64 [[OFF_MUL_8]], 32
122-
; CHECK-NEXT: [[DIFF_CHECK88:%.*]] = icmp ult i64 [[TMP0]], 32
123-
; CHECK-NEXT: [[DIFF_CHECK89:%.*]] = icmp ult i64 [[TMP1]], 32
124-
; CHECK-NEXT: [[DIFF_CHECK90:%.*]] = icmp ult i64 [[TMP2]], 32
125-
; CHECK-NEXT: [[DIFF_CHECK91:%.*]] = icmp ult i64 [[TMP3]], 32
126-
; CHECK-NEXT: [[DIFF_CHECK92:%.*]] = icmp ult i64 [[OFF_MUL_8]], 32
127-
; CHECK-NEXT: [[DIFF_CHECK93:%.*]] = icmp ult i64 [[TMP0]], 32
128-
; CHECK-NEXT: [[DIFF_CHECK94:%.*]] = icmp ult i64 [[TMP1]], 32
129-
; CHECK-NEXT: [[DIFF_CHECK95:%.*]] = icmp ult i64 [[TMP2]], 32
130-
; CHECK-NEXT: [[DIFF_CHECK96:%.*]] = icmp ult i64 [[OFF_MUL_8]], 32
131-
; CHECK-NEXT: [[DIFF_CHECK97:%.*]] = icmp ult i64 [[TMP0]], 32
132-
; CHECK-NEXT: [[DIFF_CHECK98:%.*]] = icmp ult i64 [[TMP1]], 32
133-
; CHECK-NEXT: [[DIFF_CHECK99:%.*]] = icmp ult i64 [[OFF_MUL_8]], 32
134-
; CHECK-NEXT: [[DIFF_CHECK100:%.*]] = icmp ult i64 [[TMP0]], 32
135-
; CHECK-NEXT: [[DIFF_CHECK101:%.*]] = icmp ult i64 [[OFF_MUL_8]], 32
136-
; CHECK-NEXT: br i1 [[CONFLICT_RDX73]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
54+
; CHECK-NEXT: br i1 [[CONFLICT_RDX19]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
13755
; CHECK: vector.ph:
13856
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
13957
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]

0 commit comments

Comments
 (0)