; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes='loop(indvars),instcombine' -replexitval=always -S < %s | FileCheck %s

;; Test that loop's exit value is rewritten to its initial
;; value from loop preheader
@@ -197,3 +197,106 @@ crit_edge:
|
197 | 197 | ret i16 %conv
|
198 | 198 | }
|
199 | 199 |
|
;; The loop steps by VF = 4 * vscale and exits once %add >= %n.  The live-out
;; value %VF.capped = umin(VF, n - i) is used after the loop, so indvars
;; rewrites it at the exit in terms of the trip count: the for.end CHECK lines
;; recompute the final induction value via ((n - 1) / VF) * VF and then take
;; umin(VF, n - final_i) outside the loop.
define i32 @vscale_slt_with_vp_umin(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: @vscale_slt_with_vp_umin(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[VSCALE:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    [[VF:%.*]] = shl nuw nsw i32 [[VSCALE]], 2
; CHECK-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[EARLY_EXIT:%.*]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       early.exit:
; CHECK-NEXT:    ret i32 0
; CHECK:       for.body:
; CHECK-NEXT:    [[I_05:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[LEFT:%.*]] = sub nsw i32 [[N]], [[I_05]]
; CHECK-NEXT:    [[VF_CAPPED:%.*]] = call i32 @llvm.umin.i32(i32 [[VF]], i32 [[LEFT]])
; CHECK-NEXT:    store i32 [[VF_CAPPED]], ptr [[A:%.*]], align 4
; CHECK-NEXT:    [[ADD]] = add nuw nsw i32 [[I_05]], [[VF]]
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[ADD]], [[N]]
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK:       for.end:
; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i32 [[N]], -1
; CHECK-NEXT:    [[TMP1:%.*]] = udiv i32 [[TMP0]], [[VF]]
; CHECK-NEXT:    [[TMP2:%.*]] = mul i32 [[TMP1]], [[VSCALE]]
; CHECK-NEXT:    [[TMP3:%.*]] = shl i32 [[TMP2]], 2
; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[N]], [[TMP3]]
; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[VF]], i32 [[TMP4]])
; CHECK-NEXT:    ret i32 [[UMIN]]
;
entry:
  %vscale = call i32 @llvm.vscale.i32()
  %VF = shl nuw nsw i32 %vscale, 2
  ;; Guard: only enter the loop when n > 0.
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body, label %early.exit

early.exit:
  ret i32 0

for.body:
  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
  ;; Elements remaining; the last iteration's umin clamps VF down to it.
  %left = sub i32 %n, %i.05
  %VF.capped = call i32 @llvm.umin.i32(i32 %VF, i32 %left)
  store i32 %VF.capped, ptr %A

  %add = add nsw i32 %i.05, %VF
  %cmp = icmp slt i32 %add, %n
  br i1 %cmp, label %for.body, label %for.end

for.end:
  ;; Loop-closed use of %VF.capped — the value indvars must rewrite.
  ret i32 %VF.capped
}
| 251 | + |
;; Same pattern as @vscale_slt_with_vp_umin, but the entry guard compares
;; %n against VF (n > VF) rather than against zero; note the for.end CHECK
;; lines here lack the nsw flags present in the first test's expansion.
define i32 @vscale_slt_with_vp_umin2(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: @vscale_slt_with_vp_umin2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[VSCALE:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    [[VF:%.*]] = shl nuw nsw i32 [[VSCALE]], 2
; CHECK-NEXT:    [[CMP4:%.*]] = icmp slt i32 [[VF]], [[N:%.*]]
; CHECK-NEXT:    br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[EARLY_EXIT:%.*]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       early.exit:
; CHECK-NEXT:    ret i32 0
; CHECK:       for.body:
; CHECK-NEXT:    [[I_05:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[LEFT:%.*]] = sub i32 [[N]], [[I_05]]
; CHECK-NEXT:    [[VF_CAPPED:%.*]] = call i32 @llvm.umin.i32(i32 [[VF]], i32 [[LEFT]])
; CHECK-NEXT:    store i32 [[VF_CAPPED]], ptr [[A:%.*]], align 4
; CHECK-NEXT:    [[ADD]] = add nuw nsw i32 [[I_05]], [[VF]]
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[ADD]], [[N]]
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK:       for.end:
; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
; CHECK-NEXT:    [[TMP1:%.*]] = udiv i32 [[TMP0]], [[VF]]
; CHECK-NEXT:    [[TMP2:%.*]] = mul i32 [[TMP1]], [[VSCALE]]
; CHECK-NEXT:    [[TMP3:%.*]] = shl i32 [[TMP2]], 2
; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[N]], [[TMP3]]
; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[VF]], i32 [[TMP4]])
; CHECK-NEXT:    ret i32 [[UMIN]]
;
entry:
  %vscale = call i32 @llvm.vscale.i32()
  %VF = shl nuw nsw i32 %vscale, 2
  ;; Guard: only enter the loop when n > VF (canonicalized to VF < n in the checks).
  %cmp4 = icmp sgt i32 %n, %VF
  br i1 %cmp4, label %for.body, label %early.exit

early.exit:
  ret i32 0

for.body:
  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
  ;; Elements remaining; the last iteration's umin clamps VF down to it.
  %left = sub i32 %n, %i.05
  %VF.capped = call i32 @llvm.umin.i32(i32 %VF, i32 %left)
  store i32 %VF.capped, ptr %A

  %add = add nsw i32 %i.05, %VF
  %cmp = icmp slt i32 %add, %n
  br i1 %cmp, label %for.body, label %for.end

for.end:
  ;; Loop-closed use of %VF.capped — the value indvars must rewrite.
  ret i32 %VF.capped
}