-
Notifications
You must be signed in to change notification settings - Fork 15.5k
Open
Labels
Description
godbolt: https://godbolt.org/z/4nxz9K4re
Unroll-and-jam is applied to the following LLVM IR:
define void @f(ptr noalias %A, ptr noalias %B) {
entry:
br label %loop.i.header
loop.i.header:
%i = phi i64 [ 0, %entry ], [ %i.inc, %loop.i.latch ]
%i.4 = mul i64 %i, 4
br label %loop.j
loop.j:
%j = phi i64 [ 0, %loop.i.header ], [ %j.inc, %loop.j ]
%gep.B = getelementptr [16 x i8], ptr %B, i64 %i, i64 %j
%val = load i8, ptr %gep.B
%offset.A = add i64 %i.4, %j
%gep.A = getelementptr i8, ptr %A, i64 %offset.A
store i8 %val, ptr %gep.A
%j.inc = add i64 %j, 1
%ec.j = icmp eq i64 %j.inc, 16
br i1 %ec.j, label %loop.i.latch, label %loop.j
loop.i.latch:
%i.inc = add i64 %i, 1
%ec.i = icmp eq i64 %i.inc, 32
br i1 %ec.i, label %exit, label %loop.i.header
exit:
ret void
}This transformation breaks the original semantics. Consider the following pseudo code. For example, the value stored in A[12] will be from B[3][0] in the original loop, but from B[0][12] in the transformed one.
A[256];
B[32][16];
// Original code
for (i = 0; i < 32; i++)
for (j = 0; j < 16; j++)
A[4*i + j] = B[i][j];
// Unroll-and-jammed (count = 4)
for (i = 0; i < 8; i++)
for (j = 0; j < 16; j++) {
A[4*(i*4+0) + j] = B[i*4+0][j];
A[4*(i*4+1) + j] = B[i*4+1][j];
A[4*(i*4+2) + j] = B[i*4+2][j];
A[4*(i*4+3) + j] = B[i*4+3][j];
}Apparently the legality check ignores loop-carried dependency between the same instruction.
llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
Lines 691 to 698 in 64e3bcd
| static bool checkDependency(Instruction *Src, Instruction *Dst, | |
| unsigned UnrollLevel, unsigned JamLevel, | |
| bool Sequentialized, DependenceInfo &DI) { | |
| assert(UnrollLevel <= JamLevel && | |
| "Expecting JamLevel to be at least UnrollLevel"); | |
| if (Src == Dst) | |
| return true; |