Skip to content

Commit d9b53d5

Browse files
committed
[llvm][CodeGen] Some bugfixes for the window scheduler
1. Fixed the max cycle calculation for zero-cost instructions. 2. Added a new restriction for II set by pragma: the window scheduler does not support loops for which the Swing Scheduler's II pragma is set. 3. Fixed a bug in the stall cycle calculation. This bugfix does not affect the window scheduler's result. 4. Added missing initialization failure information.
1 parent ad15428 commit d9b53d5

File tree

5 files changed

+158
-13
lines changed

5 files changed

+158
-13
lines changed

llvm/lib/CodeGen/MachinePipeliner.cpp

+6-2
Original file line numberDiff line numberDiff line change
@@ -528,8 +528,12 @@ bool MachinePipeliner::useSwingModuloScheduler() {
528528
}
529529

530530
bool MachinePipeliner::useWindowScheduler(bool Changed) {
531-
// WindowScheduler does not work when it is off or when SwingModuloScheduler
532-
// is successfully scheduled.
531+
// WindowScheduler does not work for following cases:
532+
// 1. when it is off.
533+
// 2. when SwingModuloScheduler is successfully scheduled.
534+
// 3. when pragma II is enabled.
535+
if (II_setByPragma)
536+
return false;
533537
return WindowSchedulingOption == WindowSchedulingFlag::WS_Force ||
534538
(WindowSchedulingOption == WindowSchedulingFlag::WS_On && !Changed);
535539
}

llvm/lib/CodeGen/WindowScheduler.cpp

+19-11
Original file line numberDiff line numberDiff line change
@@ -232,8 +232,11 @@ bool WindowScheduler::initialize() {
232232
return false;
233233
}
234234
for (auto &Def : MI.all_defs())
235-
if (Def.isReg() && Def.getReg().isPhysical())
235+
if (Def.isReg() && Def.getReg().isPhysical()) {
236+
LLVM_DEBUG(dbgs() << "Physical registers are not supported in "
237+
"window scheduling!\n");
236238
return false;
239+
}
237240
}
238241
if (SchedInstrNum <= WindowRegionLimit) {
239242
LLVM_DEBUG(dbgs() << "There are too few MIs in the window region!\n");
@@ -437,14 +440,17 @@ int WindowScheduler::calculateMaxCycle(ScheduleDAGInstrs &DAG,
437440
int PredCycle = getOriCycle(PredMI);
438441
ExpectCycle = std::max(ExpectCycle, PredCycle + (int)Pred.getLatency());
439442
}
440-
// ResourceManager can be used to detect resource conflicts between the
441-
// current MI and the previously inserted MIs.
442-
while (!RM.canReserveResources(*SU, CurCycle) || CurCycle < ExpectCycle) {
443-
++CurCycle;
444-
if (CurCycle == (int)WindowIILimit)
445-
return CurCycle;
443+
// Zero cost instructions do not need to check resource.
444+
if (!TII->isZeroCost(MI.getOpcode())) {
445+
// ResourceManager can be used to detect resource conflicts between the
446+
// current MI and the previously inserted MIs.
447+
while (!RM.canReserveResources(*SU, CurCycle) || CurCycle < ExpectCycle) {
448+
++CurCycle;
449+
if (CurCycle == (int)WindowIILimit)
450+
return CurCycle;
451+
}
452+
RM.reserveResources(*SU, CurCycle);
446453
}
447-
RM.reserveResources(*SU, CurCycle);
448454
OriToCycle[getOriMI(&MI)] = CurCycle;
449455
LLVM_DEBUG(dbgs() << "\tCycle " << CurCycle << " [S."
450456
<< getOriStage(getOriMI(&MI), Offset) << "]: " << MI);
@@ -485,15 +491,17 @@ int WindowScheduler::calculateMaxCycle(ScheduleDAGInstrs &DAG,
485491
// ========================================
486492
int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) {
487493
int MaxStallCycle = 0;
494+
int CurrentII = MaxCycle + 1;
488495
auto Range = getScheduleRange(Offset, SchedInstrNum);
489496
for (auto &MI : Range) {
490497
auto *SU = TripleDAG->getSUnit(&MI);
491498
int DefCycle = getOriCycle(&MI);
492499
for (auto &Succ : SU->Succs) {
493500
if (Succ.isWeak() || Succ.getSUnit() == &TripleDAG->ExitSU)
494501
continue;
495-
// If the expected cycle does not exceed MaxCycle, no check is needed.
496-
if (DefCycle + (int)Succ.getLatency() <= MaxCycle)
502+
// If the expected cycle does not exceed loop initiation interval, no
503+
// check is needed.
504+
if (DefCycle + (int)Succ.getLatency() <= CurrentII)
497505
continue;
498506
// If the cycle of the scheduled MI A is less than that of the scheduled
499507
// MI B, the scheduling will fail because the lifetime of the
@@ -503,7 +511,7 @@ int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) {
503511
if (DefCycle < UseCycle)
504512
return WindowIILimit;
505513
// Get the stall cycle introduced by the register between two trips.
506-
int StallCycle = DefCycle + (int)Succ.getLatency() - MaxCycle - UseCycle;
514+
int StallCycle = DefCycle + (int)Succ.getLatency() - CurrentII - UseCycle;
507515
MaxStallCycle = std::max(MaxStallCycle, StallCycle);
508516
}
509517
}

llvm/test/CodeGen/Hexagon/swp-ws-fail-2.mir

+1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# RUN: -window-sched=force -filetype=null -verify-machineinstrs 2>&1 \
44
# RUN: | FileCheck %s
55

6+
# CHECK: Physical registers are not supported in window scheduling!
67
# CHECK: The WindowScheduler failed to initialize!
78

89
---
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
# RUN: llc --march=hexagon %s -run-pass=pipeliner -debug-only=pipeliner \
2+
# RUN: -window-sched=force -filetype=null -verify-machineinstrs 2>&1 \
3+
# RUN: | FileCheck %s
4+
# REQUIRES: asserts
5+
6+
# Test that checks no window scheduler is performed if the II set by pragma was
7+
# enabled
8+
9+
# CHECK-NOT: Start analyzing II
10+
# CHECK-NOT: Start scheduling Phis
11+
# CHECK-NOT: Current window Offset is {{[0-9]+}} and II is {{[0-9]+}}
12+
13+
--- |
14+
define void @test_pragma_ii_fail(ptr %a0, i32 %a1) {
15+
b0:
16+
%v0 = icmp sgt i32 %a1, 1
17+
br i1 %v0, label %b1, label %b4
18+
19+
b1: ; preds = %b0
20+
%v1 = load i32, ptr %a0, align 4
21+
%v2 = add i32 %v1, 10
22+
%v4 = add i32 %a1, -1
23+
%cgep = getelementptr i32, ptr %a0, i32 1
24+
br label %b2
25+
26+
b2: ; preds = %b2, %b1
27+
%v5 = phi i32 [ %v12, %b2 ], [ %v4, %b1 ]
28+
%v6 = phi ptr [ %cgep2, %b2 ], [ %cgep, %b1 ]
29+
%v7 = phi i32 [ %v10, %b2 ], [ %v2, %b1 ]
30+
store i32 %v7, ptr %v6, align 4
31+
%v8 = add i32 %v7, 10
32+
%cgep1 = getelementptr i32, ptr %v6, i32 -1
33+
store i32 %v8, ptr %cgep1, align 4
34+
%v10 = add i32 %v7, 10
35+
%v12 = add i32 %v5, -1
36+
%v13 = icmp eq i32 %v12, 0
37+
%cgep2 = getelementptr i32, ptr %v6, i32 1
38+
br i1 %v13, label %b4, label %b2, !llvm.loop !0
39+
40+
b4: ; preds = %b2, %b0
41+
ret void
42+
}
43+
44+
!0 = distinct !{!0, !1}
45+
!1 = !{!"llvm.loop.pipeline.initiationinterval", i32 2}
46+
...
47+
---
48+
name: test_pragma_ii_fail
49+
tracksRegLiveness: true
50+
body: |
51+
bb.0.b0:
52+
successors: %bb.1(0x40000000), %bb.3(0x40000000)
53+
liveins: $r0, $r1
54+
55+
%10:intregs = COPY $r1
56+
%9:intregs = COPY $r0
57+
%11:predregs = C2_cmpgti %10, 1
58+
J2_jumpf %11, %bb.3, implicit-def dead $pc
59+
J2_jump %bb.1, implicit-def dead $pc
60+
61+
bb.1.b1:
62+
successors: %bb.2(0x80000000)
63+
64+
%13:intregs, %2:intregs = L2_loadri_pi %9, 4
65+
%0:intregs = A2_addi killed %13, 10
66+
%1:intregs = A2_addi %10, -1
67+
%16:intregs = COPY %1
68+
J2_loop0r %bb.2, %16, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
69+
70+
bb.2.b2 (machine-block-address-taken):
71+
successors: %bb.3(0x04000000), %bb.2(0x7c000000)
72+
73+
%4:intregs = PHI %2, %bb.1, %8, %bb.2
74+
%5:intregs = PHI %0, %bb.1, %6, %bb.2
75+
S2_storeri_io %4, 0, %5
76+
%6:intregs = A2_addi %5, 10
77+
S2_storeri_io %4, -4, %6
78+
%8:intregs = A2_addi %4, 4
79+
ENDLOOP0 %bb.2, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
80+
J2_jump %bb.3, implicit-def dead $pc
81+
82+
bb.3.b4:
83+
PS_jmpret $r31, implicit-def dead $pc
84+
85+
...
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# REQUIRES: asserts
2+
# RUN: llc --march=hexagon %s -run-pass=pipeliner -debug-only=pipeliner \
3+
# RUN: -window-sched=force -filetype=null -verify-machineinstrs 2>&1 \
4+
# RUN: | FileCheck %s
5+
6+
# CHECK-NOT: Can't find a valid II. Keep searching...
7+
8+
---
9+
name: relu
10+
tracksRegLiveness: true
11+
body: |
12+
bb.0:
13+
successors: %bb.2(0x30000000), %bb.1(0x50000000)
14+
liveins: $r0, $r1, $r2
15+
16+
%0:intregs = COPY $r2
17+
%1:intregs = COPY $r1
18+
%2:intregs = COPY $r0
19+
%3:predregs = C2_cmpeqi %2, 0
20+
J2_jumpt killed %3, %bb.2, implicit-def dead $pc
21+
J2_jump %bb.1, implicit-def dead $pc
22+
23+
bb.1:
24+
successors: %bb.3(0x80000000)
25+
26+
%4:hvxvr = V6_vd0
27+
%5:intregs = A2_addi %2, 31
28+
%6:intregs = S2_lsr_i_r %5, 5
29+
%7:intregs = COPY %6
30+
J2_loop0r %bb.3, %7, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
31+
J2_jump %bb.3, implicit-def dead $pc
32+
33+
bb.2:
34+
PS_jmpret $r31, implicit-def dead $pc
35+
36+
bb.3 (machine-block-address-taken):
37+
successors: %bb.3(0x7c000000), %bb.2(0x04000000)
38+
39+
%8:intregs = PHI %1, %bb.1, %9, %bb.3
40+
%10:intregs = PHI %0, %bb.1, %14, %bb.3
41+
%11:hvxvr, %9:intregs = V6_vL32b_pi %8, 128
42+
%12:intregs = COPY %10
43+
%13:hvxvr = V6_vmaxw killed %11, %4
44+
%14:intregs = V6_vS32b_pi %12, 128, killed %13
45+
ENDLOOP0 %bb.3, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
46+
J2_jump %bb.2, implicit-def dead $pc
47+
...

0 commit comments

Comments
 (0)