Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions llvm/lib/CodeGen/MachineOutliner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -828,10 +828,12 @@ bool MachineOutliner::outline(Module &M,
<< "\n");
bool OutlinedSomething = false;

// Sort by benefit. The most beneficial functions should be outlined first.
// Sort by priority where priority := getNotOutlinedCost / getOutliningCost.
// The function with highest priority should be outlined first.
stable_sort(FunctionList,
[](const OutlinedFunction &LHS, const OutlinedFunction &RHS) {
return LHS.getBenefit() > RHS.getBenefit();
return LHS.getNotOutlinedCost() * RHS.getOutliningCost() >
RHS.getNotOutlinedCost() * LHS.getOutliningCost();
});

// Walk over each function, outlining them as we go along. Functions are
Expand Down
96 changes: 96 additions & 0 deletions llvm/test/CodeGen/AArch64/machine-outliner-sort-per-priority.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
; This tests the order in which functions are outlined by the MachineOutliner.
; There are TWO key OutlinedFunctions in FunctionList:
;
; ===================== First One =====================
; ```
; mov w0, #1
; mov w1, #2
; mov w2, #3
; mov w3, #4
; mov w4, #5
; ```
; It has:
; - `SequenceSize=20` and `OccurrenceCount=6`
; - each Candidate has `CallOverhead=12` and `FrameOverhead=4`
; - `NotOutlinedCost=20*6=120` and `OutliningCost=12*6+20+4=96`
; - `Benefit=120-96=24` and `Priority=120/96=1.25`
;
; ===================== Second One =====================
; ```
; mov w6, #6
; mov w7, #7
; b
; ```
; It has:
; - `SequenceSize=12` and `OccurrenceCount=4`
; - each Candidate has `CallOverhead=4` and `FrameOverhead=0`
; - `NotOutlinedCost=12*4=48` and `OutliningCost=4*4+12+0=28`
; - `Benefit=48-28=20` and `Priority=48/28=1.71`
;
; Note that the first one has higher benefit, but lower priority.
; Hence, when outlining per priority, the second one will be outlined first.

; RUN: llc %s -enable-machine-outliner=always -filetype=obj -o %t
; RUN: llvm-objdump -d %t | FileCheck %s --check-prefix=CHECK-SORT-BY-PRIORITY

; RUN: llc %s -enable-machine-outliner=always -outliner-benefit-threshold=22 -filetype=obj -o %t
; RUN: llvm-objdump -d %t | FileCheck %s --check-prefix=CHECK-THRESHOLD


target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "arm64-apple-macosx14.0.0"

declare i32 @_Z3fooiiii(i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef)

; Tail-calls @_Z3fooiiii with 1..5 as the first five arguments and 6, 7 as the
; last two; only the sixth argument (11) is unique to this function. Per the
; scenario in this file's header, it therefore provides one occurrence of both
; repeated sequences (the five-constant prefix and the "6, 7, branch" tail).
define i32 @_Z2f1v() minsize {
%1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 11, i32 noundef 6, i32 noundef 7)
ret i32 %1
}

; Same call shape as the other functions in this file: arguments 1..5 first and
; 6, 7 last, with a function-specific sixth argument (12) separating the two
; repeated runs of constants.
define i32 @_Z2f2v() minsize {
%1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 12, i32 noundef 6, i32 noundef 7)
ret i32 %1
}

; Arguments 1..5 first and 6, 7 last; the sixth argument (13) is unique to this
; function and separates the two repeated runs of constants.
define i32 @_Z2f3v() minsize {
%1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 13, i32 noundef 6, i32 noundef 7)
ret i32 %1
}

; Arguments 1..5 first and 6, 7 last; the sixth argument (14) is unique to this
; function and separates the two repeated runs of constants.
define i32 @_Z2f4v() minsize {
%1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 14, i32 noundef 6, i32 noundef 7)
ret i32 %1
}

; Shares only the 1..5 argument prefix with the other functions: its last two
; arguments are 8, 9 rather than 6, 7, so it does not repeat the "6, 7, branch"
; tail described in this file's header.
define i32 @_Z2f5v() minsize {
%1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 15, i32 noundef 8, i32 noundef 9)
ret i32 %1
}

; Shares only the 1..5 argument prefix with the other functions: its last two
; arguments are 9, 8 (neither the 6, 7 pair nor the 8, 9 order used elsewhere in
; this file), so its tail is not a repeated sequence.
define i32 @_Z2f6v() minsize {
%1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 16, i32 noundef 9, i32 noundef 8)
ret i32 %1
}

; CHECK-SORT-BY-PRIORITY: <_OUTLINED_FUNCTION_0>:
; CHECK-SORT-BY-PRIORITY-NEXT: mov w6, #0x6
; CHECK-SORT-BY-PRIORITY-NEXT: mov w7, #0x7
; CHECK-SORT-BY-PRIORITY-NEXT: b

; CHECK-SORT-BY-PRIORITY: <_OUTLINED_FUNCTION_1>:
; CHECK-SORT-BY-PRIORITY-NEXT: mov w0, #0x1
; CHECK-SORT-BY-PRIORITY-NEXT: mov w1, #0x2
; CHECK-SORT-BY-PRIORITY-NEXT: mov w2, #0x3
; CHECK-SORT-BY-PRIORITY-NEXT: mov w3, #0x4
; CHECK-SORT-BY-PRIORITY-NEXT: mov w4, #0x5
; CHECK-SORT-BY-PRIORITY-NEXT: ret

; CHECK-THRESHOLD: <_OUTLINED_FUNCTION_0>:
; CHECK-THRESHOLD-NEXT: mov w0, #0x1
; CHECK-THRESHOLD-NEXT: mov w1, #0x2
; CHECK-THRESHOLD-NEXT: mov w2, #0x3
; CHECK-THRESHOLD-NEXT: mov w3, #0x4
; CHECK-THRESHOLD-NEXT: mov w4, #0x5
; CHECK-THRESHOLD-NEXT: ret

; CHECK-THRESHOLD-NOT: <_OUTLINED_FUNCTION_1>:
206 changes: 206 additions & 0 deletions llvm/test/CodeGen/AArch64/machine-outliner-sort-per-priority.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
# The content of this test is modified from the output obtained by running
# `bin/llc -O2 -stop-before=machine-outliner <path_to_llvm_project>/llvm/test/CodeGen/AArch64/machine-outliner-sort-per-priority.ll -o -`
# RUN: llc -mtriple=aarch64 -run-pass=machine-outliner -verify-machineinstrs %s -o - | FileCheck %s

--- |
declare i32 @foo()

define void @f1() #0 { ret void }
define void @f2() #0 { ret void }
define void @f3() #0 { ret void }
define void @f4() #0 { ret void }
define void @f5() #0 { ret void }
define void @f6() #0 { ret void }

attributes #0 = { minsize }
...
---
# f1 loads the constants 1-5 into $w0-$w4, 11 into $w5, and 6, 7 into $w6, $w7
# before tail-calling @foo. The expectations below require the $w0-$w4 run to
# be replaced by a BL to OUTLINED_FUNCTION_1 (with $lr copied to $x5 before the
# call and restored after it), and the $w6/$w7 + tail-call run to be replaced
# by a tail call to OUTLINED_FUNCTION_0. The $w5 load stays in place.
# CHECK-LABEL: name: f1
# CHECK-LABEL: bb.0:
# CHECK-NEXT: liveins: $lr
# CHECK-NEXT: {{ $}}
# CHECK-NEXT: $x5 = ORRXrs $xzr, $lr, 0
# CHECK-NEXT: BL @OUTLINED_FUNCTION_1, implicit-def $lr, implicit $sp, implicit-def $lr, implicit-def $w0, implicit-def $w1, implicit-def $w2, implicit-def $w3, implicit-def $w4, implicit $sp, implicit $xzr, implicit $x5
# CHECK-NEXT: $lr = ORRXrs $xzr, $x5, 0
# CHECK-NEXT: $w5 = MOVZWi 11, 0
# CHECK-NEXT: TCRETURNdi @OUTLINED_FUNCTION_0, 0, implicit $sp, implicit-def $w7, implicit-def $w6, implicit $sp, implicit $w0, implicit $w1, implicit $w2, implicit $w3, implicit $w4, implicit $w5
name: f1
tracksRegLiveness: true
frameInfo:
isCalleeSavedInfoValid: true
machineFunctionInfo:
hasRedZone: false
body: |
bb.0:
$w0 = MOVZWi 1, 0
$w1 = MOVZWi 2, 0
$w2 = MOVZWi 3, 0
$w3 = MOVZWi 4, 0
$w4 = MOVZWi 5, 0
$w5 = MOVZWi 11, 0
$w6 = MOVZWi 6, 0
$w7 = MOVZWi 7, 0
TCRETURNdi @foo, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit killed $w0, implicit killed $w1, implicit killed $w2, implicit killed $w3, implicit killed $w4, implicit killed $w5, implicit killed $w6, implicit killed $w7

...
---
# f2 has the same shape as f1 except that $w5 is loaded with 12. It is expected
# to call OUTLINED_FUNCTION_1 for the $w0-$w4 run (saving $lr in $x5 around the
# call) and to tail-call OUTLINED_FUNCTION_0 for the $w6/$w7 + tail-call run.
# CHECK-LABEL: name: f2
# CHECK-LABEL: bb.0:
# CHECK-NEXT: liveins: $lr
# CHECK-NEXT: {{ $}}
# CHECK-NEXT: $x5 = ORRXrs $xzr, $lr, 0
# CHECK-NEXT: BL @OUTLINED_FUNCTION_1, implicit-def $lr, implicit $sp, implicit-def $lr, implicit-def $w0, implicit-def $w1, implicit-def $w2, implicit-def $w3, implicit-def $w4, implicit $sp, implicit $xzr, implicit $x5
# CHECK-NEXT: $lr = ORRXrs $xzr, $x5, 0
# CHECK-NEXT: $w5 = MOVZWi 12, 0
# CHECK-NEXT: TCRETURNdi @OUTLINED_FUNCTION_0, 0, implicit $sp, implicit-def $w7, implicit-def $w6, implicit $sp, implicit $w0, implicit $w1, implicit $w2, implicit $w3, implicit $w4, implicit $w5
name: f2
tracksRegLiveness: true
frameInfo:
isCalleeSavedInfoValid: true
machineFunctionInfo:
hasRedZone: false
body: |
bb.0:
$w0 = MOVZWi 1, 0
$w1 = MOVZWi 2, 0
$w2 = MOVZWi 3, 0
$w3 = MOVZWi 4, 0
$w4 = MOVZWi 5, 0
$w5 = MOVZWi 12, 0
$w6 = MOVZWi 6, 0
$w7 = MOVZWi 7, 0
TCRETURNdi @foo, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit killed $w0, implicit killed $w1, implicit killed $w2, implicit killed $w3, implicit killed $w4, implicit killed $w5, implicit killed $w6, implicit killed $w7

...
---
# f3 has the same shape as f1 except that $w5 is loaded with 13. It is expected
# to call OUTLINED_FUNCTION_1 for the $w0-$w4 run (saving $lr in $x5 around the
# call) and to tail-call OUTLINED_FUNCTION_0 for the $w6/$w7 + tail-call run.
# CHECK-LABEL: name: f3
# CHECK-LABEL: bb.0:
# CHECK-NEXT: liveins: $lr
# CHECK-NEXT: {{ $}}
# CHECK-NEXT: $x5 = ORRXrs $xzr, $lr, 0
# CHECK-NEXT: BL @OUTLINED_FUNCTION_1, implicit-def $lr, implicit $sp, implicit-def $lr, implicit-def $w0, implicit-def $w1, implicit-def $w2, implicit-def $w3, implicit-def $w4, implicit $sp, implicit $xzr, implicit $x5
# CHECK-NEXT: $lr = ORRXrs $xzr, $x5, 0
# CHECK-NEXT: $w5 = MOVZWi 13, 0
# CHECK-NEXT: TCRETURNdi @OUTLINED_FUNCTION_0, 0, implicit $sp, implicit-def $w7, implicit-def $w6, implicit $sp, implicit $w0, implicit $w1, implicit $w2, implicit $w3, implicit $w4, implicit $w5
name: f3
tracksRegLiveness: true
frameInfo:
isCalleeSavedInfoValid: true
machineFunctionInfo:
hasRedZone: false
body: |
bb.0:
$w0 = MOVZWi 1, 0
$w1 = MOVZWi 2, 0
$w2 = MOVZWi 3, 0
$w3 = MOVZWi 4, 0
$w4 = MOVZWi 5, 0
$w5 = MOVZWi 13, 0
$w6 = MOVZWi 6, 0
$w7 = MOVZWi 7, 0
TCRETURNdi @foo, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit killed $w0, implicit killed $w1, implicit killed $w2, implicit killed $w3, implicit killed $w4, implicit killed $w5, implicit killed $w6, implicit killed $w7

...
---
# f4 has the same shape as f1 except that $w5 is loaded with 14. It is expected
# to call OUTLINED_FUNCTION_1 for the $w0-$w4 run (saving $lr in $x5 around the
# call) and to tail-call OUTLINED_FUNCTION_0 for the $w6/$w7 + tail-call run.
# CHECK-LABEL: name: f4
# CHECK-LABEL: bb.0:
# CHECK-NEXT: liveins: $lr
# CHECK-NEXT: {{ $}}
# CHECK-NEXT: $x5 = ORRXrs $xzr, $lr, 0
# CHECK-NEXT: BL @OUTLINED_FUNCTION_1, implicit-def $lr, implicit $sp, implicit-def $lr, implicit-def $w0, implicit-def $w1, implicit-def $w2, implicit-def $w3, implicit-def $w4, implicit $sp, implicit $xzr, implicit $x5
# CHECK-NEXT: $lr = ORRXrs $xzr, $x5, 0
# CHECK-NEXT: $w5 = MOVZWi 14, 0
# CHECK-NEXT: TCRETURNdi @OUTLINED_FUNCTION_0, 0, implicit $sp, implicit-def $w7, implicit-def $w6, implicit $sp, implicit $w0, implicit $w1, implicit $w2, implicit $w3, implicit $w4, implicit $w5
name: f4
tracksRegLiveness: true
frameInfo:
isCalleeSavedInfoValid: true
machineFunctionInfo:
hasRedZone: false
body: |
bb.0:
$w0 = MOVZWi 1, 0
$w1 = MOVZWi 2, 0
$w2 = MOVZWi 3, 0
$w3 = MOVZWi 4, 0
$w4 = MOVZWi 5, 0
$w5 = MOVZWi 14, 0
$w6 = MOVZWi 6, 0
$w7 = MOVZWi 7, 0
TCRETURNdi @foo, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit killed $w0, implicit killed $w1, implicit killed $w2, implicit killed $w3, implicit killed $w4, implicit killed $w5, implicit killed $w6, implicit killed $w7

...
---
# f5 shares only the $w0-$w4 constant run with the other functions: it loads
# 8, 9 into $w6, $w7 instead of 6, 7. It is therefore expected to call
# OUTLINED_FUNCTION_1 (saving $lr in $x5 around the call) but must not
# reference OUTLINED_FUNCTION_0 anywhere in the rest of its body. The negative
# expectation below uses the NOT form of the directive so that FileCheck
# actually enforces it; an unrecognized suffix would be silently skipped.
# CHECK-LABEL: name: f5
# CHECK-LABEL: bb.0:
# CHECK-NEXT: liveins: $lr
# CHECK-NEXT: {{ $}}
# CHECK-NEXT: $x5 = ORRXrs $xzr, $lr, 0
# CHECK-NEXT: BL @OUTLINED_FUNCTION_1, implicit-def $lr, implicit $sp, implicit-def $lr, implicit-def $w0, implicit-def $w1, implicit-def $w2, implicit-def $w3, implicit-def $w4, implicit $sp, implicit $xzr, implicit $x5
# CHECK-NEXT: $lr = ORRXrs $xzr, $x5, 0
# CHECK-NOT: @OUTLINED_FUNCTION_0
name: f5
tracksRegLiveness: true
frameInfo:
isCalleeSavedInfoValid: true
machineFunctionInfo:
hasRedZone: false
body: |
bb.0:
$w0 = MOVZWi 1, 0
$w1 = MOVZWi 2, 0
$w2 = MOVZWi 3, 0
$w3 = MOVZWi 4, 0
$w4 = MOVZWi 5, 0
$w5 = MOVZWi 15, 0
$w6 = MOVZWi 8, 0
$w7 = MOVZWi 9, 0
TCRETURNdi @foo, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit killed $w0, implicit killed $w1, implicit killed $w2, implicit killed $w3, implicit killed $w4, implicit killed $w5, implicit killed $w6, implicit killed $w7

...
---
# f6 shares only the $w0-$w4 constant run with the other functions: it loads
# 9, 8 into $w6, $w7 instead of 6, 7. It is therefore expected to call
# OUTLINED_FUNCTION_1 (saving $lr in $x5 around the call) but must not
# reference OUTLINED_FUNCTION_0 anywhere in the rest of its body. The negative
# expectation below uses the NOT form of the directive so that FileCheck
# actually enforces it; an unrecognized suffix would be silently skipped.
# CHECK-LABEL: name: f6
# CHECK-LABEL: bb.0:
# CHECK-NEXT: liveins: $lr
# CHECK-NEXT: {{ $}}
# CHECK-NEXT: $x5 = ORRXrs $xzr, $lr, 0
# CHECK-NEXT: BL @OUTLINED_FUNCTION_1, implicit-def $lr, implicit $sp, implicit-def $lr, implicit-def $w0, implicit-def $w1, implicit-def $w2, implicit-def $w3, implicit-def $w4, implicit $sp, implicit $xzr, implicit $x5
# CHECK-NEXT: $lr = ORRXrs $xzr, $x5, 0
# CHECK-NOT: @OUTLINED_FUNCTION_0
name: f6
tracksRegLiveness: true
frameInfo:
isCalleeSavedInfoValid: true
machineFunctionInfo:
hasRedZone: false
body: |
bb.0:
$w0 = MOVZWi 1, 0
$w1 = MOVZWi 2, 0
$w2 = MOVZWi 3, 0
$w3 = MOVZWi 4, 0
$w4 = MOVZWi 5, 0
$w5 = MOVZWi 16, 0
$w6 = MOVZWi 9, 0
$w7 = MOVZWi 8, 0
TCRETURNdi @foo, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit killed $w0, implicit killed $w1, implicit killed $w2, implicit killed $w3, implicit killed $w4, implicit killed $w5, implicit killed $w6, implicit killed $w7

...

# CHECK-LABEL: name: OUTLINED_FUNCTION_0
# CHECK-LABEL: bb.0:
# CHECK-NEXT: liveins: $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $lr, $d8, $d9, $d10, $d11, $d12, $d13, $d14, $d15, $w0, $w1, $w2, $w3, $w4, $w5
# CHECK-NEXT: {{ $}}
# CHECK-NEXT: $w6 = MOVZWi 6, 0
# CHECK-NEXT: $w7 = MOVZWi 7, 0

# CHECK-LABEL: name: OUTLINED_FUNCTION_1
# CHECK-LABEL: bb.0:
# CHECK-NEXT: liveins: $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $lr, $d8, $d9, $d10, $d11, $d12, $d13, $d14, $d15
# CHECK-NEXT: {{ $}}
# CHECK-NEXT: $w0 = MOVZWi 1, 0
# CHECK-NEXT: $w1 = MOVZWi 2, 0
# CHECK-NEXT: $w2 = MOVZWi 3, 0
# CHECK-NEXT: $w3 = MOVZWi 4, 0
# CHECK-NEXT: $w4 = MOVZWi 5, 0
# CHECK-NEXT: RET $lr
Loading