Skip to content

cmd/compile: greedy basic block layout #66421

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions src/cmd/compile/internal/ssa/flagalloc.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,12 @@ func flagalloc(f *Func) {
}
}
}
visitOrder := layoutOrder(f)

// For blocks which have a flags control value, that's the only value
// we can leave in the flags register at the end of the block. (There
// is no place to put a flag regeneration instruction.)
for _, b := range f.Blocks {
for _, b := range visitOrder {
if b.Kind == BlockDefer {
// Defer blocks internally use/clobber the flags value.
end[b.ID] = nil
Expand Down Expand Up @@ -109,7 +110,7 @@ func flagalloc(f *Func) {
// Add flag spill and recomputation where they are needed.
var remove []*Value // values that should be checked for possible removal
var oldSched []*Value
for _, b := range f.Blocks {
for _, b := range visitOrder {
oldSched = append(oldSched[:0], b.Values...)
b.Values = b.Values[:0]
// The current live flag value (the pre-flagalloc copy).
Expand Down Expand Up @@ -188,7 +189,7 @@ func flagalloc(f *Func) {
}

// Save live flag state for later.
for _, b := range f.Blocks {
for _, b := range visitOrder {
b.FlagsLiveAtEnd = end[b.ID] != nil
}

Expand Down Expand Up @@ -223,7 +224,7 @@ func flagalloc(f *Func) {
}

// Process affected blocks, preserving value order.
for _, b := range f.Blocks {
for _, b := range visitOrder {
if !removeBlocks.contains(b.ID) {
continue
}
Expand Down
6 changes: 6 additions & 0 deletions src/cmd/compile/internal/ssa/func.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,12 @@ type LocalSlotSplitKey struct {
Type *types.Type // type of slot
}

func assert(cond bool, fx string, msg ...interface{}) {
if !cond {
panic(fmt.Sprintf(fx, msg...))
}
}

// NewFunc returns a new, empty function object.
// Caller must reset cache before calling NewFunc.
func (c *Config) NewFunc(fe Frontend, cache *Cache) *Func {
Expand Down
281 changes: 280 additions & 1 deletion src/cmd/compile/internal/ssa/layout.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,16 @@

package ssa

import (
"fmt"
"sort"
)

// layout orders basic blocks in f with the goal of minimizing control flow instructions.
// After this phase returns, the order of f.Blocks matters and is the order
// in which those blocks will appear in the assembly output.
func layout(f *Func) {
f.Blocks = layoutOrder(f)
f.Blocks = greedyBlockOrder(f)
}

// Register allocation may use a different order which has constraints
Expand Down Expand Up @@ -183,3 +188,277 @@ blockloop:
return order
//f.Blocks = order
}

// ----------------------------------------------------------------------------
// Greedy Basic Block Layout
//
// This is an adaptation of Pettis & Hansen's greedy algorithm for laying out
// basic blocks. See Profile Guided Code Positioning by Pettis & Hansen. The idea
// is to arrange hot blocks near each other. Initially all blocks are belongs to
// its own chain, then starting from hottest edge and repeatedly merge two proper
// chains iff the edge dest is the first block of dest chain and edge src is the
// last block of src chain. Once all edges are processed, the chains are sorted
// by hottness and merge count and generate final block order. The algorithm is
// summarized as follows:
// - Initially every block is in its own chain.
// - Sort edges by weight and move slow path to end.
// - Merge proper chains until no more chains can be merged.
// - Sort chains by hottness and priority.
// - Generate final block order.

// chain is a linear sequence of blocks, where the first block is the entry block
// and the last block is the exit block. The chain is used to represent a sequence
// of blocks that are likely to be executed together.
type chain struct {
id ID
blocks []*Block // ordered blocks in this chain
priority int // merge count
}

func (c *chain) first() *Block {
return c.blocks[0]
}

func (c *chain) last() *Block {
return c.blocks[len(c.blocks)-1]
}

// edge simply represents a CFG edge
type edge struct {
src *Block
dst *Block
weight int // frequency
}

const (
WeightTaken = 100
WeightNotTaken = 0
)

func (e *edge) String() string {
return fmt.Sprintf("%v->%v(%d)", e.src, e.dst, e.weight)
}

// chainGraph is a directed graph of chains, where each chain is a sequence of
// blocks, and each edge is a CFG edge with a weight. The graph is used to build
// a block layout that minimizes control flow instructions.
type chainGraph struct {
cid idAlloc
chains []*chain
edges []*edge
b2chain []*chain // indexed by block id
}

func (g *chainGraph) newChain(block *Block) *chain {
c := &chain{g.cid.get(), []*Block{block}, 0 /*priority*/}
g.b2chain[block.ID] = c
g.chains = append(g.chains, c)
return c
}

func (g *chainGraph) getChain(b *Block) *chain {
return g.b2chain[b.ID]
}

// mergeChain merges the "from" chain into the "to" chain. The from chain is
// removed then.
func (g *chainGraph) mergeChain(from, to *chain) {
for _, block := range from.blocks {
g.b2chain[block.ID] = to
}
to.blocks = append(to.blocks, from.blocks...)
to.priority++
g.chains[from.id-1 /*ID always >0*/] = nil
}

func (g *chainGraph) print() {
fmt.Printf("== Edges:\n")
for _, edge := range g.edges {
fmt.Printf("%v\n", edge)
}
fmt.Printf("== Chains:\n")
for _, ch := range g.chains {
if ch == nil {
continue
}
fmt.Printf("id:%d priority:%d blocks:%v\n", ch.id, ch.priority, ch.blocks)
}
fmt.Printf("== BlockOrder:\n")
blockOrder := make([]*Block, 0)
for _, chain := range g.chains {
blockOrder = append(blockOrder, chain.blocks...)
}
fmt.Printf("%v\n", blockOrder)
}

func newChainGraph(fn *Func) *chainGraph {
graph := &chainGraph{
chains: []*chain{},
edges: []*edge{},
b2chain: make([]*chain, fn.NumBlocks(), fn.NumBlocks()),
}
return graph
}

// isBefore returns true if block a is before block b in the block order. The
// "before" precedence relation is transitive, i.e., if a is before b and b is
// before c, then a is before c.
func isBefore(before map[*chain][]*chain, visited map[*chain]bool, a, b *chain) bool {
if _, ok := visited[a]; ok {
return false
}
visited[a] = true
for _, c := range before[a] {
if c == b {
return true
}
if isBefore(before, visited, c, b) {
return true
}
}
return false
}

func greedyBlockOrder(fn *Func) []*Block {
graph := newChainGraph(fn)

// Initially every block is in its own chain
for _, block := range fn.Blocks {
graph.newChain(block)

if len(block.Succs) == 1 {
graph.edges = append(graph.edges, &edge{block, block.Succs[0].b, WeightTaken})
} else if len(block.Succs) == 2 && block.Likely != BranchUnknown {
// Static branch prediction is available
taken := 0
if block.Likely == BranchUnlikely {
taken = 1
}
e1 := &edge{block, block.Succs[taken].b, WeightTaken}
e2 := &edge{block, block.Succs[1-taken].b, WeightNotTaken}
graph.edges = append(graph.edges, e1, e2)
} else {
// Block predication is unknown or there are more than 2 successors
for _, succ := range block.Succs {
e1 := &edge{block, succ.b, WeightTaken}
graph.edges = append(graph.edges, e1)
}
}
}

// Sort edges by weight and move slow path to end
sort.SliceStable(graph.edges, func(i, j int) bool {
e1, e2 := graph.edges[i], graph.edges[j]
// Move slow path to end
if e1.weight == WeightNotTaken && e2.weight == WeightNotTaken {
return e1.dst.Kind != BlockExit && e2.dst.Kind == BlockExit
}

// If the weights are the same, then keep the original order, this
// ensures that adjacent edges are accessed sequentially, which has
// a noticeable impact on performance
return e1.weight > e2.weight
})

// Merge proper chains until no more chains can be merged
for _, edge := range graph.edges {
c1 := graph.getChain(edge.src)
c2 := graph.getChain(edge.dst)
if c1 == c2 {
continue
}
// [..c1..] edge [..c2..] ? Then merge c1 into c2 and remove entire c1 then
if edge.dst == c2.first() && edge.src == c1.last() {
if fn.pass.debug > 2 {
fmt.Printf("process %v merge %v to %v\n",
edge, c2.blocks, c1.blocks)
}
graph.mergeChain(c2, c1)
}
}
i := 0
for _, chain := range graph.chains {
// Remove nil chains because they are merge
if chain != nil {
graph.chains[i] = chain
if chain.first() == fn.Entry {
// Entry chain must be present at beginning
graph.chains[0], graph.chains[i] = graph.chains[i], graph.chains[0]
}
i++
}
}
graph.chains = graph.chains[:i]

// Reorder chains based by hottness and priority
before := make(map[*chain][]*chain)
for _, edge := range graph.edges {
// Compute the "before" precedence relation between chain, specifically,
// the chain that is taken is arranged before the chain that is not taken.
// This is because hardware prediction thought forward branch is less
// frequently taken, while backedge is more frequently taken.
if edge.weight == WeightNotTaken {
src := graph.getChain(edge.src)
dst := graph.getChain(edge.dst)
before[src] = append(before[src], dst)
if fn.pass.debug > 2 {
fmt.Printf("%v comes before %v\n", src.blocks, dst.blocks)
}
}
}
sort.SliceStable(graph.chains, func(i, j int) bool {
c1, c2 := graph.chains[i], graph.chains[j]
// Entry chain must be present at beginning
if c1.first() == fn.Entry {
return true
}
if c2.first() == fn.Entry {
return false
}
// Respect precedence relation
visited := make(map[*chain]bool)
if isBefore(before, visited, c1, c2) {
return true
}
if isBefore(before, visited, c2, c1) {
return false
}
// Higher merge count is considered
if c1.priority != c2.priority {
return c1.priority > c2.priority
}
// Keep adjacent chains close together
if c1.id > c2.id {
return false
}
// Non-terminated chain is considered
if s1, s2 := len(c1.last().Succs), len(c2.last().Succs); s1 != s2 {
return s1 > s2
}
// Keep original order if we can't decide
return false
})
assert(graph.chains[0].first() == fn.Entry, "entry chain must be first")

// Generate final block order
blockOrder := make([]*Block, 0)
for _, chain := range graph.chains {
blockOrder = append(blockOrder, chain.blocks...)
}
fn.laidout = true

if fn.pass.debug > 2 {
fmt.Printf("Block ordering(%v):\n", fn.Name)
graph.print()
}
if len(blockOrder) != len(fn.Blocks) {
graph.print()
fn.Fatalf("miss blocks in final order: %v %v", blockOrder, fn.Blocks)
}
if entryChain := graph.getChain(fn.Entry); entryChain != graph.chains[0] ||
entryChain.first() != fn.Entry {
graph.print()
fn.Fatalf("entry block is not first block")
}
return blockOrder
}