From be2f3199663283f4ec55156ca2610a82d167b43b Mon Sep 17 00:00:00 2001 From: Yi Yang Date: Thu, 18 Jan 2024 20:09:25 +0800 Subject: [PATCH 1/5] cmd/compile: greedy basic block layout Implement Pettis & Hansen's greedy algorithm, i.e. the bottom-up variant Fixes #66420 --- src/cmd/compile/internal/ssa/compile.go | 5 +- src/cmd/compile/internal/ssa/flagalloc.go | 9 +- src/cmd/compile/internal/ssa/layout.go | 226 +++++++++++++++++++++- 3 files changed, 231 insertions(+), 9 deletions(-) diff --git a/src/cmd/compile/internal/ssa/compile.go b/src/cmd/compile/internal/ssa/compile.go index d125891f88c58f..4c897a4002ed46 100644 --- a/src/cmd/compile/internal/ssa/compile.go +++ b/src/cmd/compile/internal/ssa/compile.go @@ -508,8 +508,7 @@ var passes = [...]pass{ {name: "late nilcheck", fn: nilcheckelim2}, {name: "flagalloc", fn: flagalloc, required: true}, // allocate flags register {name: "regalloc", fn: regalloc, required: true}, // allocate int & float registers + stack slots - {name: "loop rotate", fn: loopRotate}, - {name: "trim", fn: trim}, // remove empty blocks + {name: "trim", fn: trim}, // remove empty blocks } // Double-check phase ordering constraints. @@ -577,8 +576,6 @@ var passOrder = [...]constraint{ {"schedule", "flagalloc"}, // regalloc needs flags to be allocated first. {"flagalloc", "regalloc"}, - // loopRotate will confuse regalloc. - {"regalloc", "loop rotate"}, // trim needs regalloc to be done first. {"regalloc", "trim"}, // memcombine works better if fuse happens first, to help merge stores. diff --git a/src/cmd/compile/internal/ssa/flagalloc.go b/src/cmd/compile/internal/ssa/flagalloc.go index cf2c9a0023f925..e3eed91315f18e 100644 --- a/src/cmd/compile/internal/ssa/flagalloc.go +++ b/src/cmd/compile/internal/ssa/flagalloc.go @@ -53,11 +53,12 @@ func flagalloc(f *Func) { } } } + visitOrder := layoutOrder(f) // For blocks which have a flags control value, that's the only value // we can leave in the flags register at the end of the block. 
(There // is no place to put a flag regeneration instruction.) - for _, b := range f.Blocks { + for _, b := range visitOrder { if b.Kind == BlockDefer { // Defer blocks internally use/clobber the flags value. end[b.ID] = nil @@ -109,7 +110,7 @@ func flagalloc(f *Func) { // Add flag spill and recomputation where they are needed. var remove []*Value // values that should be checked for possible removal var oldSched []*Value - for _, b := range f.Blocks { + for _, b := range visitOrder { oldSched = append(oldSched[:0], b.Values...) b.Values = b.Values[:0] // The current live flag value (the pre-flagalloc copy). @@ -188,7 +189,7 @@ func flagalloc(f *Func) { } // Save live flag state for later. - for _, b := range f.Blocks { + for _, b := range visitOrder { b.FlagsLiveAtEnd = end[b.ID] != nil } @@ -223,7 +224,7 @@ func flagalloc(f *Func) { } // Process affected blocks, preserving value order. - for _, b := range f.Blocks { + for _, b := range visitOrder { if !removeBlocks.contains(b.ID) { continue } diff --git a/src/cmd/compile/internal/ssa/layout.go b/src/cmd/compile/internal/ssa/layout.go index e4a8c6ffbf0dde..1e9909fbe8ccf1 100644 --- a/src/cmd/compile/internal/ssa/layout.go +++ b/src/cmd/compile/internal/ssa/layout.go @@ -4,11 +4,16 @@ package ssa +import ( + "fmt" + "sort" +) + // layout orders basic blocks in f with the goal of minimizing control flow instructions. // After this phase returns, the order of f.Blocks matters and is the order // in which those blocks will appear in the assembly output. func layout(f *Func) { - f.Blocks = layoutOrder(f) + f.Blocks = greedyBlockOrder(f) } // Register allocation may use a different order which has constraints @@ -183,3 +188,222 @@ blockloop: return order //f.Blocks = order } + +// ---------------------------------------------------------------------------- +// Greedy Basic Block Layout +// +// This is an adaptation of Pettis & Hansen's greedy algorithm for laying out +// basic blocks. 
See Profile Guided Code Positioning by Pettis & Hansen. The idea +// is to arrange hot blocks near each other. Initially all blocks are belongs to +// its own chain, then starting from hottest edge and repeatedly merge two proper +// chains iff the edge dest is the first block of dest chain and edge src is the +// last block of src chain. Once all edges are processed, the chains are sorted +// by hottness and merge count and generate final block order. + +// chain is a linear sequence of blocks +type chain struct { + id int + blocks []*Block + priority int // merge count +} + +func (t *chain) first() *Block { + return t.blocks[0] +} + +func (t *chain) last() *Block { + return t.blocks[len(t.blocks)-1] +} + +// edge simply represents a CFG edge +type edge struct { + src *Block + dst *Block + weight int // frequency +} + +const ( + WeightTaken = 100 + WeightNotTaken = 0 +) + +func (e *edge) String() string { + return fmt.Sprintf("%v->%v(%d)", e.src, e.dst, e.weight) +} + +type chainGraph struct { + chainId int + chains []*chain + edges []*edge + b2chain map[*Block]*chain +} + +func (g *chainGraph) newChain(block *Block) *chain { + tr := &chain{g.chainId, []*Block{block}, 0 /*priority*/} + g.b2chain[block] = tr + g.chains = append(g.chains, tr) + g.chainId++ + return tr +} + +func (g *chainGraph) getChain(b *Block) *chain { + return g.b2chain[b] +} + +func (g *chainGraph) mergeChain(to, from *chain) { + for _, block := range from.blocks { + g.b2chain[block] = to + } + to.blocks = append(to.blocks, from.blocks...) 
+ to.priority++ // increment + g.chains[from.id] = nil +} + +func (g *chainGraph) print() { + fmt.Printf("== Edges:\n") + for _, edge := range g.edges { + fmt.Printf("%v\n", edge) + } + fmt.Printf("== Chains:\n") + for _, ch := range g.chains { + if ch == nil { + continue + } + fmt.Printf("id:%d priority:%d blocks:%v\n", ch.id, ch.priority, ch.blocks) + } +} + +func greedyBlockOrder(fn *Func) []*Block { + graph := &chainGraph{0, []*chain{}, []*edge{}, make(map[*Block]*chain)} + + // Initially every block is in its own chain + for _, block := range fn.Blocks { + graph.newChain(block) + + if len(block.Succs) == 1 { + graph.edges = append(graph.edges, &edge{block, block.Succs[0].b, WeightTaken}) + } else if len(block.Succs) == 2 && block.Likely != BranchUnknown { + // Static branch prediction is available + taken := 0 + if block.Likely == BranchUnlikely { + taken = 1 + } + e1 := &edge{block, block.Succs[taken].b, WeightTaken} + e2 := &edge{block, block.Succs[1-taken].b, WeightNotTaken} + graph.edges = append(graph.edges, e1, e2) + } else { + // Block predication is unknown or there are more than 2 successors + for _, succ := range block.Succs { + e1 := &edge{block, succ.b, WeightTaken} + graph.edges = append(graph.edges, e1) + } + } + } + + // Sort edges by weight and move slow path to end + j := len(graph.edges) - 1 + for i, edge := range graph.edges { + if edge.weight == 0 { + if edge.dst.Kind == BlockExit && i < j { + graph.edges[j], graph.edges[i] = graph.edges[i], graph.edges[j] + j-- + } + } + } + sort.SliceStable(graph.edges, func(i, j int) bool { + e1, e2 := graph.edges[i], graph.edges[j] + // If the weights are the same, then keep the original order, this + // ensures that adjacent edges are accessed sequentially, which has + // a noticeable impact on performance + return e1.weight >= e2.weight + }) + + // Merge proper chains until no more chains can be merged + for _, edge := range graph.edges { + src := graph.getChain(edge.src) + dst := 
graph.getChain(edge.dst) + if src == dst { + // Loop detected, "rotate" the loop from [..,header,body,latch] to + // [..,body,latch,header] + for idx, block := range src.blocks { + if block == edge.dst && block.Kind != BlockPlain /*already rotated?*/ { + c := append(src.blocks[0:idx], src.blocks[idx+1:]...) + c = append(c, block) + src.blocks = c + break + } + } + continue + } + if edge.dst == dst.first() && edge.src == src.last() { + graph.mergeChain(src, dst) + } + } + for i := 0; i < len(graph.chains); i++ { + // Remove nil chains because they are merged + if graph.chains[i] == nil { + graph.chains = append(graph.chains[:i], graph.chains[i+1:]...) + i-- + } else if graph.chains[i].first() == fn.Entry { + // Entry chain must be present at beginning + graph.chains[0], graph.chains[i] = graph.chains[i], graph.chains[0] + } + } + + // Reorder chains based by hottness and priority + before := make(map[*chain][]*chain) + for _, edge := range graph.edges { + // Compute the "before" precedence relation between chain, specifically, + // the chain that is taken is arranged before the chain that is not taken. + // This is because hardware prediction thought forward branch is less + // frequently taken, while backedge is more frequently taken. 
+ if edge.weight == WeightNotTaken { + src := graph.getChain(edge.src) + dst := graph.getChain(edge.dst) + before[src] = append(before[src], dst) + } + } + // assert(graph.chains[0].first() == fn.Entry, "entry chain must be first") + const idxSkipEntry = 1 // Entry chain is always first + sort.SliceStable(graph.chains[idxSkipEntry:], func(i, j int) bool { + c1, c2 := graph.chains[i+idxSkipEntry], graph.chains[j+idxSkipEntry] + // Respect precedence relation + for _, b := range before[c1] { + if b == c2 { + return true + } + } + // Higher merge count is considered + if c1.priority != c2.priority { + return c1.priority > c2.priority + } + // Non-terminated chain is considered + if s1, s2 := len(c1.last().Succs), len(c2.last().Succs); s1 != s2 { + return s1 > s2 + } + // Keep original order if we can't decide + return true + }) + + // Generate final block order + blockOrder := make([]*Block, 0) + for _, chain := range graph.chains { + blockOrder = append(blockOrder, chain.blocks...) + } + fn.laidout = true + + if fn.pass.debug > 2 { + fmt.Printf("Block ordering(%v):\n", fn.Name) + graph.print() + } + if len(blockOrder) != len(fn.Blocks) { + graph.print() + fn.Fatalf("miss blocks in final order") + } + if entryChain := graph.getChain(fn.Entry); entryChain != graph.chains[0] || + entryChain.first() != fn.Entry { + graph.print() + fn.Fatalf("entry block is not first block") + } + return blockOrder +} From 1e61a845591a04650c5e1063727b57fe009c119b Mon Sep 17 00:00:00 2001 From: Yi Yang Date: Fri, 21 Jun 2024 10:22:17 +0800 Subject: [PATCH 2/5] address keith comments --- src/cmd/compile/internal/ssa/func.go | 6 ++ src/cmd/compile/internal/ssa/layout.go | 97 +++++++++++++++----------- 2 files changed, 62 insertions(+), 41 deletions(-) diff --git a/src/cmd/compile/internal/ssa/func.go b/src/cmd/compile/internal/ssa/func.go index 529c119dc3da5a..c07d01a6f19d34 100644 --- a/src/cmd/compile/internal/ssa/func.go +++ b/src/cmd/compile/internal/ssa/func.go @@ -86,6 +86,12 @@ type 
LocalSlotSplitKey struct { Type *types.Type // type of slot } +func assert(cond bool, fx string, msg ...interface{}) { + if !cond { + panic(fmt.Sprintf(fx, msg...)) + } +} + // NewFunc returns a new, empty function object. // Caller must reset cache before calling NewFunc. func (c *Config) NewFunc(fe Frontend, cache *Cache) *Func { diff --git a/src/cmd/compile/internal/ssa/layout.go b/src/cmd/compile/internal/ssa/layout.go index 1e9909fbe8ccf1..be83a2b121269a 100644 --- a/src/cmd/compile/internal/ssa/layout.go +++ b/src/cmd/compile/internal/ssa/layout.go @@ -200,19 +200,21 @@ blockloop: // last block of src chain. Once all edges are processed, the chains are sorted // by hottness and merge count and generate final block order. -// chain is a linear sequence of blocks +// chain is a linear sequence of blocks, where the first block is the entry block +// and the last block is the exit block. The chain is used to represent a sequence +// of blocks that are likely to be executed together. type chain struct { - id int - blocks []*Block - priority int // merge count + id ID + blocks []*Block // ordered blocks in this chain + priority int // merge count } -func (t *chain) first() *Block { - return t.blocks[0] +func (c *chain) first() *Block { + return c.blocks[0] } -func (t *chain) last() *Block { - return t.blocks[len(t.blocks)-1] +func (c *chain) last() *Block { + return c.blocks[len(c.blocks)-1] } // edge simply represents a CFG edge @@ -231,32 +233,36 @@ func (e *edge) String() string { return fmt.Sprintf("%v->%v(%d)", e.src, e.dst, e.weight) } +// chainGraph is a directed graph of chains, where each chain is a sequence of +// blocks, and each edge is a CFG edge with a weight. The graph is used to build +// a block layout that minimizes control flow instructions. 
type chainGraph struct { - chainId int + cid idAlloc chains []*chain edges []*edge - b2chain map[*Block]*chain + b2chain []*chain // indexed by block id } func (g *chainGraph) newChain(block *Block) *chain { - tr := &chain{g.chainId, []*Block{block}, 0 /*priority*/} - g.b2chain[block] = tr - g.chains = append(g.chains, tr) - g.chainId++ - return tr + c := &chain{g.cid.get(), []*Block{block}, 0 /*priority*/} + g.b2chain[block.ID] = c + g.chains = append(g.chains, c) + return c } func (g *chainGraph) getChain(b *Block) *chain { - return g.b2chain[b] + return g.b2chain[b.ID] } +// mergeChain merges the "from" chain into the "to" chain. The from chain is +// removed then. func (g *chainGraph) mergeChain(to, from *chain) { for _, block := range from.blocks { - g.b2chain[block] = to + g.b2chain[block.ID] = to } to.blocks = append(to.blocks, from.blocks...) to.priority++ // increment - g.chains[from.id] = nil + g.chains[from.id-1 /*ID always >0*/] = nil } func (g *chainGraph) print() { @@ -274,7 +280,11 @@ func (g *chainGraph) print() { } func greedyBlockOrder(fn *Func) []*Block { - graph := &chainGraph{0, []*chain{}, []*edge{}, make(map[*Block]*chain)} + graph := &chainGraph{ + chains: []*chain{}, + edges: []*edge{}, + b2chain: make([]*chain, fn.NumBlocks(), fn.NumBlocks()), + } // Initially every block is in its own chain for _, block := range fn.Blocks { @@ -301,17 +311,13 @@ func greedyBlockOrder(fn *Func) []*Block { } // Sort edges by weight and move slow path to end - j := len(graph.edges) - 1 - for i, edge := range graph.edges { - if edge.weight == 0 { - if edge.dst.Kind == BlockExit && i < j { - graph.edges[j], graph.edges[i] = graph.edges[i], graph.edges[j] - j-- - } - } - } sort.SliceStable(graph.edges, func(i, j int) bool { e1, e2 := graph.edges[i], graph.edges[j] + // Move slow path to end + if e1.weight == WeightNotTaken && e2.weight == WeightNotTaken { + return e1.dst.Kind != BlockExit && e2.dst.Kind == BlockExit + } + // If the weights are the same, then 
keep the original order, this // ensures that adjacent edges are accessed sequentially, which has // a noticeable impact on performance @@ -339,16 +345,19 @@ func greedyBlockOrder(fn *Func) []*Block { graph.mergeChain(src, dst) } } - for i := 0; i < len(graph.chains); i++ { - // Remove nil chains because they are merged - if graph.chains[i] == nil { - graph.chains = append(graph.chains[:i], graph.chains[i+1:]...) - i-- - } else if graph.chains[i].first() == fn.Entry { - // Entry chain must be present at beginning - graph.chains[0], graph.chains[i] = graph.chains[i], graph.chains[0] + i := 0 + for _, chain := range graph.chains { + // Remove nil chains because they are merge + if chain != nil { + graph.chains[i] = chain + if chain.first() == fn.Entry { + // Entry chain must be present at beginning + graph.chains[0], graph.chains[i] = graph.chains[i], graph.chains[0] + } + i++ } } + graph.chains = graph.chains[:i] // Reorder chains based by hottness and priority before := make(map[*chain][]*chain) @@ -363,10 +372,15 @@ func greedyBlockOrder(fn *Func) []*Block { before[src] = append(before[src], dst) } } - // assert(graph.chains[0].first() == fn.Entry, "entry chain must be first") - const idxSkipEntry = 1 // Entry chain is always first - sort.SliceStable(graph.chains[idxSkipEntry:], func(i, j int) bool { - c1, c2 := graph.chains[i+idxSkipEntry], graph.chains[j+idxSkipEntry] + sort.SliceStable(graph.chains, func(i, j int) bool { + c1, c2 := graph.chains[i], graph.chains[j] + // Entry chain must be present at beginning + if c1.first() == fn.Entry { + return true + } + if c2.first() == fn.Entry { + return false + } // Respect precedence relation for _, b := range before[c1] { if b == c2 { @@ -382,8 +396,9 @@ func greedyBlockOrder(fn *Func) []*Block { return s1 > s2 } // Keep original order if we can't decide - return true + return false }) + assert(graph.chains[0].first() == fn.Entry, "entry chain must be first") // Generate final block order blockOrder := make([]*Block, 
0) From 2611f3d9aaa69fd00798ef32644eefbdfa4ee9b7 Mon Sep 17 00:00:00 2001 From: Yi Yang Date: Tue, 25 Jun 2024 11:22:34 +0800 Subject: [PATCH 3/5] remove loop rotate --- src/cmd/compile/internal/ssa/compile.go | 5 ++++- src/cmd/compile/internal/ssa/layout.go | 30 +++++++++---------------- 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/src/cmd/compile/internal/ssa/compile.go b/src/cmd/compile/internal/ssa/compile.go index 4c897a4002ed46..d125891f88c58f 100644 --- a/src/cmd/compile/internal/ssa/compile.go +++ b/src/cmd/compile/internal/ssa/compile.go @@ -508,7 +508,8 @@ var passes = [...]pass{ {name: "late nilcheck", fn: nilcheckelim2}, {name: "flagalloc", fn: flagalloc, required: true}, // allocate flags register {name: "regalloc", fn: regalloc, required: true}, // allocate int & float registers + stack slots - {name: "trim", fn: trim}, // remove empty blocks + {name: "loop rotate", fn: loopRotate}, + {name: "trim", fn: trim}, // remove empty blocks } // Double-check phase ordering constraints. @@ -576,6 +577,8 @@ var passOrder = [...]constraint{ {"schedule", "flagalloc"}, // regalloc needs flags to be allocated first. {"flagalloc", "regalloc"}, + // loopRotate will confuse regalloc. + {"regalloc", "loop rotate"}, // trim needs regalloc to be done first. {"regalloc", "trim"}, // memcombine works better if fuse happens first, to help merge stores. diff --git a/src/cmd/compile/internal/ssa/layout.go b/src/cmd/compile/internal/ssa/layout.go index be83a2b121269a..c32247f68493ca 100644 --- a/src/cmd/compile/internal/ssa/layout.go +++ b/src/cmd/compile/internal/ssa/layout.go @@ -256,7 +256,7 @@ func (g *chainGraph) getChain(b *Block) *chain { // mergeChain merges the "from" chain into the "to" chain. The from chain is // removed then. 
-func (g *chainGraph) mergeChain(to, from *chain) { +func (g *chainGraph) mergeChain(from, to *chain) { for _, block := range from.blocks { g.b2chain[block.ID] = to } @@ -321,28 +321,20 @@ func greedyBlockOrder(fn *Func) []*Block { // If the weights are the same, then keep the original order, this // ensures that adjacent edges are accessed sequentially, which has // a noticeable impact on performance - return e1.weight >= e2.weight + return e1.weight > e2.weight }) // Merge proper chains until no more chains can be merged for _, edge := range graph.edges { - src := graph.getChain(edge.src) - dst := graph.getChain(edge.dst) - if src == dst { - // Loop detected, "rotate" the loop from [..,header,body,latch] to - // [..,body,latch,header] - for idx, block := range src.blocks { - if block == edge.dst && block.Kind != BlockPlain /*already rotated?*/ { - c := append(src.blocks[0:idx], src.blocks[idx+1:]...) - c = append(c, block) - src.blocks = c - break - } + c1 := graph.getChain(edge.src) + c2 := graph.getChain(edge.dst) + // [c1] edge [c2] ? 
Then merge c1 into c2 and remove entire c1 then + if c1 != c2 && (edge.dst == c2.first() && edge.src == c1.last()) { + if fn.pass.debug > 2 { + fmt.Printf("process %v merge %v to %v\n", + edge, c2.blocks, c1.blocks) } - continue - } - if edge.dst == dst.first() && edge.src == src.last() { - graph.mergeChain(src, dst) + graph.mergeChain(c2, c1) } } i := 0 @@ -413,7 +405,7 @@ func greedyBlockOrder(fn *Func) []*Block { } if len(blockOrder) != len(fn.Blocks) { graph.print() - fn.Fatalf("miss blocks in final order") + fn.Fatalf("miss blocks in final order: %v %v", blockOrder, fn.Blocks) } if entryChain := graph.getChain(fn.Entry); entryChain != graph.chains[0] || entryChain.first() != fn.Entry { From 9fd9a549774d8e76e12bd3da8f9ec5bf32deb271 Mon Sep 17 00:00:00 2001 From: Yi Yang Date: Thu, 27 Jun 2024 15:56:44 +0800 Subject: [PATCH 4/5] keep adjacent chains close together --- src/cmd/compile/internal/ssa/layout.go | 27 ++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/src/cmd/compile/internal/ssa/layout.go b/src/cmd/compile/internal/ssa/layout.go index c32247f68493ca..dfb3ef4f4c9764 100644 --- a/src/cmd/compile/internal/ssa/layout.go +++ b/src/cmd/compile/internal/ssa/layout.go @@ -198,7 +198,13 @@ blockloop: // its own chain, then starting from hottest edge and repeatedly merge two proper // chains iff the edge dest is the first block of dest chain and edge src is the // last block of src chain. Once all edges are processed, the chains are sorted -// by hottness and merge count and generate final block order. +// by hottness and merge count and generate final block order. The algorithm is +// summarized as follows: +// - Initially every block is in its own chain. +// - Sort edges by weight and move slow path to end. +// - Merge proper chains until no more chains can be merged. +// - Sort chains by hottness and priority. +// - Generate final block order. 
// chain is a linear sequence of blocks, where the first block is the entry block // and the last block is the exit block. The chain is used to represent a sequence @@ -261,7 +267,7 @@ func (g *chainGraph) mergeChain(from, to *chain) { g.b2chain[block.ID] = to } to.blocks = append(to.blocks, from.blocks...) - to.priority++ // increment + to.priority++ g.chains[from.id-1 /*ID always >0*/] = nil } @@ -277,6 +283,12 @@ func (g *chainGraph) print() { } fmt.Printf("id:%d priority:%d blocks:%v\n", ch.id, ch.priority, ch.blocks) } + fmt.Printf("== BlockOrder:\n") + blockOrder := make([]*Block, 0) + for _, chain := range g.chains { + blockOrder = append(blockOrder, chain.blocks...) + } + fmt.Printf("%v\n", blockOrder) } func greedyBlockOrder(fn *Func) []*Block { @@ -328,8 +340,11 @@ func greedyBlockOrder(fn *Func) []*Block { for _, edge := range graph.edges { c1 := graph.getChain(edge.src) c2 := graph.getChain(edge.dst) - // [c1] edge [c2] ? Then merge c1 into c2 and remove entire c1 then - if c1 != c2 && (edge.dst == c2.first() && edge.src == c1.last()) { + if c1 == c2 { + continue + } + // [..c1..] edge [..c2..] ? 
Then append c2 to c1 and remove c2 + if edge.dst == c2.first() && edge.src == c1.last() { if fn.pass.debug > 2 { fmt.Printf("process %v merge %v to %v\n", edge, c2.blocks, c1.blocks) @@ -383,6 +398,10 @@ func greedyBlockOrder(fn *Func) []*Block { if c1.priority != c2.priority { return c1.priority > c2.priority } + // Keep adjacent chains close together + if c1.id > c2.id { + return false + } // Non-terminated chain is considered if s1, s2 := len(c1.last().Succs), len(c2.last().Succs); s1 != s2 { return s1 > s2 From 36f564c18937bd8c0a5d2338e1c7e4394da93bb4 Mon Sep 17 00:00:00 2001 From: Yi Yang Date: Mon, 1 Jul 2024 15:47:37 +0800 Subject: [PATCH 5/5] fix before precedence relation "before" precedence relation should be transitive, such that if a is before b, and b is before c, then a should be before c --- src/cmd/compile/internal/ssa/layout.go | 39 ++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/src/cmd/compile/internal/ssa/layout.go b/src/cmd/compile/internal/ssa/layout.go index dfb3ef4f4c9764..75a003a29aeb6e 100644 --- a/src/cmd/compile/internal/ssa/layout.go +++ b/src/cmd/compile/internal/ssa/layout.go @@ -291,12 +291,36 @@ func (g *chainGraph) print() { fmt.Printf("%v\n", blockOrder) } -func greedyBlockOrder(fn *Func) []*Block { +func newChainGraph(fn *Func) *chainGraph { graph := &chainGraph{ chains: []*chain{}, edges: []*edge{}, b2chain: make([]*chain, fn.NumBlocks(), fn.NumBlocks()), } + return graph +} + +// isBefore returns true if chain a is before chain b in the chain order. The +// "before" precedence relation is transitive, i.e., if a is before b and b is +// before c, then a is before c. 
+func isBefore(before map[*chain][]*chain, visited map[*chain]bool, a, b *chain) bool { + if _, ok := visited[a]; ok { + return false + } + visited[a] = true + for _, c := range before[a] { + if c == b { + return true + } + if isBefore(before, visited, c, b) { + return true + } + } + return false +} + +func greedyBlockOrder(fn *Func) []*Block { + graph := newChainGraph(fn) // Initially every block is in its own chain for _, block := range fn.Blocks { @@ -377,6 +401,9 @@ func greedyBlockOrder(fn *Func) []*Block { src := graph.getChain(edge.src) dst := graph.getChain(edge.dst) before[src] = append(before[src], dst) + if fn.pass.debug > 2 { + fmt.Printf("%v comes before %v\n", src.blocks, dst.blocks) + } } } sort.SliceStable(graph.chains, func(i, j int) bool { @@ -389,10 +416,12 @@ func greedyBlockOrder(fn *Func) []*Block { return false } // Respect precedence relation - for _, b := range before[c1] { - if b == c2 { - return true - } + visited := make(map[*chain]bool) + if isBefore(before, visited, c1, c2) { + return true + } + if isBefore(before, visited, c2, c1) { + return false } // Higher merge count is considered if c1.priority != c2.priority {