diff options
author | David Chase <drchase@google.com> | 2016-04-21 13:24:58 -0400 |
---|---|---|
committer | David Chase <drchase@google.com> | 2016-05-16 21:08:05 +0000 |
commit | 6b99fb5bea56b9674161b6c5415c1d05f19dfdc6 (patch) | |
tree | b4fb33117b40b4dda17e6573a0770c7cdf2df9ba /src/cmd/compile/internal/ssa/sparsetree.go | |
parent | 466cae6ca9f28c971a2d716a5a49dc76bbd1d5bb (diff) | |
download | go-6b99fb5bea56b9674161b6c5415c1d05f19dfdc6.tar.gz go-6b99fb5bea56b9674161b6c5415c1d05f19dfdc6.zip |
cmd/compile: use sparse algorithm for phis in large program
This adds a sparse method for locating nearest ancestors
in a dominator tree, and checks blocks with more than one
predecessor for differences and inserts phi functions where
there are.
Uses reversed post order to cut number of passes, running
it from first def to last use ("last use" for paramout and
mem is end-of-program; last use for a phi input from a
backedge is the source of the back edge)
Includes a cutover from old algorithm to new to avoid paying
large constant factor for small programs. This keeps normal
builds running at about the same time, while not running
over-long on large machine-generated inputs.
Add "phase" flags for ssa/build -- ssa/build/stats prints
number of blocks, values (before and after linking references
and inserting phis, so expansion can be measured), and their
product; the product governs the cutover, where a good value
seems to be somewhere between 1 and 5 million.
Among the files compiled by make.bash, this is the shape of
the tail of the distribution for #blocks, #vars, and their
product:
#blocks #vars product
max 6171 28180 173,898,780
99.9% 1641 6548 10,401,878
99% 463 1909 873,721
95% 152 639 95,235
90% 84 359 30,021
The old algorithm is indeed usually fastest, for 99%ile
values of usually.
The fix to LookupVarOutgoing
( https://go-review.googlesource.com/#/c/22790/ )
deals with some of the same problems addressed by this CL,
but on at least one bug ( #15537 ) this change is still
a significant help.
With this CL:
/tmp/gopath$ rm -rf pkg bin
/tmp/gopath$ time go get -v -gcflags -memprofile=y.mprof \
github.com/gogo/protobuf/test/theproto3/combos/...
...
real 4m35.200s
user 13m16.644s
sys 0m36.712s
and pprof reports 3.4GB allocated in one of the larger profiles
With tip:
/tmp/gopath$ rm -rf pkg bin
/tmp/gopath$ time go get -v -gcflags -memprofile=y.mprof \
github.com/gogo/protobuf/test/theproto3/combos/...
...
real 10m36.569s
user 25m52.286s
sys 4m3.696s
and pprof reports 8.3GB allocated in the same larger profile
With this CL, most of the compilation time on the benchmarked
input is spent in register/stack allocation (cumulative 53%)
and in the sparse lookup algorithm itself (cumulative 20%).
Fixes #15537.
Change-Id: Ia0299dda6a291534d8b08e5f9883216ded677a00
Reviewed-on: https://go-review.googlesource.com/22342
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: David Chase <drchase@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Diffstat (limited to 'src/cmd/compile/internal/ssa/sparsetree.go')
-rw-r--r-- | src/cmd/compile/internal/ssa/sparsetree.go | 49 |
1 files changed, 32 insertions, 17 deletions
diff --git a/src/cmd/compile/internal/ssa/sparsetree.go b/src/cmd/compile/internal/ssa/sparsetree.go index 45c7897496..21fe68601e 100644 --- a/src/cmd/compile/internal/ssa/sparsetree.go +++ b/src/cmd/compile/internal/ssa/sparsetree.go @@ -4,7 +4,9 @@ package ssa -type sparseTreeNode struct { +import "fmt" + +type SparseTreeNode struct { child *Block sibling *Block parent *Block @@ -20,26 +22,39 @@ type sparseTreeNode struct { entry, exit int32 } +func (s *SparseTreeNode) String() string { + return fmt.Sprintf("[%d,%d]", s.entry, s.exit) +} + +func (s *SparseTreeNode) Entry() int32 { + return s.entry +} + +func (s *SparseTreeNode) Exit() int32 { + return s.exit +} + const ( // When used to lookup up definitions in a sparse tree, // these adjustments to a block's entry (+adjust) and // exit (-adjust) numbers allow a distinction to be made // between assignments (typically branch-dependent - // conditionals) occurring "before" phi functions, the - // phi functions, and at the bottom of a block. - ADJUST_BEFORE = -1 // defined before phi - ADJUST_TOP = 0 // defined by phi - ADJUST_BOTTOM = 1 // defined within block + // conditionals) occurring "before" the block (e.g., as inputs + // to the block and its phi functions), "within" the block, + // and "after" the block. + AdjustBefore = -1 // defined before phi + AdjustWithin = 0 // defined by phi + AdjustAfter = 1 // defined within block ) -// A sparseTree is a tree of Blocks. +// A SparseTree is a tree of Blocks. // It allows rapid ancestor queries, // such as whether one block dominates another. -type sparseTree []sparseTreeNode +type SparseTree []SparseTreeNode -// newSparseTree creates a sparseTree from a block-to-parent map (array indexed by Block.ID) -func newSparseTree(f *Func, parentOf []*Block) sparseTree { - t := make(sparseTree, f.NumBlocks()) +// newSparseTree creates a SparseTree from a block-to-parent map (array indexed by Block.ID) +func newSparseTree(f *Func, parentOf []*Block) SparseTree { + t := make(SparseTree, f.NumBlocks()) for _, b := range f.Blocks { n := &t[b.ID] if p := parentOf[b.ID]; p != nil { @@ -80,7 +95,7 @@ func newSparseTree(f *Func, parentOf []*Block) sparseTree { // root left left right right root // 1 2e 3 | 4 5e 6 | 7 8x 9 | 10 11e 12 | 13 14x 15 | 16 17x 18 -func (t sparseTree) numberBlock(b *Block, n int32) int32 { +func (t SparseTree) numberBlock(b *Block, n int32) int32 { // reserve n for entry-1, assign n+1 to entry n++ t[b.ID].entry = n @@ -103,19 +118,19 @@ func (t sparseTree) numberBlock(b *Block, n int32) int32 { // to assign entry and exit numbers in the treewalk, those // numbers are also consistent with this order (i.e., // Sibling(x) has entry number larger than x's exit number). -func (t sparseTree) Sibling(x *Block) *Block { +func (t SparseTree) Sibling(x *Block) *Block { return t[x.ID].sibling } // Child returns a child of x in the dominator tree, or // nil if there are none. The choice of first child is // arbitrary but repeatable. -func (t sparseTree) Child(x *Block) *Block { +func (t SparseTree) Child(x *Block) *Block { return t[x.ID].child } // isAncestorEq reports whether x is an ancestor of or equal to y. -func (t sparseTree) isAncestorEq(x, y *Block) bool { +func (t SparseTree) isAncestorEq(x, y *Block) bool { if x == y { return true } @@ -125,7 +140,7 @@ func (t sparseTree) isAncestorEq(x, y *Block) bool { } // isAncestor reports whether x is a strict ancestor of y. -func (t sparseTree) isAncestor(x, y *Block) bool { +func (t SparseTree) isAncestor(x, y *Block) bool { if x == y { return false } @@ -136,6 +151,6 @@ func (t sparseTree) isAncestor(x, y *Block) bool { // maxdomorder returns a value to allow a maximal dominator first sort. maxdomorder(x) < maxdomorder(y) is true // if x may dominate y, and false if x cannot dominate y. -func (t sparseTree) maxdomorder(x *Block) int32 { +func (t SparseTree) maxdomorder(x *Block) int32 { return t[x.ID].entry } |