diff options
Diffstat (limited to 'src/cmd/compile/internal/ssa/loopreschedchecks.go')
-rw-r--r-- | src/cmd/compile/internal/ssa/loopreschedchecks.go | 517 |
1 files changed, 517 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssa/loopreschedchecks.go b/src/cmd/compile/internal/ssa/loopreschedchecks.go new file mode 100644 index 0000000000..8f8055e302 --- /dev/null +++ b/src/cmd/compile/internal/ssa/loopreschedchecks.go @@ -0,0 +1,517 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ssa + +import "fmt" + +// an edgeMemCtr records a backedge, together with the memory and +// counter phi functions at the target of the backedge that must +// be updated when a rescheduling check replaces the backedge. +type edgeMemCtr struct { + e Edge + m *Value // phi for memory at dest of e + c *Value // phi for counter at dest of e +} + +// a rewriteTarget is a a value-argindex pair indicating +// where a rewrite is applied. Note that this is for values, +// not for block controls, because block controls are not targets +// for the rewrites performed in inserting rescheduling checks. +type rewriteTarget struct { + v *Value + i int +} + +type rewrite struct { + before, after *Value // before is the expected value before rewrite, after is the new value installed. + rewrites []rewriteTarget // all the targets for this rewrite. +} + +func (r *rewrite) String() string { + s := "\n\tbefore=" + r.before.String() + ", after=" + r.after.String() + for _, rw := range r.rewrites { + s += ", (i=" + fmt.Sprint(rw.i) + ", v=" + rw.v.LongString() + ")" + } + s += "\n" + return s +} + +const initialRescheduleCounterValue = 1021 // Largest 10-bit prime. 97 nSec loop bodies will check every 100 uSec. + +// insertLoopReschedChecks inserts rescheduling checks on loop backedges. +func insertLoopReschedChecks(f *Func) { + // TODO: when split information is recorded in export data, insert checks only on backedges that can be reached on a split-call-free path. + + // Loop reschedule checks decrement a per-function counter + // shared by all loops, and when the counter becomes non-positive + // a call is made to a rescheduling check in the runtime. + // + // Steps: + // 1. locate backedges. + // 2. Record memory definitions at block end so that + // the SSA graph for mem can be prperly modified. + // 3. Define a counter and record its future uses (at backedges) + // (Same process as 2, applied to a single definition of the counter. + // difference for mem is that there are zero-to-many existing mem + // definitions, versus exactly one for the new counter.) + // 4. Ensure that phi functions that will-be-needed for mem and counter + // are present in the graph, initially with trivial inputs. + // 5. Record all to-be-modified uses of mem and counter; + // apply modifications (split into two steps to simplify and + // avoided nagging order-dependences). + // 6. Rewrite backedges to include counter check, reschedule check, + // and modify destination phi function appropriately with new + // definitions for mem and counter. + + if f.NoSplit { // nosplit functions don't reschedule. + return + } + + backedges := backedges(f) + if len(backedges) == 0 { // no backedges means no rescheduling checks. + return + } + + lastMems := findLastMems(f) + + idom := f.Idom() + sdom := f.sdom() + + if f.pass.debug > 2 { + fmt.Printf("before %s = %s\n", f.Name, sdom.treestructure(f.Entry)) + } + + tofixBackedges := []edgeMemCtr{} + + for _, e := range backedges { // TODO: could filter here by calls in loops, if declared and inferred nosplit are recorded in export data. + tofixBackedges = append(tofixBackedges, edgeMemCtr{e, nil, nil}) + } + + // It's possible that there is no memory state (no global/pointer loads/stores or calls) + if lastMems[f.Entry.ID] == nil { + lastMems[f.Entry.ID] = f.Entry.NewValue0(f.Entry.Line, OpInitMem, TypeMem) + } + + memDefsAtBlockEnds := make([]*Value, f.NumBlocks()) // For each block, the mem def seen at its bottom. Could be from earlier block. + + // Propagate last mem definitions forward through successor blocks. + po := f.postorder() + for i := len(po) - 1; i >= 0; i-- { + b := po[i] + mem := lastMems[b.ID] + for j := 0; mem == nil; j++ { // if there's no def, then there's no phi, so the visible mem is identical in all predecessors. + // loop because there might be backedges that haven't been visited yet. + mem = memDefsAtBlockEnds[b.Preds[j].b.ID] + } + memDefsAtBlockEnds[b.ID] = mem + } + + // Set up counter. There are no phis etc pre-existing for it. + counter0 := f.Entry.NewValue0I(f.Entry.Line, OpConst32, f.Config.fe.TypeInt32(), initialRescheduleCounterValue) + ctrDefsAtBlockEnds := make([]*Value, f.NumBlocks()) // For each block, def visible at its end, if that def will be used. + + // There's a minor difference between memDefsAtBlockEnds and ctrDefsAtBlockEnds; + // because the counter only matter for loops and code that reaches them, it is nil for blocks where the ctr is no + // longer live. This will avoid creation of dead phi functions. This optimization is ignored for the mem variable + // because it is harder and also less likely to be helpful, though dead code elimination ought to clean this out anyhow. + + for _, emc := range tofixBackedges { + e := emc.e + // set initial uses of counter zero (note available-at-bottom and use are the same thing initially.) + // each back-edge will be rewritten to include a reschedule check, and that will use the counter. + src := e.b.Preds[e.i].b + ctrDefsAtBlockEnds[src.ID] = counter0 + } + + // Push uses towards root + for _, b := range f.postorder() { + bd := ctrDefsAtBlockEnds[b.ID] + if bd == nil { + continue + } + for _, e := range b.Preds { + p := e.b + if ctrDefsAtBlockEnds[p.ID] == nil { + ctrDefsAtBlockEnds[p.ID] = bd + } + } + } + + // Maps from block to newly-inserted phi function in block. + newmemphis := make(map[*Block]rewrite) + newctrphis := make(map[*Block]rewrite) + + // Insert phi functions as necessary for future changes to flow graph. + for i, emc := range tofixBackedges { + e := emc.e + h := e.b + + // find the phi function for the memory input at "h", if there is one. + var headerMemPhi *Value // look for header mem phi + + for _, v := range h.Values { + if v.Op == OpPhi && v.Type.IsMemory() { + headerMemPhi = v + } + } + + if headerMemPhi == nil { + // if the header is nil, make a trivial phi from the dominator + mem0 := memDefsAtBlockEnds[idom[h.ID].ID] + headerMemPhi = newPhiFor(h, mem0) + newmemphis[h] = rewrite{before: mem0, after: headerMemPhi} + addDFphis(mem0, h, h, f, memDefsAtBlockEnds, newmemphis) + + } + tofixBackedges[i].m = headerMemPhi + + var headerCtrPhi *Value + rw, ok := newctrphis[h] + if !ok { + headerCtrPhi = newPhiFor(h, counter0) + newctrphis[h] = rewrite{before: counter0, after: headerCtrPhi} + addDFphis(counter0, h, h, f, ctrDefsAtBlockEnds, newctrphis) + } else { + headerCtrPhi = rw.after + } + tofixBackedges[i].c = headerCtrPhi + } + + rewriteNewPhis(f.Entry, f.Entry, f, memDefsAtBlockEnds, newmemphis) + rewriteNewPhis(f.Entry, f.Entry, f, ctrDefsAtBlockEnds, newctrphis) + + if f.pass.debug > 0 { + for b, r := range newmemphis { + fmt.Printf("b=%s, rewrite=%s\n", b, r.String()) + } + + for b, r := range newctrphis { + fmt.Printf("b=%s, rewrite=%s\n", b, r.String()) + } + } + + // Apply collected rewrites. + for _, r := range newmemphis { + for _, rw := range r.rewrites { + rw.v.SetArg(rw.i, r.after) + } + } + + for _, r := range newctrphis { + for _, rw := range r.rewrites { + rw.v.SetArg(rw.i, r.after) + } + } + + zero := f.Entry.NewValue0I(f.Entry.Line, OpConst32, f.Config.fe.TypeInt32(), 0) + one := f.Entry.NewValue0I(f.Entry.Line, OpConst32, f.Config.fe.TypeInt32(), 1) + + // Rewrite backedges to include reschedule checks. + for _, emc := range tofixBackedges { + e := emc.e + headerMemPhi := emc.m + headerCtrPhi := emc.c + h := e.b + i := e.i + p := h.Preds[i] + bb := p.b + mem0 := headerMemPhi.Args[i] + ctr0 := headerCtrPhi.Args[i] + // bb e->p h, + // Because we're going to insert a rare-call, make sure the + // looping edge still looks likely. + likely := BranchLikely + if p.i != 0 { + likely = BranchUnlikely + } + bb.Likely = likely + + // rewrite edge to include reschedule check + // existing edges: + // + // bb.Succs[p.i] == Edge{h, i} + // h.Preds[i] == p == Edge{bb,p.i} + // + // new block(s): + // test: + // ctr1 := ctr0 - 1 + // if ctr1 <= 0 { goto sched } + // goto join + // sched: + // mem1 := call resched (mem0) + // goto join + // join: + // ctr2 := phi(ctr1, counter0) // counter0 is the constant + // mem2 := phi(mem0, mem1) + // goto h + // + // and correct arg i of headerMemPhi and headerCtrPhi + // + // EXCEPT: block containing only phi functions is bad + // for the register allocator. Therefore, there is no + // join, and instead branches targeting join instead target + // the header, and the other phi functions within header are + // adjusted for the additional input. + + test := f.NewBlock(BlockIf) + sched := f.NewBlock(BlockPlain) + + test.Line = bb.Line + sched.Line = bb.Line + + // ctr1 := ctr0 - 1 + // if ctr1 <= 0 { goto sched } + // goto header + ctr1 := test.NewValue2(bb.Line, OpSub32, f.Config.fe.TypeInt32(), ctr0, one) + cmp := test.NewValue2(bb.Line, OpLeq32, f.Config.fe.TypeBool(), ctr1, zero) + test.SetControl(cmp) + test.AddEdgeTo(sched) // if true + // if false -- rewrite edge to header. + // do NOT remove+add, because that will perturb all the other phi functions + // as well as messing up other edges to the header. + test.Succs = append(test.Succs, Edge{h, i}) + h.Preds[i] = Edge{test, 1} + headerMemPhi.SetArg(i, mem0) + headerCtrPhi.SetArg(i, ctr1) + + test.Likely = BranchUnlikely + + // sched: + // mem1 := call resched (mem0) + // goto header + resched := f.Config.fe.Syslook("goschedguarded") + mem1 := sched.NewValue1A(bb.Line, OpStaticCall, TypeMem, resched, mem0) + sched.AddEdgeTo(h) + headerMemPhi.AddArg(mem1) + headerCtrPhi.AddArg(counter0) + + bb.Succs[p.i] = Edge{test, 0} + test.Preds = append(test.Preds, Edge{bb, p.i}) + + // Must correct all the other phi functions in the header for new incoming edge. + // Except for mem and counter phis, it will be the same value seen on the original + // backedge at index i. + for _, v := range h.Values { + if v.Op == OpPhi && v != headerMemPhi && v != headerCtrPhi { + v.AddArg(v.Args[i]) + } + } + } + + f.invalidateCFG() + + if f.pass.debug > 2 { + sdom = newSparseTree(f, f.Idom()) + fmt.Printf("after %s = %s\n", f.Name, sdom.treestructure(f.Entry)) + } + + return +} + +// newPhiFor inserts a new Phi function into b, +// with all inputs set to v. +func newPhiFor(b *Block, v *Value) *Value { + phiV := b.NewValue0(b.Line, OpPhi, v.Type) + + for range b.Preds { + phiV.AddArg(v) + } + return phiV +} + +// rewriteNewPhis updates newphis[h] to record all places where the new phi function inserted +// in block h will replace a previous definition. Block b is the block currently being processed; +// if b has its own phi definition then it takes the place of h. +// defsForUses provides information about other definitions of the variable that are present +// (and if nil, indicates that the variable is no longer live) +func rewriteNewPhis(h, b *Block, f *Func, defsForUses []*Value, newphis map[*Block]rewrite) { + // If b is a block with a new phi, then a new rewrite applies below it in the dominator tree. + if _, ok := newphis[b]; ok { + h = b + } + change := newphis[h] + x := change.before + y := change.after + + // Apply rewrites to this block + if x != nil { // don't waste time on the common case of no definition. + p := &change.rewrites + for _, v := range b.Values { + if v == y { // don't rewrite self -- phi inputs are handled below. + continue + } + for i, w := range v.Args { + if w != x { + continue + } + *p = append(*p, rewriteTarget{v, i}) + } + } + + // Rewrite appropriate inputs of phis reached in successors + // in dominance frontier, self, and dominated. + // If the variable def reaching uses in b is itself defined in b, then the new phi function + // does not reach the successors of b. (This assumes a bit about the structure of the + // phi use-def graph, but it's true for memory and the inserted counter.) + if dfu := defsForUses[b.ID]; dfu != nil && dfu.Block != b { + for _, e := range b.Succs { + s := e.b + if sphi, ok := newphis[s]; ok { // saves time to find the phi this way. + *p = append(*p, rewriteTarget{sphi.after, e.i}) + continue + } + for _, v := range s.Values { + if v.Op == OpPhi && v.Args[e.i] == x { + *p = append(*p, rewriteTarget{v, e.i}) + break + } + } + } + } + newphis[h] = change + } + + sdom := f.sdom() + + for c := sdom[b.ID].child; c != nil; c = sdom[c.ID].sibling { + rewriteNewPhis(h, c, f, defsForUses, newphis) // TODO: convert to explicit stack from recursion. + } +} + +// addDFphis creates new trivial phis that are necessary to correctly reflect (within SSA) +// a new definition for variable "x" inserted at h (usually but not necessarily a phi). +// These new phis can only occur at the dominance frontier of h; block s is in the dominance +// frontier of h if h does not strictly dominate s and if s is a successor of a block b where +// either b = h or h strictly dominates b. +// These newly created phis are themselves new definitions that may require addition of their +// own trivial phi functions in their own dominance frontier, and this is handled recursively. +func addDFphis(x *Value, h, b *Block, f *Func, defForUses []*Value, newphis map[*Block]rewrite) { + oldv := defForUses[b.ID] + if oldv != x { // either a new definition replacing x, or nil if it is proven that there are no uses reachable from b + return + } + sdom := f.sdom() + idom := f.Idom() +outer: + for _, e := range b.Succs { + s := e.b + // check phi functions in the dominance frontier + if sdom.isAncestor(h, s) { + continue // h dominates s, successor of b, therefore s is not in the frontier. + } + if _, ok := newphis[s]; ok { + continue // successor s of b already has a new phi function, so there is no need to add another. + } + if x != nil { + for _, v := range s.Values { + if v.Op == OpPhi && v.Args[e.i] == x { + continue outer // successor s of b has an old phi function, so there is no need to add another. + } + } + } + + old := defForUses[idom[s.ID].ID] // new phi function is correct-but-redundant, combining value "old" on all inputs. + headerPhi := newPhiFor(s, old) + // the new phi will replace "old" in block s and all blocks dominated by s. + newphis[s] = rewrite{before: old, after: headerPhi} // record new phi, to have inputs labeled "old" rewritten to "headerPhi" + addDFphis(old, s, s, f, defForUses, newphis) // the new definition may also create new phi functions. + } + for c := sdom[b.ID].child; c != nil; c = sdom[c.ID].sibling { + addDFphis(x, h, c, f, defForUses, newphis) // TODO: convert to explicit stack from recursion. + } +} + +// findLastMems maps block ids to last memory-output op in a block, if any +func findLastMems(f *Func) []*Value { + + var stores []*Value + lastMems := make([]*Value, f.NumBlocks()) + storeUse := f.newSparseSet(f.NumValues()) + defer f.retSparseSet(storeUse) + for _, b := range f.Blocks { + // Find all the stores in this block. Categorize their uses: + // storeUse contains stores which are used by a subsequent store. + storeUse.clear() + stores = stores[:0] + var memPhi *Value + for _, v := range b.Values { + if v.Op == OpPhi { + if v.Type.IsMemory() { + memPhi = v + } + continue + } + if v.Type.IsMemory() { + stores = append(stores, v) + if v.Op == OpSelect1 { + // Use the arg of the tuple-generating op. + v = v.Args[0] + } + for _, a := range v.Args { + if a.Block == b && a.Type.IsMemory() { + storeUse.add(a.ID) + } + } + } + } + if len(stores) == 0 { + lastMems[b.ID] = memPhi + continue + } + + // find last store in the block + var last *Value + for _, v := range stores { + if storeUse.contains(v.ID) { + continue + } + if last != nil { + b.Fatalf("two final stores - simultaneous live stores %s %s", last, v) + } + last = v + } + if last == nil { + b.Fatalf("no last store found - cycle?") + } + lastMems[b.ID] = last + } + return lastMems +} + +type backedgesState struct { + b *Block + i int +} + +// backedges returns a slice of successor edges that are back +// edges. For reducible loops, edge.b is the header. +func backedges(f *Func) []Edge { + edges := []Edge{} + mark := make([]markKind, f.NumBlocks()) + stack := []backedgesState{} + + mark[f.Entry.ID] = notExplored + stack = append(stack, backedgesState{f.Entry, 0}) + + for len(stack) > 0 { + l := len(stack) + x := stack[l-1] + if x.i < len(x.b.Succs) { + e := x.b.Succs[x.i] + stack[l-1].i++ + s := e.b + if mark[s.ID] == notFound { + mark[s.ID] = notExplored + stack = append(stack, backedgesState{s, 0}) + } else if mark[s.ID] == notExplored { + edges = append(edges, e) + } + } else { + mark[x.b.ID] = done + stack = stack[0 : l-1] + } + } + return edges +} |