// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import "fmt"

// an edgeMemCtr records a backedge, together with the memory and
// counter phi functions at the target of the backedge that must
// be updated when a rescheduling check replaces the backedge.
type edgeMemCtr struct {
	e Edge
	m *Value // phi for memory at dest of e
	c *Value // phi for counter at dest of e
}

// a rewriteTarget is a value-argindex pair indicating
// where a rewrite is applied.  Note that this is for values,
// not for block controls, because block controls are not targets
// for the rewrites performed in inserting rescheduling checks.
type rewriteTarget struct {
	v *Value
	i int
}

// A rewrite records a planned replacement of one value (before) by another
// (after), together with all the value/argument-index positions where the
// replacement applies.  For example (a sketch): rewrite{before: m0, after: p1}
// with rewrites containing rewriteTarget{v, 2} means that argument 2 of v,
// currently m0, will be replaced by p1 when the rewrites are applied
// (via v.SetArg(2, p1)).
type rewrite struct {
	before, after *Value          // before is the expected value before rewrite, after is the new value installed.
	rewrites      []rewriteTarget // all the targets for this rewrite.
}

func (r *rewrite) String() string {
	s := "\n\tbefore=" + r.before.String() + ", after=" + r.after.String()
	for _, rw := range r.rewrites {
		s += ", (i=" + fmt.Sprint(rw.i) + ", v=" + rw.v.LongString() + ")"
	}
	s += "\n"
	return s
}

const initialRescheduleCounterValue = 1021 // Largest 10-bit prime.  A 97ns loop body will check about every 100us (1021 * 97ns ~= 99us).

// insertLoopReschedChecks inserts rescheduling checks on loop backedges.
func insertLoopReschedChecks(f *Func) {
	// TODO: when split information is recorded in export data, insert checks only on backedges that can be reached on a split-call-free path.

	// Loop reschedule checks decrement a per-function counter
	// shared by all loops, and when the counter becomes non-positive
	// a call is made to a rescheduling check in the runtime.
	//
	// Steps:
	// 1. Locate backedges.
	// 2. Record memory definitions at block end so that
	//    the SSA graph for mem can be properly modified.
	// 3. Define a counter and record its future uses (at backedges).
	//    (Same process as 2, applied to a single definition of the counter.
	//     The difference for mem is that there are zero-to-many existing mem
	//     definitions, versus exactly one for the new counter.)
	// 4. Ensure that the phi functions that will be needed for mem and counter
	//    are present in the graph, initially with trivial inputs.
	// 5. Record all to-be-modified uses of mem and counter;
	//    apply the modifications (split into two steps to simplify and
	//    avoid nagging order dependences).
	// 6. Rewrite backedges to include the counter check and reschedule check,
	//    and modify the destination phi functions appropriately with new
	//    definitions for mem and counter.
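	//
	// In source-level terms, the effect on a loop is roughly the following
	// (a sketch; "ctr" is a hypothetical local standing in for the SSA
	// counter, and 1021 is initialRescheduleCounterValue):
	//
	//	ctr := 1021
	//	for {
	//		// ... loop body ...
	//		ctr--
	//		if ctr <= 0 {
	//			goschedguarded() // rescheduling check in the runtime
	//			ctr = 1021
	//		}
	//	}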

	if f.NoSplit { // nosplit functions don't reschedule.
		return
	}

	backedges := backedges(f)
	if len(backedges) == 0 { // no backedges means no rescheduling checks.
		return
	}

	lastMems := findLastMems(f)

	idom := f.Idom()
	sdom := f.sdom()

	if f.pass.debug > 2 {
		fmt.Printf("before %s = %s\n", f.Name, sdom.treestructure(f.Entry))
	}

	tofixBackedges := []edgeMemCtr{}

	for _, e := range backedges { // TODO: could filter here by calls in loops, if declared and inferred nosplit are recorded in export data.
		tofixBackedges = append(tofixBackedges, edgeMemCtr{e, nil, nil})
	}

	// It's possible that there is no memory state (no global/pointer loads/stores or calls).
	if lastMems[f.Entry.ID] == nil {
		lastMems[f.Entry.ID] = f.Entry.NewValue0(f.Entry.Line, OpInitMem, TypeMem)
	}

	memDefsAtBlockEnds := make([]*Value, f.NumBlocks()) // For each block, the mem def seen at its bottom. Could be from earlier block.

	// Propagate last mem definitions forward through successor blocks.
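	// Reverse postorder visits each block after all of its forward-edge
	// predecessors, so at this point only a backedge predecessor can still
	// be missing its entry; the inner loop below scans past those.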
	po := f.postorder()
	for i := len(po) - 1; i >= 0; i-- {
		b := po[i]
		mem := lastMems[b.ID]
		for j := 0; mem == nil; j++ { // if there's no def, then there's no phi, so the visible mem is identical in all predecessors.
			// loop because there might be backedges that haven't been visited yet.
			mem = memDefsAtBlockEnds[b.Preds[j].b.ID]
		}
		memDefsAtBlockEnds[b.ID] = mem
	}

	// Set up the counter.  There are no pre-existing phis or other definitions for it.
	counter0 := f.Entry.NewValue0I(f.Entry.Line, OpConst32, f.Config.fe.TypeInt32(), initialRescheduleCounterValue)
	ctrDefsAtBlockEnds := make([]*Value, f.NumBlocks()) // For each block, def visible at its end, if that def will be used.

	// There's a minor difference between memDefsAtBlockEnds and ctrDefsAtBlockEnds;
	// because the counter only matters for loops and the code that reaches them, it is nil for
	// blocks where the counter is no longer live, which avoids creating dead phi functions.
	// This optimization is skipped for the mem variable because it is harder and less likely to
	// be helpful, and dead code elimination ought to clean the extras out anyhow.

	for _, emc := range tofixBackedges {
		e := emc.e
		// Seed the counter's initial uses with counter0 (note that available-at-bottom and use are the same thing initially).
		// Each backedge will be rewritten to include a reschedule check, and that check will use the counter.
		src := e.b.Preds[e.i].b
		ctrDefsAtBlockEnds[src.ID] = counter0
	}

	// Push uses toward the root: if the counter is live into a successor, it is live at the end of each predecessor as well.
	for _, b := range f.postorder() {
		bd := ctrDefsAtBlockEnds[b.ID]
		if bd == nil {
			continue
		}
		for _, e := range b.Preds {
			p := e.b
			if ctrDefsAtBlockEnds[p.ID] == nil {
				ctrDefsAtBlockEnds[p.ID] = bd
			}
		}
	}

	// Maps from block to newly-inserted phi function in block.
	newmemphis := make(map[*Block]rewrite)
	newctrphis := make(map[*Block]rewrite)

	// Insert phi functions as necessary for future changes to flow graph.
	for i, emc := range tofixBackedges {
		e := emc.e
		h := e.b

		// find the phi function for the memory input at "h", if there is one.
		var headerMemPhi *Value // look for header mem phi

		for _, v := range h.Values {
			if v.Op == OpPhi && v.Type.IsMemory() {
				headerMemPhi = v
			}
		}

		if headerMemPhi == nil {
			// If the header has no mem phi, make a trivial phi whose inputs are all
			// the mem def visible at the end of the header's immediate dominator.
			mem0 := memDefsAtBlockEnds[idom[h.ID].ID]
			headerMemPhi = newPhiFor(h, mem0)
			newmemphis[h] = rewrite{before: mem0, after: headerMemPhi}
			addDFphis(mem0, h, h, f, memDefsAtBlockEnds, newmemphis)

		}
		tofixBackedges[i].m = headerMemPhi

		var headerCtrPhi *Value
		rw, ok := newctrphis[h]
		if !ok {
			headerCtrPhi = newPhiFor(h, counter0)
			newctrphis[h] = rewrite{before: counter0, after: headerCtrPhi}
			addDFphis(counter0, h, h, f, ctrDefsAtBlockEnds, newctrphis)
		} else {
			headerCtrPhi = rw.after
		}
		tofixBackedges[i].c = headerCtrPhi
	}

	rewriteNewPhis(f.Entry, f.Entry, f, memDefsAtBlockEnds, newmemphis)
	rewriteNewPhis(f.Entry, f.Entry, f, ctrDefsAtBlockEnds, newctrphis)

	if f.pass.debug > 0 {
		for b, r := range newmemphis {
			fmt.Printf("b=%s, rewrite=%s\n", b, r.String())
		}

		for b, r := range newctrphis {
			fmt.Printf("b=%s, rewrite=%s\n", b, r.String())
		}
	}

	// Apply collected rewrites.
	for _, r := range newmemphis {
		for _, rw := range r.rewrites {
			rw.v.SetArg(rw.i, r.after)
		}
	}

	for _, r := range newctrphis {
		for _, rw := range r.rewrites {
			rw.v.SetArg(rw.i, r.after)
		}
	}

	zero := f.Entry.NewValue0I(f.Entry.Line, OpConst32, f.Config.fe.TypeInt32(), 0)
	one := f.Entry.NewValue0I(f.Entry.Line, OpConst32, f.Config.fe.TypeInt32(), 1)

	// Rewrite backedges to include reschedule checks.
	for _, emc := range tofixBackedges {
		e := emc.e
		headerMemPhi := emc.m
		headerCtrPhi := emc.c
		h := e.b
		i := e.i
		p := h.Preds[i]
		bb := p.b
		mem0 := headerMemPhi.Args[i]
		ctr0 := headerCtrPhi.Args[i]
		// The backedge runs from bb through edge p to the header h.
		// Because we're going to insert a rarely-taken call, make sure the
		// looping edge still looks likely.
		likely := BranchLikely
		if p.i != 0 {
			likely = BranchUnlikely
		}
		bb.Likely = likely

		// rewrite edge to include reschedule check
		// existing edges:
		//
		// bb.Succs[p.i] == Edge{h, i}
		// h.Preds[i] == p == Edge{bb,p.i}
		//
		// new block(s):
		// test:
		//    ctr1 := ctr0 - 1
		//    if ctr1 <= 0 { goto sched }
		//    goto join
		// sched:
		//    mem1 := call resched (mem0)
		//    goto join
		// join:
		//    ctr2 := phi(ctr1, counter0) // counter0 is the constant
		//    mem2 := phi(mem0, mem1)
		//    goto h
		//
		// and correct arg i of headerMemPhi and headerCtrPhi
		//
		// EXCEPT: a block containing only phi functions is bad
		// for the register allocator.  Therefore, there is no
		// join; branches that would target join target the header
		// instead, and the other phi functions within the header are
		// adjusted for the additional input.

		test := f.NewBlock(BlockIf)
		sched := f.NewBlock(BlockPlain)

		test.Line = bb.Line
		sched.Line = bb.Line

		//    ctr1 := ctr0 - 1
		//    if ctr1 <= 0 { goto sched }
		//    goto header
		ctr1 := test.NewValue2(bb.Line, OpSub32, f.Config.fe.TypeInt32(), ctr0, one)
		cmp := test.NewValue2(bb.Line, OpLeq32, f.Config.fe.TypeBool(), ctr1, zero)
		test.SetControl(cmp)
		test.AddEdgeTo(sched) // if true
		// if false -- rewrite edge to header.
		// do NOT remove+add, because that will perturb all the other phi functions
		// as well as messing up other edges to the header.
		test.Succs = append(test.Succs, Edge{h, i}) // test's new successor 1 (the "false" branch) is the header.
		h.Preds[i] = Edge{test, 1}                  // the header's pred i now arrives from test's successor 1.
		headerMemPhi.SetArg(i, mem0)
		headerCtrPhi.SetArg(i, ctr1)

		test.Likely = BranchUnlikely

		// sched:
		//    mem1 := call resched (mem0)
		//    goto header
		resched := f.Config.fe.Syslook("goschedguarded")
		mem1 := sched.NewValue1A(bb.Line, OpStaticCall, TypeMem, resched, mem0)
		sched.AddEdgeTo(h)
		headerMemPhi.AddArg(mem1)
		headerCtrPhi.AddArg(counter0)

		bb.Succs[p.i] = Edge{test, 0}
		test.Preds = append(test.Preds, Edge{bb, p.i})

		// Must correct all the other phi functions in the header for new incoming edge.
		// Except for mem and counter phis, it will be the same value seen on the original
		// backedge at index i.
		for _, v := range h.Values {
			if v.Op == OpPhi && v != headerMemPhi && v != headerCtrPhi {
				v.AddArg(v.Args[i])
			}
		}
	}

	f.invalidateCFG()

	if f.pass.debug > 2 {
		sdom = newSparseTree(f, f.Idom())
		fmt.Printf("after %s = %s\n", f.Name, sdom.treestructure(f.Entry))
	}
}

// newPhiFor inserts a new Phi function into b,
// with all inputs set to v.
func newPhiFor(b *Block, v *Value) *Value {
	phiV := b.NewValue0(b.Line, OpPhi, v.Type)

	for range b.Preds {
		phiV.AddArg(v)
	}
	return phiV
}

// rewriteNewPhis updates newphis[h] to record all places where the new phi function inserted
// in block h will replace a previous definition.  Block b is the block currently being processed;
// if b has its own phi definition then it takes the place of h.
// defsForUses provides information about other definitions of the variable that are present;
// a nil entry means the variable is no longer live at that block.
func rewriteNewPhis(h, b *Block, f *Func, defsForUses []*Value, newphis map[*Block]rewrite) {
	// If b is a block with a new phi, then a new rewrite applies below it in the dominator tree.
	if _, ok := newphis[b]; ok {
		h = b
	}
	change := newphis[h]
	x := change.before
	y := change.after

	// Apply rewrites to this block
	if x != nil { // don't waste time on the common case of no definition.
		p := &change.rewrites
		for _, v := range b.Values {
			if v == y { // don't rewrite self -- phi inputs are handled below.
				continue
			}
			for i, w := range v.Args {
				if w != x {
					continue
				}
				*p = append(*p, rewriteTarget{v, i})
			}
		}

		// Rewrite appropriate inputs of phis reached in successors
		// in dominance frontier, self, and dominated.
		// If the variable def reaching uses in b is itself defined in b, then the new phi function
		// does not reach the successors of b.  (This assumes a bit about the structure of the
		// phi use-def graph, but it's true for memory and the inserted counter.)
		if dfu := defsForUses[b.ID]; dfu != nil && dfu.Block != b {
			for _, e := range b.Succs {
				s := e.b
				if sphi, ok := newphis[s]; ok { // saves time to find the phi this way.
					*p = append(*p, rewriteTarget{sphi.after, e.i})
					continue
				}
				for _, v := range s.Values {
					if v.Op == OpPhi && v.Args[e.i] == x {
						*p = append(*p, rewriteTarget{v, e.i})
						break
					}
				}
			}
		}
		newphis[h] = change
	}

	sdom := f.sdom()

	for c := sdom[b.ID].child; c != nil; c = sdom[c.ID].sibling {
		rewriteNewPhis(h, c, f, defsForUses, newphis) // TODO: convert to explicit stack from recursion.
	}
}

// addDFphis creates new trivial phis that are necessary to correctly reflect (within SSA)
// a new definition for variable "x" inserted at h (usually but not necessarily a phi).
// These new phis can only occur at the dominance frontier of h; block s is in the dominance
// frontier of h if h does not strictly dominate s and if s is a successor of a block b where
// either b = h or h strictly dominates b.
// These newly created phis are themselves new definitions that may require addition of their
// own trivial phi functions in their own dominance frontier, and this is handled recursively.
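// For example (a sketch): if h dominates a block b but not b's successor s,
// because s is also reachable along a path that avoids h, then s is in h's
// dominance frontier; the definition inserted at h meets the old definition
// at s, so s needs an (initially trivial) phi.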
func addDFphis(x *Value, h, b *Block, f *Func, defForUses []*Value, newphis map[*Block]rewrite) {
	oldv := defForUses[b.ID]
	if oldv != x { // either a new definition replacing x, or nil if it is proven that there are no uses reachable from b
		return
	}
	sdom := f.sdom()
	idom := f.Idom()
outer:
	for _, e := range b.Succs {
		s := e.b
		// check phi functions in the dominance frontier
		if sdom.isAncestor(h, s) {
			continue // h dominates s, successor of b, therefore s is not in the frontier.
		}
		if _, ok := newphis[s]; ok {
			continue // successor s of b already has a new phi function, so there is no need to add another.
		}
		if x != nil {
			for _, v := range s.Values {
				if v.Op == OpPhi && v.Args[e.i] == x {
					continue outer // successor s of b has an old phi function, so there is no need to add another.
				}
			}
		}

		old := defForUses[idom[s.ID].ID] // new phi function is correct-but-redundant, combining value "old" on all inputs.
		headerPhi := newPhiFor(s, old)
		// the new phi will replace "old" in block s and all blocks dominated by s.
		newphis[s] = rewrite{before: old, after: headerPhi} // record new phi, to have inputs labeled "old" rewritten to "headerPhi"
		addDFphis(old, s, s, f, defForUses, newphis)        // the new definition may also create new phi functions.
	}
	for c := sdom[b.ID].child; c != nil; c = sdom[c.ID].sibling {
		addDFphis(x, h, c, f, defForUses, newphis) // TODO: convert to explicit stack from recursion.
	}
}

// findLastMems returns a slice, indexed by block ID, giving the last
// memory-producing op in each block, if any.
func findLastMems(f *Func) []*Value {
	var stores []*Value
	lastMems := make([]*Value, f.NumBlocks())
	storeUse := f.newSparseSet(f.NumValues())
	defer f.retSparseSet(storeUse)
	for _, b := range f.Blocks {
		// Find all the stores in this block. Categorize their uses:
		//  storeUse contains stores which are used by a subsequent store.
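		//  For example (a sketch with hypothetical values): given a mem phi m0
		//  and stores s1 = Store(_, _, m0) then s2 = Store(_, _, s1), storeUse
		//  ends up containing s1, so s2 is this block's last store.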
		storeUse.clear()
		stores = stores[:0]
		var memPhi *Value
		for _, v := range b.Values {
			if v.Op == OpPhi {
				if v.Type.IsMemory() {
					memPhi = v
				}
				continue
			}
			if v.Type.IsMemory() {
				stores = append(stores, v)
				if v.Op == OpSelect1 {
					// A Select1 projects the memory result out of a
					// tuple-generating op (e.g., a call); examine the
					// args of the tuple-generating op itself.
					v = v.Args[0]
				}
				for _, a := range v.Args {
					if a.Block == b && a.Type.IsMemory() {
						storeUse.add(a.ID)
					}
				}
			}
		}
		if len(stores) == 0 {
			lastMems[b.ID] = memPhi
			continue
		}

		// Find the last store in the block: the one store not used by
		// any other store in the same block.
		var last *Value
		for _, v := range stores {
			if storeUse.contains(v.ID) {
				continue
			}
			if last != nil {
				b.Fatalf("two final stores - simultaneous live stores %s %s", last, v)
			}
			last = v
		}
		if last == nil {
			b.Fatalf("no last store found - cycle?")
		}
		lastMems[b.ID] = last
	}
	return lastMems
}

type backedgesState struct {
	b *Block
	i int
}

// backedges returns a slice of successor edges that are back
// edges.  For reducible loops, edge.b is the header.
func backedges(f *Func) []Edge {
	edges := []Edge{}
	mark := make([]markKind, f.NumBlocks())
	stack := []backedgesState{}

	mark[f.Entry.ID] = notExplored
	stack = append(stack, backedgesState{f.Entry, 0})

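	// Iterative DFS with three-state marking: notFound = not yet visited,
	// notExplored = on the DFS stack (an ancestor of the current block),
	// done = fully explored.  A successor edge leading to a notExplored
	// block closes a cycle, so it is recorded as a backedge.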
	for len(stack) > 0 {
		l := len(stack)
		x := stack[l-1]
		if x.i < len(x.b.Succs) {
			e := x.b.Succs[x.i]
			stack[l-1].i++
			s := e.b
			if mark[s.ID] == notFound {
				mark[s.ID] = notExplored
				stack = append(stack, backedgesState{s, 0})
			} else if mark[s.ID] == notExplored {
				edges = append(edges, e)
			}
		} else {
			mark[x.b.ID] = done
			stack = stack[0 : l-1]
		}
	}
	return edges
}