aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/compile/internal/inline/inlheur/scoring.go
blob: 3ef7c9b79af51a4a81eb158eb481fccdf514555f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package inlheur

import (
	"cmd/compile/internal/base"
	"cmd/compile/internal/ir"
	"cmd/compile/internal/pgoir"
	"cmd/compile/internal/types"
	"fmt"
	"os"
	"sort"
	"strconv"
	"strings"
)

// These constants enumerate the set of possible ways/scenarios
// in which we'll adjust the score of a given callsite.
type scoreAdjustTyp uint

// These constants capture the various ways in which the inliner's
// scoring phase can adjust a callsite score based on heuristics. They
// fall broadly into three categories:
//
// 1) adjustments based solely on the callsite context (ex: call
// appears on panic path)
//
// 2) adjustments that take into account specific interesting values
// passed at a call site (ex: passing a constant that could result in
// cprop/deadcode in the caller)
//
// 3) adjustments that take into account values returned from the call
// at a callsite (ex: call always returns the same inlinable function,
// and return value flows unmodified into an indirect call)
//
// For categories 2 and 3 above, each adjustment can have either a
// "must" version and a "may" version (but not both). Here the idea is
// that in the "must" version the value flow is unconditional: if the
// callsite executes, then the condition we're interested in (ex:
// param feeding call) is guaranteed to happen. For the "may" version,
// there may be control flow that could cause the benefit to be
// bypassed.
const (
	// Category 1 adjustments (see above)
	panicPathAdj scoreAdjustTyp = (1 << iota)
	initFuncAdj
	inLoopAdj

	// Category 2 adjustments (see above).
	passConstToIfAdj
	passConstToNestedIfAdj
	passConcreteToItfCallAdj
	passConcreteToNestedItfCallAdj
	passFuncToIndCallAdj
	passFuncToNestedIndCallAdj
	passInlinableFuncToIndCallAdj
	passInlinableFuncToNestedIndCallAdj

	// Category 3 adjustments.
	returnFeedsConstToIfAdj
	returnFeedsFuncToIndCallAdj
	returnFeedsInlinableFuncToIndCallAdj
	returnFeedsConcreteToInterfaceCallAdj

	sentinelScoreAdj // sentinel; not a real adjustment
)

// This table records the specific values we use to adjust call
// site scores in a given scenario.
// NOTE: these numbers are chosen very arbitrarily; ideally
// we will go through some sort of turning process to decide
// what value for each one produces the best performance.

var adjValues = map[scoreAdjustTyp]int{
	panicPathAdj:                          40,
	initFuncAdj:                           20,
	inLoopAdj:                             -5,
	passConstToIfAdj:                      -20,
	passConstToNestedIfAdj:                -15,
	passConcreteToItfCallAdj:              -30,
	passConcreteToNestedItfCallAdj:        -25,
	passFuncToIndCallAdj:                  -25,
	passFuncToNestedIndCallAdj:            -20,
	passInlinableFuncToIndCallAdj:         -45,
	passInlinableFuncToNestedIndCallAdj:   -40,
	returnFeedsConstToIfAdj:               -15,
	returnFeedsFuncToIndCallAdj:           -25,
	returnFeedsInlinableFuncToIndCallAdj:  -40,
	returnFeedsConcreteToInterfaceCallAdj: -25,
}

// SetupScoreAdjustments interprets the value of the -d=inlscoreadj
// debugging option, if set. The value of this flag is expected to be
// a series of "/"-separated clauses of the form adj1:value1. Example:
// -d=inlscoreadj=inLoopAdj=0/passConstToIfAdj=-99
func SetupScoreAdjustments() {
	if base.Debug.InlScoreAdj == "" {
		return
	}
	if err := parseScoreAdj(base.Debug.InlScoreAdj); err != nil {
		base.Fatalf("malformed -d=inlscoreadj argument %q: %v",
			base.Debug.InlScoreAdj, err)
	}
}

func adjStringToVal(s string) (scoreAdjustTyp, bool) {
	for adj := scoreAdjustTyp(1); adj < sentinelScoreAdj; adj <<= 1 {
		if adj.String() == s {
			return adj, true
		}
	}
	return 0, false
}

func parseScoreAdj(val string) error {
	clauses := strings.Split(val, "/")
	if len(clauses) == 0 {
		return fmt.Errorf("no clauses")
	}
	for _, clause := range clauses {
		elems := strings.Split(clause, ":")
		if len(elems) < 2 {
			return fmt.Errorf("clause %q: expected colon", clause)
		}
		if len(elems) != 2 {
			return fmt.Errorf("clause %q has %d elements, wanted 2", clause,
				len(elems))
		}
		adj, ok := adjStringToVal(elems[0])
		if !ok {
			return fmt.Errorf("clause %q: unknown adjustment", clause)
		}
		val, err := strconv.Atoi(elems[1])
		if err != nil {
			return fmt.Errorf("clause %q: malformed value: %v", clause, err)
		}
		adjValues[adj] = val
	}
	return nil
}

func adjValue(x scoreAdjustTyp) int {
	if val, ok := adjValues[x]; ok {
		return val
	} else {
		panic("internal error unregistered adjustment type")
	}
}

var mayMustAdj = [...]struct{ may, must scoreAdjustTyp }{
	{may: passConstToNestedIfAdj, must: passConstToIfAdj},
	{may: passConcreteToNestedItfCallAdj, must: passConcreteToItfCallAdj},
	{may: passFuncToNestedIndCallAdj, must: passFuncToNestedIndCallAdj},
	{may: passInlinableFuncToNestedIndCallAdj, must: passInlinableFuncToIndCallAdj},
}

func isMay(x scoreAdjustTyp) bool {
	return mayToMust(x) != 0
}

func isMust(x scoreAdjustTyp) bool {
	return mustToMay(x) != 0
}

func mayToMust(x scoreAdjustTyp) scoreAdjustTyp {
	for _, v := range mayMustAdj {
		if x == v.may {
			return v.must
		}
	}
	return 0
}

func mustToMay(x scoreAdjustTyp) scoreAdjustTyp {
	for _, v := range mayMustAdj {
		if x == v.must {
			return v.may
		}
	}
	return 0
}

// computeCallSiteScore takes a given call site whose ir node is
// 'call' and callee function is 'callee' and with previously computed
// call site properties 'csflags', then computes a score for the
// callsite that combines the size cost of the callee with heuristics
// based on previously computed argument and function properties,
// then stores the score and the adjustment mask in the appropriate
// fields in 'cs'
func (cs *CallSite) computeCallSiteScore(csa *callSiteAnalyzer, calleeProps *FuncProps) {
	callee := cs.Callee
	csflags := cs.Flags
	call := cs.Call

	// Start with the size-based score for the callee.
	score := int(callee.Inl.Cost)
	var tmask scoreAdjustTyp

	if debugTrace&debugTraceScoring != 0 {
		fmt.Fprintf(os.Stderr, "=-= scoring call to %s at %s , initial=%d\n",
			callee.Sym().Name, fmtFullPos(call.Pos()), score)
	}

	// First some score adjustments to discourage inlining in selected cases.
	if csflags&CallSiteOnPanicPath != 0 {
		score, tmask = adjustScore(panicPathAdj, score, tmask)
	}
	if csflags&CallSiteInInitFunc != 0 {
		score, tmask = adjustScore(initFuncAdj, score, tmask)
	}

	// Then adjustments to encourage inlining in selected cases.
	if csflags&CallSiteInLoop != 0 {
		score, tmask = adjustScore(inLoopAdj, score, tmask)
	}

	// Stop here if no callee props.
	if calleeProps == nil {
		cs.Score, cs.ScoreMask = score, tmask
		return
	}

	// Walk through the actual expressions being passed at the call.
	calleeRecvrParms := callee.Type().RecvParams()
	for idx := range call.Args {
		// ignore blanks
		if calleeRecvrParms[idx].Sym == nil ||
			calleeRecvrParms[idx].Sym.IsBlank() {
			continue
		}
		arg := call.Args[idx]
		pflag := calleeProps.ParamFlags[idx]
		if debugTrace&debugTraceScoring != 0 {
			fmt.Fprintf(os.Stderr, "=-= arg %d of %d: val %v flags=%s\n",
				idx, len(call.Args), arg, pflag.String())
		}

		if len(cs.ArgProps) == 0 {
			continue
		}
		argProps := cs.ArgProps[idx]

		if debugTrace&debugTraceScoring != 0 {
			fmt.Fprintf(os.Stderr, "=-= arg %d props %s value %v\n",
				idx, argProps.String(), arg)
		}

		if argProps&ActualExprConstant != 0 {
			if pflag&ParamMayFeedIfOrSwitch != 0 {
				score, tmask = adjustScore(passConstToNestedIfAdj, score, tmask)
			}
			if pflag&ParamFeedsIfOrSwitch != 0 {
				score, tmask = adjustScore(passConstToIfAdj, score, tmask)
			}
		}

		if argProps&ActualExprIsConcreteConvIface != 0 {
			// FIXME: ideally here it would be nice to make a
			// distinction between the inlinable case and the
			// non-inlinable case, but this is hard to do. Example:
			//
			//    type I interface { Tiny() int; Giant() }
			//    type Conc struct { x int }
			//    func (c *Conc) Tiny() int { return 42 }
			//    func (c *Conc) Giant() { <huge amounts of code> }
			//
			//    func passConcToItf(c *Conc) {
			//        makesItfMethodCall(c)
			//    }
			//
			// In the code above, function properties will only tell
			// us that 'makesItfMethodCall' invokes a method on its
			// interface parameter, but we don't know whether it calls
			// "Tiny" or "Giant". If we knew if called "Tiny", then in
			// theory in addition to converting the interface call to
			// a direct call, we could also inline (in which case
			// we'd want to decrease the score even more).
			//
			// One thing we could do (not yet implemented) is iterate
			// through all of the methods of "*Conc" that allow it to
			// satisfy I, and if all are inlinable, then exploit that.
			if pflag&ParamMayFeedInterfaceMethodCall != 0 {
				score, tmask = adjustScore(passConcreteToNestedItfCallAdj, score, tmask)
			}
			if pflag&ParamFeedsInterfaceMethodCall != 0 {
				score, tmask = adjustScore(passConcreteToItfCallAdj, score, tmask)
			}
		}

		if argProps&(ActualExprIsFunc|ActualExprIsInlinableFunc) != 0 {
			mayadj := passFuncToNestedIndCallAdj
			mustadj := passFuncToIndCallAdj
			if argProps&ActualExprIsInlinableFunc != 0 {
				mayadj = passInlinableFuncToNestedIndCallAdj
				mustadj = passInlinableFuncToIndCallAdj
			}
			if pflag&ParamMayFeedIndirectCall != 0 {
				score, tmask = adjustScore(mayadj, score, tmask)
			}
			if pflag&ParamFeedsIndirectCall != 0 {
				score, tmask = adjustScore(mustadj, score, tmask)
			}
		}
	}

	cs.Score, cs.ScoreMask = score, tmask
}

func adjustScore(typ scoreAdjustTyp, score int, mask scoreAdjustTyp) (int, scoreAdjustTyp) {

	if isMust(typ) {
		if mask&typ != 0 {
			return score, mask
		}
		may := mustToMay(typ)
		if mask&may != 0 {
			// promote may to must, so undo may
			score -= adjValue(may)
			mask &^= may
		}
	} else if isMay(typ) {
		must := mayToMust(typ)
		if mask&(must|typ) != 0 {
			return score, mask
		}
	}
	if mask&typ == 0 {
		if debugTrace&debugTraceScoring != 0 {
			fmt.Fprintf(os.Stderr, "=-= applying adj %d for %s\n",
				adjValue(typ), typ.String())
		}
		score += adjValue(typ)
		mask |= typ
	}
	return score, mask
}

var resultFlagToPositiveAdj map[ResultPropBits]scoreAdjustTyp
var paramFlagToPositiveAdj map[ParamPropBits]scoreAdjustTyp

func setupFlagToAdjMaps() {
	resultFlagToPositiveAdj = map[ResultPropBits]scoreAdjustTyp{
		ResultIsAllocatedMem:     returnFeedsConcreteToInterfaceCallAdj,
		ResultAlwaysSameFunc:     returnFeedsFuncToIndCallAdj,
		ResultAlwaysSameConstant: returnFeedsConstToIfAdj,
	}
	paramFlagToPositiveAdj = map[ParamPropBits]scoreAdjustTyp{
		ParamMayFeedInterfaceMethodCall: passConcreteToNestedItfCallAdj,
		ParamFeedsInterfaceMethodCall:   passConcreteToItfCallAdj,
		ParamMayFeedIndirectCall:        passInlinableFuncToNestedIndCallAdj,
		ParamFeedsIndirectCall:          passInlinableFuncToIndCallAdj,
	}
}

// LargestNegativeScoreAdjustment tries to estimate the largest possible
// negative score adjustment that could be applied to a call of the
// function with the specified props. Example:
//
//	func foo() {                  func bar(x int, p *int) int {
//	   ...                          if x < 0 { *p = x }
//	}                               return 99
//	                              }
//
// Function 'foo' above on the left has no interesting properties,
// thus as a result the most we'll adjust any call to is the value for
// "call in loop". If the calculated cost of the function is 150, and
// the in-loop adjustment is 5 (for example), then there is not much
// point treating it as inlinable. On the other hand "bar" has a param
// property (parameter "x" feeds unmodified to an "if" statement") and
// a return property (always returns same constant) meaning that a
// given call _could_ be rescored down as much as -35 points-- thus if
// the size of "bar" is 100 (for example) then there is at least a
// chance that scoring will enable inlining.
func LargestNegativeScoreAdjustment(fn *ir.Func, props *FuncProps) int {
	if resultFlagToPositiveAdj == nil {
		setupFlagToAdjMaps()
	}
	var tmask scoreAdjustTyp
	score := adjValues[inLoopAdj] // any call can be in a loop
	for _, pf := range props.ParamFlags {
		if adj, ok := paramFlagToPositiveAdj[pf]; ok {
			score, tmask = adjustScore(adj, score, tmask)
		}
	}
	for _, rf := range props.ResultFlags {
		if adj, ok := resultFlagToPositiveAdj[rf]; ok {
			score, tmask = adjustScore(adj, score, tmask)
		}
	}

	if debugTrace&debugTraceScoring != 0 {
		fmt.Fprintf(os.Stderr, "=-= largestScore(%v) is %d\n",
			fn, score)
	}

	return score
}

// LargestPositiveScoreAdjustment tries to estimate the largest possible
// positive score adjustment that could be applied to a given callsite.
// At the moment we don't have very many positive score adjustments, so
// this is just hard-coded, not table-driven.
func LargestPositiveScoreAdjustment(fn *ir.Func) int {
	return adjValues[panicPathAdj] + adjValues[initFuncAdj]
}

// callSiteTab contains entries for each call in the function
// currently being processed by InlineCalls; this variable will either
// be set to 'cstabCache' below (for non-inlinable routines) or to the
// local 'cstab' entry in the fnInlHeur object for inlinable routines.
//
// NOTE: this assumes that inlining operations are happening in a serial,
// single-threaded fashion,f which is true today but probably won't hold
// in the future (for example, we might want to score the callsites
// in multiple functions in parallel); if the inliner evolves in this
// direction we'll need to come up with a different approach here.
var callSiteTab CallSiteTab

// scoreCallsCache caches a call site table and call site list between
// invocations of ScoreCalls so that we can reuse previously allocated
// storage.
var scoreCallsCache scoreCallsCacheType

type scoreCallsCacheType struct {
	tab CallSiteTab
	csl []*CallSite
}

// ScoreCalls assigns numeric scores to each of the callsites in
// function 'fn'; the lower the score, the more helpful we think it
// will be to inline.
//
// Unlike a lot of the other inline heuristics machinery, callsite
// scoring can't be done as part of the CanInline call for a function,
// due to fact that we may be working on a non-trivial SCC. So for
// example with this SCC:
//
//	func foo(x int) {           func bar(x int, f func()) {
//	  if x != 0 {                  f()
//	    bar(x, func(){})           foo(x-1)
//	  }                         }
//	}
//
// We don't want to perform scoring for the 'foo' call in "bar" until
// after foo has been analyzed, but it's conceivable that CanInline
// might visit bar before foo for this SCC.
func ScoreCalls(fn *ir.Func) {
	if len(fn.Body) == 0 {
		return
	}
	enableDebugTraceIfEnv()

	nameFinder := newNameFinder(fn)

	if debugTrace&debugTraceScoring != 0 {
		fmt.Fprintf(os.Stderr, "=-= ScoreCalls(%v)\n", ir.FuncName(fn))
	}

	// If this is an inlinable function, use the precomputed
	// call site table for it. If the function wasn't an inline
	// candidate, collect a callsite table for it now.
	var cstab CallSiteTab
	if funcInlHeur, ok := fpmap[fn]; ok {
		cstab = funcInlHeur.cstab
	} else {
		if len(scoreCallsCache.tab) != 0 {
			panic("missing call to ScoreCallsCleanup")
		}
		if scoreCallsCache.tab == nil {
			scoreCallsCache.tab = make(CallSiteTab)
		}
		if debugTrace&debugTraceScoring != 0 {
			fmt.Fprintf(os.Stderr, "=-= building cstab for non-inl func %s\n",
				ir.FuncName(fn))
		}
		cstab = computeCallSiteTable(fn, fn.Body, scoreCallsCache.tab, nil, 0,
			nameFinder)
	}

	csa := makeCallSiteAnalyzer(fn)
	const doCallResults = true
	csa.scoreCallsRegion(fn, fn.Body, cstab, doCallResults, nil)

	disableDebugTrace()
}

// scoreCallsRegion assigns numeric scores to each of the callsites in
// region 'region' within function 'fn'. This can be called on
// an entire function, or with 'region' set to a chunk of
// code corresponding to an inlined call.
func (csa *callSiteAnalyzer) scoreCallsRegion(fn *ir.Func, region ir.Nodes, cstab CallSiteTab, doCallResults bool, ic *ir.InlinedCallExpr) {
	if debugTrace&debugTraceScoring != 0 {
		fmt.Fprintf(os.Stderr, "=-= scoreCallsRegion(%v, %s) len(cstab)=%d\n",
			ir.FuncName(fn), region[0].Op().String(), len(cstab))
	}

	// Sort callsites to avoid any surprises with non deterministic
	// map iteration order (this is probably not needed, but here just
	// in case).
	csl := scoreCallsCache.csl[:0]
	for _, cs := range cstab {
		csl = append(csl, cs)
	}
	scoreCallsCache.csl = csl[:0]
	sort.Slice(csl, func(i, j int) bool {
		return csl[i].ID < csl[j].ID
	})

	// Score each call site.
	var resultNameTab map[*ir.Name]resultPropAndCS
	for _, cs := range csl {
		var cprops *FuncProps
		fihcprops := false
		desercprops := false
		if funcInlHeur, ok := fpmap[cs.Callee]; ok {
			cprops = funcInlHeur.props
			fihcprops = true
		} else if cs.Callee.Inl != nil {
			cprops = DeserializeFromString(cs.Callee.Inl.Properties)
			desercprops = true
		} else {
			if base.Debug.DumpInlFuncProps != "" {
				fmt.Fprintf(os.Stderr, "=-= *** unable to score call to %s from %s\n", cs.Callee.Sym().Name, fmtFullPos(cs.Call.Pos()))
				panic("should never happen")
			} else {
				continue
			}
		}
		cs.computeCallSiteScore(csa, cprops)

		if doCallResults {
			if debugTrace&debugTraceScoring != 0 {
				fmt.Fprintf(os.Stderr, "=-= examineCallResults at %s: flags=%d score=%d funcInlHeur=%v deser=%v\n", fmtFullPos(cs.Call.Pos()), cs.Flags, cs.Score, fihcprops, desercprops)
			}
			resultNameTab = csa.examineCallResults(cs, resultNameTab)
		}

		if debugTrace&debugTraceScoring != 0 {
			fmt.Fprintf(os.Stderr, "=-= scoring call at %s: flags=%d score=%d funcInlHeur=%v deser=%v\n", fmtFullPos(cs.Call.Pos()), cs.Flags, cs.Score, fihcprops, desercprops)
		}
	}

	if resultNameTab != nil {
		csa.rescoreBasedOnCallResultUses(fn, resultNameTab, cstab)
	}

	disableDebugTrace()

	if ic != nil && callSiteTab != nil {
		// Integrate the calls from this cstab into the table for the caller.
		if err := callSiteTab.merge(cstab); err != nil {
			base.FatalfAt(ic.Pos(), "%v", err)
		}
	} else {
		callSiteTab = cstab
	}
}

// ScoreCallsCleanup resets the state of the callsite cache
// once ScoreCalls is done with a function.
func ScoreCallsCleanup() {
	if base.Debug.DumpInlCallSiteScores != 0 {
		if allCallSites == nil {
			allCallSites = make(CallSiteTab)
		}
		for call, cs := range callSiteTab {
			allCallSites[call] = cs
		}
	}
	for k := range scoreCallsCache.tab {
		delete(scoreCallsCache.tab, k)
	}
}

// GetCallSiteScore returns the previously calculated score for call
// within fn.
func GetCallSiteScore(fn *ir.Func, call *ir.CallExpr) (int, bool) {
	if funcInlHeur, ok := fpmap[fn]; ok {
		if cs, ok := funcInlHeur.cstab[call]; ok {
			return cs.Score, true
		}
	}
	if cs, ok := callSiteTab[call]; ok {
		return cs.Score, true
	}
	return 0, false
}

// BudgetExpansion returns the amount to relax/expand the base
// inlining budget when the new inliner is turned on; the inliner
// will add the returned value to the hairiness budget.
//
// Background: with the new inliner, the score for a given callsite
// can be adjusted down by some amount due to heuristics, however we
// won't know whether this is going to happen until much later after
// the CanInline call. This function returns the amount to relax the
// budget initially (to allow for a large score adjustment); later on
// in RevisitInlinability we'll look at each individual function to
// demote it if needed.
func BudgetExpansion(maxBudget int32) int32 {
	if base.Debug.InlBudgetSlack != 0 {
		return int32(base.Debug.InlBudgetSlack)
	}
	// In the default case, return maxBudget, which will effectively
	// double the budget from 80 to 160; this should be good enough
	// for most cases.
	return maxBudget
}

var allCallSites CallSiteTab

// DumpInlCallSiteScores is invoked by the inliner if the debug flag
// "-d=dumpinlcallsitescores" is set; it dumps out a human-readable
// summary of all (potentially) inlinable callsites in the package,
// along with info on call site scoring and the adjustments made to a
// given score. Here profile is the PGO profile in use (may be
// nil), budgetCallback is a callback that can be invoked to find out
// the original pre-adjustment hairiness limit for the function, and
// inlineHotMaxBudget is the constant of the same name used in the
// inliner. Sample output lines:
//
// Score  Adjustment  Status  Callee  CallerPos ScoreFlags
// 115    40          DEMOTED cmd/compile/internal/abi.(*ABIParamAssignment).Offset     expand_calls.go:1679:14|6       panicPathAdj
// 76     -5n         PROMOTED runtime.persistentalloc   mcheckmark.go:48:45|3   inLoopAdj
// 201    0           --- PGO  unicode.DecodeRuneInString        utf8.go:312:30|1
// 7      -5          --- PGO  internal/abi.Name.DataChecked     type.go:625:22|0        inLoopAdj
//
// In the dump above, "Score" is the final score calculated for the
// callsite, "Adjustment" is the amount added to or subtracted from
// the original hairiness estimate to form the score. "Status" shows
// whether anything changed with the site -- did the adjustment bump
// it down just below the threshold ("PROMOTED") or instead bump it
// above the threshold ("DEMOTED"); this will be blank ("---") if no
// threshold was crossed as a result of the heuristics. Note that
// "Status" also shows whether PGO was involved. "Callee" is the name
// of the function called, "CallerPos" is the position of the
// callsite, and "ScoreFlags" is a digest of the specific properties
// we used to make adjustments to callsite score via heuristics.
func DumpInlCallSiteScores(profile *pgoir.Profile, budgetCallback func(fn *ir.Func, profile *pgoir.Profile) (int32, bool)) {

	var indirectlyDueToPromotion func(cs *CallSite) bool
	indirectlyDueToPromotion = func(cs *CallSite) bool {
		bud, _ := budgetCallback(cs.Callee, profile)
		hairyval := cs.Callee.Inl.Cost
		score := int32(cs.Score)
		if hairyval > bud && score <= bud {
			return true
		}
		if cs.parent != nil {
			return indirectlyDueToPromotion(cs.parent)
		}
		return false
	}

	genstatus := func(cs *CallSite) string {
		hairyval := cs.Callee.Inl.Cost
		bud, isPGO := budgetCallback(cs.Callee, profile)
		score := int32(cs.Score)
		st := "---"
		expinl := false
		switch {
		case hairyval <= bud && score <= bud:
			// "Normal" inlined case: hairy val sufficiently low that
			// it would have been inlined anyway without heuristics.
			expinl = true
		case hairyval > bud && score > bud:
			// "Normal" not inlined case: hairy val sufficiently high
			// and scoring didn't lower it.
		case hairyval > bud && score <= bud:
			// Promoted: we would not have inlined it before, but
			// after score adjustment we decided to inline.
			st = "PROMOTED"
			expinl = true
		case hairyval <= bud && score > bud:
			// Demoted: we would have inlined it before, but after
			// score adjustment we decided not to inline.
			st = "DEMOTED"
		}
		inlined := cs.aux&csAuxInlined != 0
		indprom := false
		if cs.parent != nil {
			indprom = indirectlyDueToPromotion(cs.parent)
		}
		if inlined && indprom {
			st += "|INDPROM"
		}
		if inlined && !expinl {
			st += "|[NI?]"
		} else if !inlined && expinl {
			st += "|[IN?]"
		}
		if isPGO {
			st += "|PGO"
		}
		return st
	}

	if base.Debug.DumpInlCallSiteScores != 0 {
		var sl []*CallSite
		for _, cs := range allCallSites {
			sl = append(sl, cs)
		}
		sort.Slice(sl, func(i, j int) bool {
			if sl[i].Score != sl[j].Score {
				return sl[i].Score < sl[j].Score
			}
			fni := ir.PkgFuncName(sl[i].Callee)
			fnj := ir.PkgFuncName(sl[j].Callee)
			if fni != fnj {
				return fni < fnj
			}
			ecsi := EncodeCallSiteKey(sl[i])
			ecsj := EncodeCallSiteKey(sl[j])
			return ecsi < ecsj
		})

		mkname := func(fn *ir.Func) string {
			var n string
			if fn == nil || fn.Nname == nil {
				return "<nil>"
			}
			if fn.Sym().Pkg == types.LocalPkg {
				n = "·" + fn.Sym().Name
			} else {
				n = ir.PkgFuncName(fn)
			}
			// don't try to print super-long names
			if len(n) <= 64 {
				return n
			}
			return n[:32] + "..." + n[len(n)-32:len(n)]
		}

		if len(sl) != 0 {
			fmt.Fprintf(os.Stdout, "# scores for package %s\n", types.LocalPkg.Path)
			fmt.Fprintf(os.Stdout, "# Score  Adjustment  Status  Callee  CallerPos Flags ScoreFlags\n")
		}
		for _, cs := range sl {
			hairyval := cs.Callee.Inl.Cost
			adj := int32(cs.Score) - hairyval
			nm := mkname(cs.Callee)
			ecc := EncodeCallSiteKey(cs)
			fmt.Fprintf(os.Stdout, "%d  %d\t%s\t%s\t%s\t%s\n",
				cs.Score, adj, genstatus(cs),
				nm, ecc,
				cs.ScoreMask.String())
		}
	}
}