author    Than McIntosh <thanm@google.com>  2023-12-28 14:26:34 +0000
committer Than McIntosh <thanm@google.com>  2024-03-29 23:09:29 +0000
commit    89f7805c2e1ec3a1f708957ca8f43b04f3f2834f (patch)
tree      45654419d3718cddbd87cb6bbb9c7e569f0a5f16
parent    754f870381ef5e2c60c0edd4f902e7063ffb4452 (diff)
cmd/compile/internal: merge stack slots for selected local auto vars
Preliminary compiler support for merging/overlapping stack slots of local
variables whose access patterns are disjoint.

This patch includes changes in AllocFrame to do the actual
merging/overlapping based on information returned from a new
liveness.MergeLocals helper. The MergeLocals helper identifies candidates
by looking for sets of AUTO variables that either A) have the same size
and GC shape (if the types contain pointers), or B) have the same size
(but potentially different types, as long as those types have no
pointers). Variables must be larger than (3*types.PtrSize) to be
considered for merging.

After forming candidates, MergeLocals collects variables into "can be
overlapped" equivalence classes or partitions; this process is driven by
an additional liveness analysis pass. Ideally it would be nice to move
the existing stackmap liveness pass up before AllocFrame and "widen" it
to include merge candidates, so that we could do a single liveness pass
instead of two; however, this may be difficult given that the
merge-locals liveness has to take into account writes corresponding to
dead stores.

This patch also required a change to the way ssa.OpVarDef pseudo-ops are
generated; prior to this point they would only be created for variables
whose type includes pointers. If stack slot merging is enabled, the
ssagen code creates OpVarDef ops for all auto vars that are merge
candidates.

Note that some temporaries created late in the compilation process (e.g.
during the ssa backend) are difficult to reason about, especially in
cases where we take the address of a temp and pass it to the runtime.
For the time being we mark most of the vars created post-ssagen as "not
a merge candidate".

Stack slot merging for locals/autos is enabled by default if "-N" is not
in effect, and can be disabled via "-gcflags=-d=mergelocals=0".

Fixmes/todos/restrictions:
- try lowering size restrictions
- re-evaluate the various skips that happen in SSA-created autotmps

Fixes #62737.
Updates #65532.
Updates #65495.

Cq-Include-Trybots: luci.golang.try:gotip-linux-amd64-longtest
Change-Id: Ibc22e8a76c87e47bc9fafe4959804d9ea923623d
Reviewed-on: https://go-review.googlesource.com/c/go/+/553055
Reviewed-by: Cherry Mui <cherryyz@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
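For illustration only (this sketch is not part of the CL), here is the kind of function the pass helps: the two temporaries below are pointer-free stack autos of the same size, larger than 3*PtrSize, and their lifetimes never overlap, so with optimizations on the new pass may assign them the same frame offset.

    package p

    // With the default -d=mergelocals=1 (and without -N), a and b are
    // candidates for sharing a single stack slot: same size, no
    // pointers, disjoint lifetimes.
    func sum(i, j int) int {
            r := 0
            {
                    var a [1024]int // used only inside this block
                    a[i] = j
                    r += a[j]
            }
            {
                    var b [1024]int // lifetime disjoint from a
                    b[i] = j
                    r += b[j]
            }
            return r
    }

Compiling such a package with -gcflags='-d=mergelocalstrace=2,mergelocals=1' should dump the chosen overlaps and per-variable frame offsets (this is how the integration test below checks the result), while -gcflags=-d=mergelocals=0 disables the optimization.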
-rw-r--r--  src/cmd/compile/internal/base/debug.go                            |   4
-rw-r--r--  src/cmd/compile/internal/base/flag.go                             |   4
-rw-r--r--  src/cmd/compile/internal/base/hashdebug.go                        |   7
-rw-r--r--  src/cmd/compile/internal/ir/name.go                               |   3
-rw-r--r--  src/cmd/compile/internal/liveness/mergelocals.go                  | 691
-rw-r--r--  src/cmd/compile/internal/liveness/plive.go                        |  24
-rw-r--r--  src/cmd/compile/internal/ssa/check.go                             |   5
-rw-r--r--  src/cmd/compile/internal/ssa/func.go                              |  22
-rw-r--r--  src/cmd/compile/internal/ssagen/pgen.go                           |  69
-rw-r--r--  src/cmd/compile/internal/ssagen/ssa.go                            |   9
-rw-r--r--  src/cmd/compile/internal/test/mergelocals_test.go                 | 184
-rw-r--r--  src/cmd/compile/internal/test/testdata/mergelocals/integration.go |  83
-rw-r--r--  src/cmd/compile/internal/walk/temp.go                             |   4
-rw-r--r--  test/fixedbugs/bug385_64.go                                       | 625
14 files changed, 1512 insertions, 222 deletions
diff --git a/src/cmd/compile/internal/base/debug.go b/src/cmd/compile/internal/base/debug.go
index 420ad1305e..08ccef3065 100644
--- a/src/cmd/compile/internal/base/debug.go
+++ b/src/cmd/compile/internal/base/debug.go
@@ -41,6 +41,10 @@ type DebugFlags struct {
LoopVarHash string `help:"for debugging changes in loop behavior. Overrides experiment and loopvar flag."`
LocationLists int `help:"print information about DWARF location list creation"`
MaxShapeLen int `help:"hash shape names longer than this threshold (default 500)" concurrent:"ok"`
+ MergeLocals int `help:"merge together non-interfering local stack slots" concurrent:"ok"`
+ MergeLocalsDumpFunc string `help:"dump specified func in merge locals"`
+ MergeLocalsHash string `help:"hash value for debugging stack slot merging of local variables" concurrent:"ok"`
+ MergeLocalsTrace int `help:"trace debug output for locals merging"`
Nil int `help:"print information about nil checks"`
NoOpenDefer int `help:"disable open-coded defers" concurrent:"ok"`
NoRefName int `help:"do not include referenced symbol names in object file" concurrent:"ok"`
diff --git a/src/cmd/compile/internal/base/flag.go b/src/cmd/compile/internal/base/flag.go
index 5b3c3ad8c6..1ee3337088 100644
--- a/src/cmd/compile/internal/base/flag.go
+++ b/src/cmd/compile/internal/base/flag.go
@@ -184,6 +184,7 @@ func ParseFlags() {
Debug.SyncFrames = -1 // disable sync markers by default
Debug.ZeroCopy = 1
Debug.RangeFuncCheck = 1
+ Debug.MergeLocals = 1
Debug.Checkptr = -1 // so we can tell whether it is set explicitly
@@ -260,6 +261,9 @@ func ParseFlags() {
if Debug.PGOHash != "" {
PGOHash = NewHashDebug("pgohash", Debug.PGOHash, nil)
}
+ if Debug.MergeLocalsHash != "" {
+ MergeLocalsHash = NewHashDebug("mergelocals", Debug.MergeLocalsHash, nil)
+ }
if Flag.MSan && !platform.MSanSupported(buildcfg.GOOS, buildcfg.GOARCH) {
log.Fatalf("%s/%s does not support -msan", buildcfg.GOOS, buildcfg.GOARCH)
diff --git a/src/cmd/compile/internal/base/hashdebug.go b/src/cmd/compile/internal/base/hashdebug.go
index 4e36c8d549..7a5cc42578 100644
--- a/src/cmd/compile/internal/base/hashdebug.go
+++ b/src/cmd/compile/internal/base/hashdebug.go
@@ -53,9 +53,10 @@ func (d *HashDebug) SetInlineSuffixOnly(b bool) *HashDebug {
// The default compiler-debugging HashDebug, for "-d=gossahash=..."
var hashDebug *HashDebug
-var FmaHash *HashDebug // for debugging fused-multiply-add floating point changes
-var LoopVarHash *HashDebug // for debugging shared/private loop variable changes
-var PGOHash *HashDebug // for debugging PGO optimization decisions
+var FmaHash *HashDebug // for debugging fused-multiply-add floating point changes
+var LoopVarHash *HashDebug // for debugging shared/private loop variable changes
+var PGOHash *HashDebug // for debugging PGO optimization decisions
+var MergeLocalsHash *HashDebug // for debugging local stack slot merging changes
// DebugHashMatchPkgFunc reports whether debug variable Gossahash
//
diff --git a/src/cmd/compile/internal/ir/name.go b/src/cmd/compile/internal/ir/name.go
index 758158651e..1ce6e43d0b 100644
--- a/src/cmd/compile/internal/ir/name.go
+++ b/src/cmd/compile/internal/ir/name.go
@@ -194,6 +194,7 @@ const (
nameLibfuzzer8BitCounter // if PEXTERN should be assigned to __sancov_cntrs section
nameCoverageAuxVar // instrumentation counter var or pkg ID for cmd/cover
nameAlias // is type name an alias
+ nameNonMergeable // not a candidate for stack slot merging
)
func (n *Name) Readonly() bool { return n.flags&nameReadonly != 0 }
@@ -209,6 +210,7 @@ func (n *Name) InlLocal() bool { return n.flags&nameInlLocal !=
func (n *Name) OpenDeferSlot() bool { return n.flags&nameOpenDeferSlot != 0 }
func (n *Name) Libfuzzer8BitCounter() bool { return n.flags&nameLibfuzzer8BitCounter != 0 }
func (n *Name) CoverageAuxVar() bool { return n.flags&nameCoverageAuxVar != 0 }
+func (n *Name) NonMergeable() bool { return n.flags&nameNonMergeable != 0 }
func (n *Name) setReadonly(b bool) { n.flags.set(nameReadonly, b) }
func (n *Name) SetNeedzero(b bool) { n.flags.set(nameNeedzero, b) }
@@ -223,6 +225,7 @@ func (n *Name) SetInlLocal(b bool) { n.flags.set(nameInlLocal, b
func (n *Name) SetOpenDeferSlot(b bool) { n.flags.set(nameOpenDeferSlot, b) }
func (n *Name) SetLibfuzzer8BitCounter(b bool) { n.flags.set(nameLibfuzzer8BitCounter, b) }
func (n *Name) SetCoverageAuxVar(b bool) { n.flags.set(nameCoverageAuxVar, b) }
+func (n *Name) SetNonMergeable(b bool) { n.flags.set(nameNonMergeable, b) }
// OnStack reports whether variable n may reside on the stack.
func (n *Name) OnStack() bool {
diff --git a/src/cmd/compile/internal/liveness/mergelocals.go b/src/cmd/compile/internal/liveness/mergelocals.go
new file mode 100644
index 0000000000..a1342efce6
--- /dev/null
+++ b/src/cmd/compile/internal/liveness/mergelocals.go
@@ -0,0 +1,691 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package liveness
+
+import (
+ "cmd/compile/internal/base"
+ "cmd/compile/internal/bitvec"
+ "cmd/compile/internal/ir"
+ "cmd/compile/internal/reflectdata"
+ "cmd/compile/internal/ssa"
+ "cmd/internal/obj"
+ "cmd/internal/src"
+ "fmt"
+ "os"
+ "path/filepath"
+ "sort"
+ "strings"
+)
+
+// MergeLocalsState encapsulates information about which AUTO
+// (stack-allocated) variables within a function can be safely
+// merged/overlapped (e.g. share a stack slot with some other auto).
+// An instance of MergeLocalsState is produced by MergeLocals() below
+// and then consumed in ssagen.AllocFrame. The map 'partition' contains
+// entries of the form <N,SL> where N is an *ir.Name and SL is a slice
+// holding the indices (within 'vars') of all the variables that share
+// the same slot, with the partition's leader listed first. For example,
+// if a function contains five variables where
+// v1/v2/v3 are safe to overlap and v4/v5 are safe to overlap, the
+// MergeLocalsState content might look like
+//
+// vars: [v1, v2, v3, v4, v5]
+// partition: v1 -> [1, 0, 2], v2 -> [1, 0, 2], v3 -> [1, 0, 2]
+// v4 -> [3, 4], v5 -> [3, 4]
+//
+// A nil MergeLocalsState indicates that no local variables meet the
+// necessary criteria for overlap.
+type MergeLocalsState struct {
+ // contains auto vars that participate in overlapping
+ vars []*ir.Name
+ // maps auto variable to overlap partition
+ partition map[*ir.Name][]int
+}
+
+// candRegion is a sub-range (start, end) corresponding to an interval
+// [st,en] within the list of candidate variables.
+type candRegion struct {
+ st, en int
+}
+
+// MergeLocals analyzes the specified ssa function f to determine which
+// of its auto variables can safely share the same stack slot, returning
+// a state object that describes how the overlap should be done.
+func MergeLocals(fn *ir.Func, f *ssa.Func) *MergeLocalsState {
+ cands, idx, regions := collectMergeCandidates(fn)
+ if len(regions) == 0 {
+ return nil
+ }
+ lv := newliveness(fn, f, cands, idx, 0)
+
+ // If we have a local variable such as "r2" below that's written
+ // but then not read, something like:
+ //
+ // vardef r1
+ // r1.x = ...
+ // vardef r2
+ // r2.x = 0
+ // r2.y = ...
+ // <call foo>
+ // // no subsequent use of r2
+ // ... = r1.x
+ //
+ // then for the purpose of calculating stack maps at the call, we
+ // can ignore "r2" completely during the liveness analysis used for
+ // stack maps; for stack slot merging, however, we most definitely
+ // want to treat the writes as "uses".
+ lv.conservativeWrites = true
+
+ lv.prologue()
+ lv.solve()
+ cs := &cstate{
+ fn: fn,
+ ibuilders: make([]IntervalsBuilder, len(cands)),
+ }
+ computeIntervals(lv, cs)
+ rv := performMerging(lv, cs, regions)
+ if err := rv.check(); err != nil {
+ base.FatalfAt(fn.Pos(), "invalid mergelocals state: %v", err)
+ }
+ return rv
+}
+
+// Subsumed returns whether variable n is subsumed, i.e. appears
+// in an overlap position but is not the leader in that partition.
+func (mls *MergeLocalsState) Subsumed(n *ir.Name) bool {
+ if sl, ok := mls.partition[n]; ok && mls.vars[sl[0]] != n {
+ return true
+ }
+ return false
+}
+
+// IsLeader returns whether a variable n is the leader (first element)
+// in a sharing partition.
+func (mls *MergeLocalsState) IsLeader(n *ir.Name) bool {
+ if sl, ok := mls.partition[n]; ok && mls.vars[sl[0]] == n {
+ return true
+ }
+ return false
+}
+
+// Leader returns the leader variable for subsumed var n.
+func (mls *MergeLocalsState) Leader(n *ir.Name) *ir.Name {
+ if sl, ok := mls.partition[n]; ok {
+ if mls.vars[sl[0]] == n {
+ panic("variable is not subsumed")
+ }
+ return mls.vars[sl[0]]
+ }
+ panic("not a merge candidate")
+}
+
+// Followers writes a list of the followers for leader n into the slice tmp.
+func (mls *MergeLocalsState) Followers(n *ir.Name, tmp []*ir.Name) []*ir.Name {
+ tmp = tmp[:0]
+ sl, ok := mls.partition[n]
+ if !ok {
+ panic("no entry for leader")
+ }
+ if mls.vars[sl[0]] != n {
+ panic("followers invoked on subsumed var")
+ }
+ for _, k := range sl[1:] {
+ tmp = append(tmp, mls.vars[k])
+ }
+ sort.SliceStable(tmp, func(i, j int) bool {
+ return tmp[i].Sym().Name < tmp[j].Sym().Name
+ })
+ return tmp
+}
+
+// EstSavings returns the estimated reduction in stack size for
+// the given merge locals state.
+func (mls *MergeLocalsState) EstSavings() int {
+ tot := 0
+ for n := range mls.partition {
+ if mls.Subsumed(n) {
+ tot += int(n.Type().Size())
+ }
+ }
+ return tot
+}
+
+// check tests for various inconsistencies and problems in mls,
+// returning an error if any problems are found.
+func (mls *MergeLocalsState) check() error {
+ if mls == nil {
+ return nil
+ }
+ used := make(map[int]bool)
+ seenv := make(map[*ir.Name]int)
+ for ii, v := range mls.vars {
+ if prev, ok := seenv[v]; ok {
+ return fmt.Errorf("duplicate var %q in vslots: %d and %d\n",
+ v.Sym().Name, ii, prev)
+ }
+ seenv[v] = ii
+ }
+ for k, sl := range mls.partition {
+ // length of slice value needs to be more than 1
+ if len(sl) < 2 {
+ return fmt.Errorf("k=%q v=%+v slice len %d invalid",
+ k.Sym().Name, sl, len(sl))
+ }
+ // values in the slice need to be var indices
+ for i, v := range sl {
+ if v < 0 || v > len(mls.vars)-1 {
+ return fmt.Errorf("k=%q v=+%v slpos %d vslot %d out of range of m.v", k.Sym().Name, sl, i, v)
+ }
+ }
+ }
+ for k, sl := range mls.partition {
+ foundk := false
+ for i, v := range sl {
+ vv := mls.vars[v]
+ if i == 0 {
+ if !mls.IsLeader(vv) {
+ return fmt.Errorf("k=%s v=+%v slpos 0 vslot %d IsLeader(%q) is false should be true", k.Sym().Name, sl, v, vv.Sym().Name)
+ }
+ } else {
+ if !mls.Subsumed(vv) {
+ return fmt.Errorf("k=%s v=+%v slpos %d vslot %d Subsumed(%q) is false should be true", k.Sym().Name, sl, i, v, vv.Sym().Name)
+ }
+ if mls.Leader(vv) != mls.vars[sl[0]] {
+ return fmt.Errorf("k=%s v=+%v slpos %d vslot %d Leader(%q) got %v want %v", k.Sym().Name, sl, i, v, vv.Sym().Name, mls.Leader(vv), mls.vars[sl[0]])
+ }
+ }
+ if vv == k {
+ foundk = true
+ if used[v] {
+ return fmt.Errorf("k=%s v=+%v val slice used violation at slpos %d vslot %d", k.Sym().Name, sl, i, v)
+ }
+ used[v] = true
+ }
+ }
+ if !foundk {
+ return fmt.Errorf("k=%s v=+%v slice value missing k", k.Sym().Name, sl)
+ }
+ }
+ for i := range used {
+ if !used[i] {
+ return fmt.Errorf("pos %d var %q unused", i, mls.vars[i])
+ }
+ }
+ return nil
+}
+
+func (mls *MergeLocalsState) String() string {
+ var leaders []*ir.Name
+ for n, sl := range mls.partition {
+ if n == mls.vars[sl[0]] {
+ leaders = append(leaders, n)
+ }
+ }
+ sort.Slice(leaders, func(i, j int) bool {
+ return leaders[i].Sym().Name < leaders[j].Sym().Name
+ })
+ var sb strings.Builder
+ for _, n := range leaders {
+ sb.WriteString(n.Sym().Name + ":")
+ sl := mls.partition[n]
+ for _, k := range sl[1:] {
+ n := mls.vars[k]
+ sb.WriteString(" " + n.Sym().Name)
+ }
+ sb.WriteString("\n")
+ }
+ return sb.String()
+}
+
+// collectMergeCandidates visits all of the AUTO vars declared in
+// function fn and returns a list of candidate variables for merging /
+// overlapping. Return values are: 1) a slice of ir.Name's
+// corresponding to the candidates, 2) a map that maps ir.Name to slot
+// in the slice, and 3) a slice containing regions (start/end pairs)
+// corresponding to variables that could be overlapped provided that
+// their lifetimes are disjoint.
+func collectMergeCandidates(fn *ir.Func) ([]*ir.Name, map[*ir.Name]int32, []candRegion) {
+ m := make(map[*ir.Name]int32)
+ var cands []*ir.Name
+ var regions []candRegion
+
+ // Collect up the available set of appropriate AUTOs in the
+ // function as a first step.
+ for _, n := range fn.Dcl {
+ if !n.Used() {
+ continue
+ }
+ if !ssa.IsMergeCandidate(n) {
+ continue
+ }
+ cands = append(cands, n)
+ }
+ if len(cands) < 2 {
+ return nil, nil, nil
+ }
+
+ // Sort by pointerness, size, and then name.
+ sort.SliceStable(cands, func(i, j int) bool {
+ ci, cj := cands[i], cands[j]
+ ihp, jhp := 0, 0
+ var ilsym, jlsym *obj.LSym
+ if ci.Type().HasPointers() {
+ ihp = 1
+ ilsym, _, _ = reflectdata.GCSym(ci.Type())
+ }
+ if cj.Type().HasPointers() {
+ jhp = 1
+ jlsym, _, _ = reflectdata.GCSym(cj.Type())
+ }
+ if ihp != jhp {
+ return ihp < jhp
+ }
+ if ci.Type().Size() != cj.Type().Size() {
+ return ci.Type().Size() < cj.Type().Size()
+ }
+ if ihp != 0 && jhp != 0 && ilsym != jlsym {
+ // FIXME: find less clunky way to do this
+ return fmt.Sprintf("%v", ilsym) < fmt.Sprintf("%v", jlsym)
+ }
+ if ci.Sym().Name != cj.Sym().Name {
+ return ci.Sym().Name < cj.Sym().Name
+ }
+ return fmt.Sprintf("%v", ci.Pos()) < fmt.Sprintf("%v", cj.Pos())
+ })
+
+ if base.Debug.MergeLocalsTrace > 1 {
+ fmt.Fprintf(os.Stderr, "=-= raw cand list for func %v:\n", fn)
+ for i := range cands {
+ dumpCand(cands[i], i)
+ }
+ }
+
+ // Now generate a pruned candidate list-- we only want to return a
+ // non-empty list if there is some possibility of overlapping two
+ // vars.
+ var pruned []*ir.Name
+ st := 0
+ for {
+ en := nextRegion(cands, st)
+ if en == -1 {
+ break
+ }
+ if st == en {
+ // region has just one element, we can skip it
+ st++
+ continue
+ }
+ pst := len(pruned)
+ pen := pst + (en - st)
+ if base.Debug.MergeLocalsTrace > 1 {
+ fmt.Fprintf(os.Stderr, "=-= add part %d -> %d\n", pst, pen)
+ }
+
+ // non-empty region, add to pruned
+ pruned = append(pruned, cands[st:en+1]...)
+ regions = append(regions, candRegion{st: pst, en: pen})
+ st = en + 1
+ }
+ if len(pruned) < 2 {
+ return nil, nil, nil
+ }
+ for i, n := range pruned {
+ m[n] = int32(i)
+ }
+
+ if base.Debug.MergeLocalsTrace > 1 {
+ fmt.Fprintf(os.Stderr, "=-= pruned candidate list for func %v:\n", fn)
+ for i := range pruned {
+ dumpCand(pruned[i], i)
+ }
+ }
+ return pruned, m, regions
+}
+
+// nextRegion starts at location idx and walks forward in the cands
+// slice looking for variables that are "compatible" (overlappable)
+// with the variable at position idx; it returns the end of the new
+// region (range of compatible variables starting at idx).
+func nextRegion(cands []*ir.Name, idx int) int {
+ n := len(cands)
+ if idx >= n {
+ return -1
+ }
+ c0 := cands[idx]
+ hp0 := c0.Type().HasPointers()
+ for j := idx + 1; j < n; j++ {
+ cj := cands[j]
+ hpj := cj.Type().HasPointers()
+ ok := true
+ if hp0 {
+ if !hpj || c0.Type().Size() != cj.Type().Size() {
+ return j - 1
+ }
+ // GC shape must match if both types have pointers.
+ gcsym0, _, _ := reflectdata.GCSym(c0.Type())
+ gcsymj, _, _ := reflectdata.GCSym(cj.Type())
+ if gcsym0 != gcsymj {
+ return j - 1
+ }
+ } else {
+ // If no pointers, match size only.
+ if !ok || hp0 != hpj || c0.Type().Size() != cj.Type().Size() {
+ return j - 1
+ }
+ }
+ }
+ return n - 1
+}
+
+type cstate struct {
+ fn *ir.Func
+ ibuilders []IntervalsBuilder
+}
+
+// mergeVisitRegion tries to perform overlapping of variables within a
+// given subrange of cands described by st and en (indices into our
+// candidate var list), where the variables within this range have
+// already been determined to be compatible with respect to type,
+// size, etc. Overlapping is done in a greedy fashion: we select the
+// first element in the st->en range, then walk the rest of the
+// elements adding in vars whose lifetimes don't overlap with the
+// first element, then repeat the process until we run out of work to do.
+func (mls *MergeLocalsState) mergeVisitRegion(lv *liveness, ivs []Intervals, st, en int) {
+ if base.Debug.MergeLocalsTrace > 1 {
+ fmt.Fprintf(os.Stderr, "=-= mergeVisitRegion(st=%d, en=%d)\n", st, en)
+ }
+ n := en - st + 1
+ used := bitvec.New(int32(n))
+
+ nxt := func(slot int) int {
+ for c := slot - st; c < n; c++ {
+ if used.Get(int32(c)) {
+ continue
+ }
+ return c + st
+ }
+ return -1
+ }
+
+ navail := n
+ cands := lv.vars
+ if base.Debug.MergeLocalsTrace > 1 {
+ fmt.Fprintf(os.Stderr, " =-= navail = %d\n", navail)
+ }
+ for navail >= 2 {
+ leader := nxt(st)
+ used.Set(int32(leader - st))
+ navail--
+
+ if base.Debug.MergeLocalsTrace > 1 {
+ fmt.Fprintf(os.Stderr, " =-= begin leader %d used=%s\n", leader,
+ used.String())
+ }
+ elems := []int{leader}
+ lints := ivs[leader]
+
+ for succ := nxt(leader + 1); succ != -1; succ = nxt(succ + 1) {
+
+ // Skip if de-selected by merge locals hash.
+ if base.Debug.MergeLocalsHash != "" {
+ if !base.MergeLocalsHash.MatchPosWithInfo(cands[succ].Pos(), "mergelocals", nil) {
+ continue
+ }
+ }
+ // Skip if already used.
+ if used.Get(int32(succ - st)) {
+ continue
+ }
+ if base.Debug.MergeLocalsTrace > 1 {
+ fmt.Fprintf(os.Stderr, " =-= overlap of %d[%v] {%s} with %d[%v] {%s} is: %v\n", leader, cands[leader], lints.String(), succ, cands[succ], ivs[succ].String(), lints.Overlaps(ivs[succ]))
+ }
+
+ // Can we overlap leader with this var?
+ if lints.Overlaps(ivs[succ]) {
+ continue
+ } else {
+ // Add to overlap set.
+ elems = append(elems, succ)
+ lints = lints.Merge(ivs[succ])
+ }
+ }
+ if len(elems) > 1 {
+ // We found some things to overlap with leader. Add the
+ // candidate elements to "vars" and update "partition".
+ off := len(mls.vars)
+ sl := make([]int, len(elems))
+ for i, candslot := range elems {
+ sl[i] = off + i
+ mls.vars = append(mls.vars, cands[candslot])
+ mls.partition[cands[candslot]] = sl
+ }
+ navail -= (len(elems) - 1)
+ for i := range elems {
+ used.Set(int32(elems[i] - st))
+ }
+ if base.Debug.MergeLocalsTrace > 1 {
+ fmt.Fprintf(os.Stderr, "=-= overlapping %+v:\n", sl)
+ for i := range sl {
+ dumpCand(mls.vars[sl[i]], sl[i])
+ }
+ for i, v := range elems {
+ fmt.Fprintf(os.Stderr, "=-= %d: sl=%d %s\n", i, v, ivs[v])
+ }
+ }
+ }
+ }
+}
+
+// performMerging carries out variable merging within each of the
+// candidate ranges in regions, returning a state object
+// that describes the variable overlaps.
+func performMerging(lv *liveness, cs *cstate, regions []candRegion) *MergeLocalsState {
+ cands := lv.vars
+ mls := &MergeLocalsState{
+ partition: make(map[*ir.Name][]int),
+ }
+
+ // Finish intervals construction.
+ ivs := make([]Intervals, len(cands))
+ for i := range cands {
+ var err error
+ ivs[i], err = cs.ibuilders[i].Finish()
+ if err != nil {
+ ninstr := 0
+ if base.Debug.MergeLocalsTrace != 0 {
+ iidx := 0
+ for k := 0; k < len(lv.f.Blocks); k++ {
+ b := lv.f.Blocks[k]
+ fmt.Fprintf(os.Stderr, "\n")
+ for _, v := range b.Values {
+ fmt.Fprintf(os.Stderr, " b%d %d: %s\n", k, iidx, v.LongString())
+ iidx++
+ ninstr++
+ }
+ }
+ }
+ base.FatalfAt(cands[i].Pos(), "interval construct error for var %q in func %q (%d instrs): %v", cands[i].Sym().Name, ir.FuncName(cs.fn), ninstr, err)
+ return nil
+ }
+ }
+
+ // Dump state before attempting overlap.
+ if base.Debug.MergeLocalsTrace > 1 {
+ fmt.Fprintf(os.Stderr, "=-= cands live before overlap:\n")
+ for i := range cands {
+ c := cands[i]
+ fmt.Fprintf(os.Stderr, "%d: %v sz=%d ivs=%s\n",
+ i, c.Sym().Name, c.Type().Size(), ivs[i].String())
+ }
+ fmt.Fprintf(os.Stderr, "=-= regions (%d): ", len(regions))
+ for _, cr := range regions {
+ fmt.Fprintf(os.Stderr, " [%d,%d]", cr.st, cr.en)
+ }
+ fmt.Fprintf(os.Stderr, "\n")
+ }
+
+ if base.Debug.MergeLocalsTrace > 1 {
+ fmt.Fprintf(os.Stderr, "=-= len(regions) = %d\n", len(regions))
+ }
+
+ // Apply a greedy merge/overlap strategy within each region
+ // of compatible variables.
+ for _, cr := range regions {
+ mls.mergeVisitRegion(lv, ivs, cr.st, cr.en)
+ }
+ if len(mls.vars) == 0 {
+ return nil
+ }
+ return mls
+}
+
+// computeIntervals performs a backwards sweep over the instructions
+// of the function we're compiling, building up an Intervals object
+// for each candidate variable by looking for upwards exposed uses
+// and kills.
+func computeIntervals(lv *liveness, cs *cstate) {
+ nvars := int32(len(lv.vars))
+ liveout := bitvec.New(nvars)
+
+ if base.Debug.MergeLocalsDumpFunc != "" &&
+ strings.HasSuffix(fmt.Sprintf("%v", cs.fn), base.Debug.MergeLocalsDumpFunc) {
+ fmt.Fprintf(os.Stderr, "=-= mergelocalsdumpfunc %v:\n", cs.fn)
+ ii := 0
+ for k, b := range lv.f.Blocks {
+ fmt.Fprintf(os.Stderr, "b%d:\n", k)
+ for _, v := range b.Values {
+ pos := base.Ctxt.PosTable.Pos(v.Pos)
+ fmt.Fprintf(os.Stderr, "=-= %d L%d|C%d %s\n", ii, pos.RelLine(), pos.RelCol(), v.LongString())
+ ii++
+ }
+ }
+ }
+
+ // Count instructions.
+ ninstr := 0
+ for _, b := range lv.f.Blocks {
+ ninstr += len(b.Values)
+ }
+ // current instruction index during backwards walk
+ iidx := ninstr - 1
+
+ // Make a backwards pass over all blocks
+ for k := len(lv.f.Blocks) - 1; k >= 0; k-- {
+ b := lv.f.Blocks[k]
+ be := lv.blockEffects(b)
+
+ if base.Debug.MergeLocalsTrace > 2 {
+ fmt.Fprintf(os.Stderr, "=-= liveout from tail of b%d: ", k)
+ for j := range lv.vars {
+ if be.liveout.Get(int32(j)) {
+ fmt.Fprintf(os.Stderr, " %q", lv.vars[j].Sym().Name)
+ }
+ }
+ fmt.Fprintf(os.Stderr, "\n")
+ }
+
+ // Take into account effects taking place at end of this basic
+ // block by comparing our current live set with liveout for
+ // the block. If a given var was not live before and is now
+ // becoming live we need to mark this transition with a
+ // builder "Live" call; similarly if a var was live before and
+ // is now no longer live, we need a "Kill" call.
+ for j := range lv.vars {
+ isLive := liveout.Get(int32(j))
+ blockLiveOut := be.liveout.Get(int32(j))
+ if isLive {
+ if !blockLiveOut {
+ if base.Debug.MergeLocalsTrace > 2 {
+ fmt.Fprintf(os.Stderr, "=+= at instr %d block boundary kill of %v\n", iidx, lv.vars[j])
+ }
+ cs.ibuilders[j].Kill(iidx)
+ }
+ } else if blockLiveOut {
+ if base.Debug.MergeLocalsTrace > 2 {
+ fmt.Fprintf(os.Stderr, "=+= at block-end instr %d %v becomes live\n",
+ iidx, lv.vars[j])
+ }
+ cs.ibuilders[j].Live(iidx)
+ }
+ }
+
+ // Set our working "currently live" set to the previously
+ // computed live out set for the block.
+ liveout.Copy(be.liveout)
+
+ // Now walk backwards through this block.
+ for i := len(b.Values) - 1; i >= 0; i-- {
+ v := b.Values[i]
+
+ if base.Debug.MergeLocalsTrace > 2 {
+ fmt.Fprintf(os.Stderr, "=-= b%d instr %d: %s\n", k, iidx, v.LongString())
+ }
+
+ // Update liveness based on what we see happening in this
+ // instruction.
+ pos, e := lv.valueEffects(v)
+ becomeslive := e&uevar != 0
+ iskilled := e&varkill != 0
+ if becomeslive && iskilled {
+ // we do not ever expect to see both a kill and an
+ // upwards exposed use given our size constraints.
+ panic("should never happen")
+ }
+ if iskilled && liveout.Get(pos) {
+ cs.ibuilders[pos].Kill(iidx)
+ liveout.Unset(pos)
+ if base.Debug.MergeLocalsTrace > 2 {
+ fmt.Fprintf(os.Stderr, "=+= at instr %d kill of %v\n",
+ iidx, lv.vars[pos])
+ }
+ } else if becomeslive && !liveout.Get(pos) {
+ cs.ibuilders[pos].Live(iidx)
+ liveout.Set(pos)
+ if base.Debug.MergeLocalsTrace > 2 {
+ fmt.Fprintf(os.Stderr, "=+= at instr %d upwards-exposed use of %v\n",
+ iidx, lv.vars[pos])
+ }
+ }
+ iidx--
+ }
+
+ if b == lv.f.Entry {
+ for j, v := range lv.vars {
+ if liveout.Get(int32(j)) {
+ lv.f.Fatalf("%v %L recorded as live on entry",
+ lv.fn.Nname, v)
+ }
+ }
+ }
+ }
+ if iidx != -1 {
+ panic("iidx underflow")
+ }
+}
+
+func dumpCand(c *ir.Name, i int) {
+ fmtFullPos := func(p src.XPos) string {
+ var sb strings.Builder
+ sep := ""
+ base.Ctxt.AllPos(p, func(pos src.Pos) {
+ fmt.Fprintf(&sb, sep)
+ sep = "|"
+ file := filepath.Base(pos.Filename())
+ fmt.Fprintf(&sb, "%s:%d:%d", file, pos.Line(), pos.Col())
+ })
+ return sb.String()
+ }
+ fmt.Fprintf(os.Stderr, " %d: %s %q sz=%d hp=%v t=%v\n",
+ i, fmtFullPos(c.Pos()), c.Sym().Name, c.Type().Size(),
+ c.Type().HasPointers(), c.Type())
+}
+
+// for unit testing only.
+func MakeMergeLocalsState(partition map[*ir.Name][]int, vars []*ir.Name) (*MergeLocalsState, error) {
+ mls := &MergeLocalsState{partition: partition, vars: vars}
+ if err := mls.check(); err != nil {
+ return nil, err
+ }
+ return mls, nil
+}
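The greedy strategy described in mergeVisitRegion's comment can be summarized with a small, self-contained sketch. This is illustrative only: the span type, overlaps, and greedyPartition below are hypothetical stand-ins, not the liveness package's Intervals/IntervalsBuilder API.

    package main

    import "fmt"

    // span is a half-open live range [st, en) over instruction indices.
    type span struct{ st, en int }

    // overlaps reports whether any range in a intersects any range in b.
    func overlaps(a, b []span) bool {
            for _, x := range a {
                    for _, y := range b {
                            if x.st < y.en && y.st < x.en {
                                    return true
                            }
                    }
            }
            return false
    }

    // greedyPartition groups variables (identified by index into ivs)
    // whose live ranges are pairwise disjoint: pick the first unused
    // variable as leader, sweep the remaining ones adding any that do
    // not overlap the accumulated ranges, then repeat. (The real pass
    // only records partitions with more than one member.)
    func greedyPartition(ivs [][]span) [][]int {
            used := make([]bool, len(ivs))
            var parts [][]int
            for lead := range ivs {
                    if used[lead] {
                            continue
                    }
                    used[lead] = true
                    part := []int{lead}
                    acc := append([]span(nil), ivs[lead]...)
                    for j := lead + 1; j < len(ivs); j++ {
                            if used[j] || overlaps(acc, ivs[j]) {
                                    continue
                            }
                            used[j] = true
                            part = append(part, j)
                            acc = append(acc, ivs[j]...)
                    }
                    parts = append(parts, part)
            }
            return parts
    }

    func main() {
            // v0 live over [0,4), v1 over [2,6), v2 over [5,9):
            // v0 and v2 can share a slot; v1 cannot join them.
            ivs := [][]span{{{0, 4}}, {{2, 6}}, {{5, 9}}}
            fmt.Println(greedyPartition(ivs)) // [[0 2] [1]]
    }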
diff --git a/src/cmd/compile/internal/liveness/plive.go b/src/cmd/compile/internal/liveness/plive.go
index e4dbfa9fa3..ab1a7df930 100644
--- a/src/cmd/compile/internal/liveness/plive.go
+++ b/src/cmd/compile/internal/liveness/plive.go
@@ -143,6 +143,11 @@ type liveness struct {
doClobber bool // Whether to clobber dead stack slots in this function.
noClobberArgs bool // Do not clobber function arguments
+
+ // treat "dead" writes as equivalent to reads during the analysis;
+ // used only during liveness analysis for stack slot merging (doesn't
+ // make sense for stackmap analysis).
+ conservativeWrites bool
}
// Map maps from *ssa.Value to StackMapIndex.
@@ -312,8 +317,12 @@ func (lv *liveness) valueEffects(v *ssa.Value) (int32, liveEffect) {
if e&(ssa.SymRead|ssa.SymAddr) != 0 {
effect |= uevar
}
- if e&ssa.SymWrite != 0 && (!isfat(n.Type()) || v.Op == ssa.OpVarDef) {
- effect |= varkill
+ if e&ssa.SymWrite != 0 {
+ if !isfat(n.Type()) || v.Op == ssa.OpVarDef {
+ effect |= varkill
+ } else if lv.conservativeWrites {
+ effect |= uevar
+ }
}
if effect == 0 {
@@ -450,6 +459,11 @@ func (lv *liveness) blockEffects(b *ssa.Block) *blockEffects {
// this argument and the in arguments are always assumed live. The vars
// argument is a slice of *Nodes.
func (lv *liveness) pointerMap(liveout bitvec.BitVec, vars []*ir.Name, args, locals bitvec.BitVec) {
+ var slotsSeen map[int64]*ir.Name
+ checkForDuplicateSlots := base.Debug.MergeLocals != 0
+ if checkForDuplicateSlots {
+ slotsSeen = make(map[int64]*ir.Name)
+ }
for i := int32(0); ; i++ {
i = liveout.Next(i)
if i < 0 {
@@ -468,6 +482,12 @@ func (lv *liveness) pointerMap(liveout bitvec.BitVec, vars []*ir.Name, args, loc
fallthrough // PPARAMOUT in registers acts memory-allocates like an AUTO
case ir.PAUTO:
typebits.Set(node.Type(), node.FrameOffset()+lv.stkptrsize, locals)
+ if checkForDuplicateSlots {
+ if prev, ok := slotsSeen[node.FrameOffset()]; ok {
+ base.FatalfAt(node.Pos(), "two vars live at pointerMap generation: %q and %q", prev.Sym().Name, node.Sym().Name)
+ }
+ slotsSeen[node.FrameOffset()] = node
+ }
}
}
}
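To see why the new conservativeWrites mode treats writes as uses, here is a source-level version of the r1/r2 example from mergelocals.go (illustrative only; rec, g, and f are made-up names): the stack map at the call may omit r2, which is written but never read, but the merging liveness must keep r2's slot occupied across its stores so no other variable is overlapped onto it while they execute.

    package p

    // rec is pointer-free and larger than 3*PtrSize, so it is the kind
    // of local the merging pass would consider.
    type rec struct {
            x, y int
            buf  [62]int
    }

    //go:noinline
    func g() {}

    // r2 is written but never read. The stack map at the call to g can
    // ignore r2 entirely; with conservativeWrites, however, the merging
    // analysis treats the stores to r2 as uses, so r2's interval covers
    // them and its slot is not reused under those stores.
    func f() int {
            var r1 rec
            r1.x = 1
            var r2 rec
            r2.x = 0
            r2.y = 2
            g()
            // no subsequent use of r2
            return r1.x
    }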
diff --git a/src/cmd/compile/internal/ssa/check.go b/src/cmd/compile/internal/ssa/check.go
index bbfdaceaad..cb6788cd95 100644
--- a/src/cmd/compile/internal/ssa/check.go
+++ b/src/cmd/compile/internal/ssa/check.go
@@ -314,8 +314,9 @@ func checkFunc(f *Func) {
f.Fatalf("bad arg 1 type to %s: want integer, have %s", v.Op, v.Args[1].LongString())
}
case OpVarDef:
- if !v.Aux.(*ir.Name).Type().HasPointers() {
- f.Fatalf("vardef must have pointer type %s", v.Aux.(*ir.Name).Type().String())
+ n := v.Aux.(*ir.Name)
+ if !n.Type().HasPointers() && !IsMergeCandidate(n) {
+ f.Fatalf("vardef must be merge candidate or have pointer type %s", v.Aux.(*ir.Name).Type().String())
}
case OpNilCheck:
// nil checks have pointer type before scheduling, and
diff --git a/src/cmd/compile/internal/ssa/func.go b/src/cmd/compile/internal/ssa/func.go
index 031d94f90c..38b459a2ff 100644
--- a/src/cmd/compile/internal/ssa/func.go
+++ b/src/cmd/compile/internal/ssa/func.go
@@ -838,5 +838,25 @@ func (f *Func) useFMA(v *Value) bool {
// NewLocal returns a new anonymous local variable of the given type.
func (f *Func) NewLocal(pos src.XPos, typ *types.Type) *ir.Name {
- return typecheck.TempAt(pos, f.fe.Func(), typ) // Note: adds new auto to fn.Dcl list
+ nn := typecheck.TempAt(pos, f.fe.Func(), typ) // Note: adds new auto to fn.Dcl list
+ nn.SetNonMergeable(true)
+ return nn
+}
+
+// IsMergeCandidate returns true if variable n could participate in
+// stack slot merging. For now we're restricting the set to items
+// larger than what CanSSA would allow (approximately); we also
+// disallow things marked as open defer slots, so as to avoid
+// complicating liveness analysis.
+func IsMergeCandidate(n *ir.Name) bool {
+ if base.Debug.MergeLocals == 0 ||
+ base.Flag.N != 0 ||
+ n.Class != ir.PAUTO ||
+ n.Type().Size() <= int64(3*types.PtrSize) ||
+ n.Addrtaken() ||
+ n.NonMergeable() ||
+ n.OpenDeferSlot() {
+ return false
+ }
+ return true
}
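As a rough illustration of the IsMergeCandidate rules above (assuming a 64-bit target, so the size threshold 3*types.PtrSize is 24 bytes), the hypothetical function below notes how each local would be classified.

    package p

    // example is hypothetical; the comments describe how the candidacy
    // rules in IsMergeCandidate would apply to each local.
    func example(out *[512]byte) {
            var small int     // 8 bytes, not larger than 24: too small
            var big [512]byte // PAUTO, pointer-free, 512 bytes: candidate
            var taken [512]byte
            _ = &taken // address taken: rejected by IsMergeCandidate
            small = 1
            big[1] = byte(small)
            out[0] = big[1] + taken[0]
    }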
diff --git a/src/cmd/compile/internal/ssagen/pgen.go b/src/cmd/compile/internal/ssagen/pgen.go
index c3d9ec3091..d0045e7ee3 100644
--- a/src/cmd/compile/internal/ssagen/pgen.go
+++ b/src/cmd/compile/internal/ssagen/pgen.go
@@ -13,6 +13,7 @@ import (
"cmd/compile/internal/base"
"cmd/compile/internal/ir"
+ "cmd/compile/internal/liveness"
"cmd/compile/internal/objw"
"cmd/compile/internal/ssa"
"cmd/compile/internal/types"
@@ -151,6 +152,18 @@ func (s *ssafn) AllocFrame(f *ssa.Func) {
}
}
+ var mls *liveness.MergeLocalsState
+ if base.Debug.MergeLocals != 0 {
+ mls = liveness.MergeLocals(fn, f)
+ if base.Debug.MergeLocalsTrace > 0 && mls != nil {
+ fmt.Fprintf(os.Stderr, "%s: %d bytes of stack space saved via stack slot merging\n", ir.FuncName(fn), mls.EstSavings())
+ if base.Debug.MergeLocalsTrace > 1 {
+ fmt.Fprintf(os.Stderr, "=-= merge locals state for %v:\n%v",
+ fn, mls)
+ }
+ }
+ }
+
// Use sort.SliceStable instead of sort.Slice so stack layout (and thus
// compiler output) is less sensitive to frontend changes that
// introduce or remove unused variables.
@@ -158,6 +171,22 @@ func (s *ssafn) AllocFrame(f *ssa.Func) {
return cmpstackvarlt(fn.Dcl[i], fn.Dcl[j])
})
+ if base.Debug.MergeLocalsTrace > 1 && mls != nil {
+ fmt.Fprintf(os.Stderr, "=-= sorted DCL for %v:\n", fn)
+ for i, v := range fn.Dcl {
+ if !ssa.IsMergeCandidate(v) {
+ continue
+ }
+ fmt.Fprintf(os.Stderr, " %d: %q isleader=%v subsumed=%v used=%v\n", i, v.Sym().Name, mls.IsLeader(v), mls.Subsumed(v), v.Used())
+
+ }
+ }
+
+ var leaders map[*ir.Name]int64
+ if mls != nil {
+ leaders = make(map[*ir.Name]int64)
+ }
+
// Reassign stack offsets of the locals that are used.
lastHasPtr := false
for i, n := range fn.Dcl {
@@ -165,12 +194,14 @@ func (s *ssafn) AllocFrame(f *ssa.Func) {
// i.e., stack assign if AUTO, or if PARAMOUT in registers (which has no predefined spill locations)
continue
}
+ if mls != nil && mls.Subsumed(n) {
+ continue
+ }
if !n.Used() {
fn.DebugInfo.(*ssa.FuncDebug).OptDcl = fn.Dcl[i:]
fn.Dcl = fn.Dcl[:i]
break
}
-
types.CalcSize(n.Type())
w := n.Type().Size()
if w >= types.MaxWidth || w < 0 {
@@ -195,6 +226,42 @@ func (s *ssafn) AllocFrame(f *ssa.Func) {
lastHasPtr = false
}
n.SetFrameOffset(-s.stksize)
+ if mls != nil && mls.IsLeader(n) {
+ leaders[n] = -s.stksize
+ }
+ }
+
+ if mls != nil {
+ followers := []*ir.Name{}
+ newdcl := make([]*ir.Name, 0, len(fn.Dcl))
+ for i := 0; i < len(fn.Dcl); i++ {
+ n := fn.Dcl[i]
+ if mls.Subsumed(n) {
+ continue
+ }
+ newdcl = append(newdcl, n)
+ if off, ok := leaders[n]; ok {
+ followers = mls.Followers(n, followers)
+ for _, f := range followers {
+ // Set the stack offset for each follower to be
+ // the same as the leader.
+ f.SetFrameOffset(off)
+ }
+ // position followers immediately after leader
+ newdcl = append(newdcl, followers...)
+ }
+ }
+ fn.Dcl = newdcl
+ }
+
+ if base.Debug.MergeLocalsTrace > 1 {
+ fmt.Fprintf(os.Stderr, "=-= stack layout for %v:\n", fn)
+ for i, v := range fn.Dcl {
+ if v.Op() != ir.ONAME || (v.Class != ir.PAUTO && !(v.Class == ir.PPARAMOUT && v.IsOutputParamInRegisters())) {
+ continue
+ }
+ fmt.Fprintf(os.Stderr, " %d: %q frameoff %d used=%v\n", i, v.Sym().Name, v.FrameOffset(), v.Used())
+ }
}
s.stksize = types.RoundUp(s.stksize, s.stkalign)
diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go
index 37d6165e42..f27e3acc90 100644
--- a/src/cmd/compile/internal/ssagen/ssa.go
+++ b/src/cmd/compile/internal/ssagen/ssa.go
@@ -633,7 +633,7 @@ func (s *state) zeroResults() {
if typ := n.Type(); ssa.CanSSA(typ) {
s.assign(n, s.zeroVal(typ), false, 0)
} else {
- if typ.HasPointers() {
+ if typ.HasPointers() || ssa.IsMergeCandidate(n) {
s.vars[memVar] = s.newValue1A(ssa.OpVarDef, types.TypeMem, n, s.mem())
}
s.zero(n.Type(), s.decladdrs[n])
@@ -3942,7 +3942,7 @@ func (s *state) assignWhichMayOverlap(left ir.Node, right *ssa.Value, deref bool
// If this assignment clobbers an entire local variable, then emit
// OpVarDef so liveness analysis knows the variable is redefined.
- if base, ok := clobberBase(left).(*ir.Name); ok && base.OnStack() && skip == 0 && t.HasPointers() {
+ if base, ok := clobberBase(left).(*ir.Name); ok && base.OnStack() && skip == 0 && (t.HasPointers() || ssa.IsMergeCandidate(base)) {
s.vars[memVar] = s.newValue1Apos(ssa.OpVarDef, types.TypeMem, base, s.mem(), !ir.IsAutoTmp(base))
}
@@ -5382,7 +5382,8 @@ func (s *state) call(n *ir.CallExpr, k callKind, returnResultAddr bool, deferExt
}
// Make a defer struct on the stack.
t := deferstruct()
- _, addr := s.temp(n.Pos(), t)
+ n, addr := s.temp(n.Pos(), t)
+ n.SetNonMergeable(true)
s.store(closure.Type,
s.newValue1I(ssa.OpOffPtr, closure.Type.PtrTo(), t.FieldOff(deferStructFnField), addr),
closure)
@@ -6886,7 +6887,7 @@ func (s *state) dottype1(pos src.XPos, src, dst *types.Type, iface, source, targ
// temp allocates a temp of type t at position pos
func (s *state) temp(pos src.XPos, t *types.Type) (*ir.Name, *ssa.Value) {
tmp := typecheck.TempAt(pos, s.curfn, t)
- if t.HasPointers() {
+ if t.HasPointers() || (ssa.IsMergeCandidate(tmp) && t != deferstruct()) {
s.vars[memVar] = s.newValue1A(ssa.OpVarDef, types.TypeMem, tmp, s.mem())
}
addr := s.addr(tmp)
diff --git a/src/cmd/compile/internal/test/mergelocals_test.go b/src/cmd/compile/internal/test/mergelocals_test.go
new file mode 100644
index 0000000000..f070197c80
--- /dev/null
+++ b/src/cmd/compile/internal/test/mergelocals_test.go
@@ -0,0 +1,184 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package test
+
+import (
+ "cmd/compile/internal/ir"
+ "cmd/compile/internal/liveness"
+ "cmd/compile/internal/typecheck"
+ "cmd/compile/internal/types"
+ "cmd/internal/src"
+ "internal/testenv"
+ "path/filepath"
+ "slices"
+ "sort"
+ "strings"
+ "testing"
+)
+
+func TestMergeLocalState(t *testing.T) {
+ mkiv := func(name string) *ir.Name {
+ i32 := types.Types[types.TINT32]
+ s := typecheck.Lookup(name)
+ v := ir.NewNameAt(src.NoXPos, s, i32)
+ return v
+ }
+ v1 := mkiv("v1")
+ v2 := mkiv("v2")
+ v3 := mkiv("v3")
+
+ testcases := []struct {
+ vars []*ir.Name
+ partition map[*ir.Name][]int
+ experr bool
+ }{
+ {
+ vars: []*ir.Name{v1, v2, v3},
+ partition: map[*ir.Name][]int{
+ v1: []int{0, 1, 2},
+ v2: []int{0, 1, 2},
+ v3: []int{0, 1, 2},
+ },
+ experr: false,
+ },
+ {
+ // invalid mls.v slot -1
+ vars: []*ir.Name{v1, v2, v3},
+ partition: map[*ir.Name][]int{
+ v1: []int{-1, 0},
+ v2: []int{0, 1, 2},
+ v3: []int{0, 1, 2},
+ },
+ experr: true,
+ },
+ {
+ // duplicate var in v
+ vars: []*ir.Name{v1, v2, v2},
+ partition: map[*ir.Name][]int{
+ v1: []int{0, 1, 2},
+ v2: []int{0, 1, 2},
+ v3: []int{0, 1, 2},
+ },
+ experr: true,
+ },
+ {
+ // single element in partition
+ vars: []*ir.Name{v1, v2, v3},
+ partition: map[*ir.Name][]int{
+ v1: []int{0},
+ v2: []int{0, 1, 2},
+ v3: []int{0, 1, 2},
+ },
+ experr: true,
+ },
+ {
+ // missing element 2
+ vars: []*ir.Name{v1, v2, v3},
+ partition: map[*ir.Name][]int{
+ v1: []int{0, 1},
+ v2: []int{0, 1},
+ v3: []int{0, 1},
+ },
+ experr: true,
+ },
+ {
+ // partitions disagree for v1 vs v2
+ vars: []*ir.Name{v1, v2, v3},
+ partition: map[*ir.Name][]int{
+ v1: []int{0, 1, 2},
+ v2: []int{1, 0, 2},
+ v3: []int{0, 1, 2},
+ },
+ experr: true,
+ },
+ }
+
+ for k, testcase := range testcases {
+ mls, err := liveness.MakeMergeLocalsState(testcase.partition, testcase.vars)
+ t.Logf("tc %d err is %v\n", k, err)
+ if testcase.experr && err == nil {
+ t.Fatalf("tc:%d missing error mls %v", k, mls)
+ } else if !testcase.experr && err != nil {
+ t.Fatalf("tc:%d unexpected error mls %v", k, err)
+ }
+ if mls != nil {
+ t.Logf("tc %d: mls: %v\n", k, mls.String())
+ }
+ }
+}
+
+func TestMergeLocalsIntegration(t *testing.T) {
+ testenv.MustHaveGoBuild(t)
+
+ // This test does a build of a specific canned package to
+ // check whether merging of stack slots is taking place.
+ // The idea is to do the compile with a trace option turned
+ // on and then pick up on the frame offsets of specific
+ // variables.
+ //
+ // Stack slot merging is a greedy algorithm, and there can
+ // be many possible ways to overlap a given set of candidate
+ // variables, all of them legal. Rather than locking down
+ // a specific set of overlappings or frame offsets, this
+ // tests just verifies that there is one clump of 3 vars that
+ // get overlapped, then another clump of 2 that share the same
+ // frame offset.
+ //
+ // The expected output blob we're interested in looks like this:
+ //
+ // =-= stack layout for ABC:
+ // 2: "p1" frameoff -8200 used=true
+ // 3: "xp3" frameoff -8200 used=true
+ // 4: "xp4" frameoff -8200 used=true
+ // 5: "p2" frameoff -16400 used=true
+ // 6: "s" frameoff -24592 used=true
+ // 7: "v1" frameoff -32792 used=true
+ // 8: "v3" frameoff -32792 used=true
+ // 9: "v2" frameoff -40992 used=true
+ //
+ tmpdir := t.TempDir()
+ src := filepath.Join("testdata", "mergelocals", "integration.go")
+ obj := filepath.Join(tmpdir, "p.a")
+ out, err := testenv.Command(t, testenv.GoToolPath(t), "tool", "compile", "-p=p", "-c", "1", "-o", obj, "-d=mergelocalstrace=2,mergelocals=1", src).CombinedOutput()
+ if err != nil {
+ t.Fatalf("failed to compile: %v\n%s", err, out)
+ }
+ vars := make(map[string]string)
+ lines := strings.Split(string(out), "\n")
+ prolog := true
+ varsAtFrameOffset := make(map[string]int)
+ for _, line := range lines {
+ if line == "=-= stack layout for ABC:" {
+ prolog = false
+ continue
+ } else if prolog || line == "" {
+ continue
+ }
+ fields := strings.Fields(line)
+ if len(fields) != 5 {
+ t.Fatalf("bad trace output line: %s", line)
+ }
+ vname := fields[1]
+ frameoff := fields[3]
+ varsAtFrameOffset[frameoff] = varsAtFrameOffset[frameoff] + 1
+ vars[vname] = frameoff
+ }
+ wantvnum := 8
+ gotvnum := len(vars)
+ if wantvnum != gotvnum {
+ t.Fatalf("expected trace output on %d vars got %d\n", wantvnum, gotvnum)
+ }
+
+ // We expect one clump of 3, another clump of 2, and the rest singletons.
+ expected := []int{1, 1, 1, 2, 3}
+ got := []int{}
+ for _, v := range varsAtFrameOffset {
+ got = append(got, v)
+ }
+ sort.Ints(got)
+ if !slices.Equal(got, expected) {
+ t.Fatalf("expected variable clumps %+v not equal to what we got: %+v", expected, got)
+ }
+}
diff --git a/src/cmd/compile/internal/test/testdata/mergelocals/integration.go b/src/cmd/compile/internal/test/testdata/mergelocals/integration.go
new file mode 100644
index 0000000000..d640c6fce8
--- /dev/null
+++ b/src/cmd/compile/internal/test/testdata/mergelocals/integration.go
@@ -0,0 +1,83 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package p
+
+// This type and the following one will share the same GC shape and size.
+type Pointery struct {
+ p *Pointery
+ x [1024]int
+}
+
+type Pointery2 struct {
+ p *Pointery2
+ x [1024]int
+}
+
+// This type and the following one will have the same size.
+type Vanilla struct {
+ np uintptr
+ x [1024]int
+}
+
+type Vanilla2 struct {
+ np uintptr
+ x [1023]int
+ y int
+}
+
+type Single struct {
+ np uintptr
+ x [1023]int
+}
+
+func ABC(i, j int) int {
+ r := 0
+
+ // here v1 interferes with v2, but v2 could be overlapped with v3;
+ // we can also overlap v1 with v3.
+ var v1 Vanilla
+ if i < 101 {
+ var v2 Vanilla
+ v1.x[i] = j
+ r += v1.x[j]
+ v2.x[i] = j
+ r += v2.x[j]
+ }
+
+ {
+ var v3 Vanilla2
+ v3.x[i] = j
+ r += v3.x[j]
+ }
+
+ var s Single
+ s.x[i] = j
+ r += s.x[j]
+
+ // Here p1 and p2 interfere, but p1 could be overlapped with xp3.
+ var p1, p2 Pointery
+ p1.x[i] = j
+ r += p1.x[j]
+ p2.x[i] = j
+ r += p2.x[j]
+ {
+ var xp3 Pointery2
+ xp3.x[i] = j
+ r += xp3.x[j]
+ }
+
+ if i == j*2 {
+ // p2 live on this path
+ p2.x[i] += j
+ r += p2.x[j]
+ } else {
+ // p2 not live on this path
+ var xp4 Pointery2
+ xp4.x[i] = j
+ r += xp4.x[j]
+ }
+
+ return r
+}
diff --git a/src/cmd/compile/internal/walk/temp.go b/src/cmd/compile/internal/walk/temp.go
index 886b5beec3..604ac17367 100644
--- a/src/cmd/compile/internal/walk/temp.go
+++ b/src/cmd/compile/internal/walk/temp.go
@@ -25,7 +25,9 @@ func initStackTemp(init *ir.Nodes, tmp *ir.Name, val ir.Node) *ir.AddrExpr {
// allocated temporary variable of the given type. Statements to
// zero-initialize tmp are appended to init.
func stackTempAddr(init *ir.Nodes, typ *types.Type) *ir.AddrExpr {
- return initStackTemp(init, typecheck.TempAt(base.Pos, ir.CurFunc, typ), nil)
+ n := typecheck.TempAt(base.Pos, ir.CurFunc, typ)
+ n.SetNonMergeable(true)
+ return initStackTemp(init, n, nil)
}
// stackBufAddr returns the expression &tmp, where tmp is a newly
diff --git a/test/fixedbugs/bug385_64.go b/test/fixedbugs/bug385_64.go
index 3240960f1a..deba9c9fae 100644
--- a/test/fixedbugs/bug385_64.go
+++ b/test/fixedbugs/bug385_64.go
@@ -11,214 +11,423 @@
package main
-var z [10<<20]byte
+var z [10 << 20]byte
func main() { // GC_ERROR "stack frame too large"
- // seq 1 206 | sed 's/.*/ var x& [10<<20]byte; z = x&/'
- var x1 [10<<20]byte; z = x1
- var x2 [10<<20]byte; z = x2
- var x3 [10<<20]byte; z = x3
- var x4 [10<<20]byte; z = x4
- var x5 [10<<20]byte; z = x5
- var x6 [10<<20]byte; z = x6
- var x7 [10<<20]byte; z = x7
- var x8 [10<<20]byte; z = x8
- var x9 [10<<20]byte; z = x9
- var x10 [10<<20]byte; z = x10
- var x11 [10<<20]byte; z = x11
- var x12 [10<<20]byte; z = x12
- var x13 [10<<20]byte; z = x13
- var x14 [10<<20]byte; z = x14
- var x15 [10<<20]byte; z = x15
- var x16 [10<<20]byte; z = x16
- var x17 [10<<20]byte; z = x17
- var x18 [10<<20]byte; z = x18
- var x19 [10<<20]byte; z = x19
- var x20 [10<<20]byte; z = x20
- var x21 [10<<20]byte; z = x21
- var x22 [10<<20]byte; z = x22
- var x23 [10<<20]byte; z = x23
- var x24 [10<<20]byte; z = x24
- var x25 [10<<20]byte; z = x25
- var x26 [10<<20]byte; z = x26
- var x27 [10<<20]byte; z = x27
- var x28 [10<<20]byte; z = x28
- var x29 [10<<20]byte; z = x29
- var x30 [10<<20]byte; z = x30
- var x31 [10<<20]byte; z = x31
- var x32 [10<<20]byte; z = x32
- var x33 [10<<20]byte; z = x33
- var x34 [10<<20]byte; z = x34
- var x35 [10<<20]byte; z = x35
- var x36 [10<<20]byte; z = x36
- var x37 [10<<20]byte; z = x37
- var x38 [10<<20]byte; z = x38
- var x39 [10<<20]byte; z = x39
- var x40 [10<<20]byte; z = x40
- var x41 [10<<20]byte; z = x41
- var x42 [10<<20]byte; z = x42
- var x43 [10<<20]byte; z = x43
- var x44 [10<<20]byte; z = x44
- var x45 [10<<20]byte; z = x45
- var x46 [10<<20]byte; z = x46
- var x47 [10<<20]byte; z = x47
- var x48 [10<<20]byte; z = x48
- var x49 [10<<20]byte; z = x49
- var x50 [10<<20]byte; z = x50
- var x51 [10<<20]byte; z = x51
- var x52 [10<<20]byte; z = x52
- var x53 [10<<20]byte; z = x53
- var x54 [10<<20]byte; z = x54
- var x55 [10<<20]byte; z = x55
- var x56 [10<<20]byte; z = x56
- var x57 [10<<20]byte; z = x57
- var x58 [10<<20]byte; z = x58
- var x59 [10<<20]byte; z = x59
- var x60 [10<<20]byte; z = x60
- var x61 [10<<20]byte; z = x61
- var x62 [10<<20]byte; z = x62
- var x63 [10<<20]byte; z = x63
- var x64 [10<<20]byte; z = x64
- var x65 [10<<20]byte; z = x65
- var x66 [10<<20]byte; z = x66
- var x67 [10<<20]byte; z = x67
- var x68 [10<<20]byte; z = x68
- var x69 [10<<20]byte; z = x69
- var x70 [10<<20]byte; z = x70
- var x71 [10<<20]byte; z = x71
- var x72 [10<<20]byte; z = x72
- var x73 [10<<20]byte; z = x73
- var x74 [10<<20]byte; z = x74
- var x75 [10<<20]byte; z = x75
- var x76 [10<<20]byte; z = x76
- var x77 [10<<20]byte; z = x77
- var x78 [10<<20]byte; z = x78
- var x79 [10<<20]byte; z = x79
- var x80 [10<<20]byte; z = x80
- var x81 [10<<20]byte; z = x81
- var x82 [10<<20]byte; z = x82
- var x83 [10<<20]byte; z = x83
- var x84 [10<<20]byte; z = x84
- var x85 [10<<20]byte; z = x85
- var x86 [10<<20]byte; z = x86
- var x87 [10<<20]byte; z = x87
- var x88 [10<<20]byte; z = x88
- var x89 [10<<20]byte; z = x89
- var x90 [10<<20]byte; z = x90
- var x91 [10<<20]byte; z = x91
- var x92 [10<<20]byte; z = x92
- var x93 [10<<20]byte; z = x93
- var x94 [10<<20]byte; z = x94
- var x95 [10<<20]byte; z = x95
- var x96 [10<<20]byte; z = x96
- var x97 [10<<20]byte; z = x97
- var x98 [10<<20]byte; z = x98
- var x99 [10<<20]byte; z = x99
- var x100 [10<<20]byte; z = x100
- var x101 [10<<20]byte; z = x101
- var x102 [10<<20]byte; z = x102
- var x103 [10<<20]byte; z = x103
- var x104 [10<<20]byte; z = x104
- var x105 [10<<20]byte; z = x105
- var x106 [10<<20]byte; z = x106
- var x107 [10<<20]byte; z = x107
- var x108 [10<<20]byte; z = x108
- var x109 [10<<20]byte; z = x109
- var x110 [10<<20]byte; z = x110
- var x111 [10<<20]byte; z = x111
- var x112 [10<<20]byte; z = x112
- var x113 [10<<20]byte; z = x113
- var x114 [10<<20]byte; z = x114
- var x115 [10<<20]byte; z = x115
- var x116 [10<<20]byte; z = x116
- var x117 [10<<20]byte; z = x117
- var x118 [10<<20]byte; z = x118
- var x119 [10<<20]byte; z = x119
- var x120 [10<<20]byte; z = x120
- var x121 [10<<20]byte; z = x121
- var x122 [10<<20]byte; z = x122
- var x123 [10<<20]byte; z = x123
- var x124 [10<<20]byte; z = x124
- var x125 [10<<20]byte; z = x125
- var x126 [10<<20]byte; z = x126
- var x127 [10<<20]byte; z = x127
- var x128 [10<<20]byte; z = x128
- var x129 [10<<20]byte; z = x129
- var x130 [10<<20]byte; z = x130
- var x131 [10<<20]byte; z = x131
- var x132 [10<<20]byte; z = x132
- var x133 [10<<20]byte; z = x133
- var x134 [10<<20]byte; z = x134
- var x135 [10<<20]byte; z = x135
- var x136 [10<<20]byte; z = x136
- var x137 [10<<20]byte; z = x137
- var x138 [10<<20]byte; z = x138
- var x139 [10<<20]byte; z = x139
- var x140 [10<<20]byte; z = x140
- var x141 [10<<20]byte; z = x141
- var x142 [10<<20]byte; z = x142
- var x143 [10<<20]byte; z = x143
- var x144 [10<<20]byte; z = x144
- var x145 [10<<20]byte; z = x145
- var x146 [10<<20]byte; z = x146
- var x147 [10<<20]byte; z = x147
- var x148 [10<<20]byte; z = x148
- var x149 [10<<20]byte; z = x149
- var x150 [10<<20]byte; z = x150
- var x151 [10<<20]byte; z = x151
- var x152 [10<<20]byte; z = x152
- var x153 [10<<20]byte; z = x153
- var x154 [10<<20]byte; z = x154
- var x155 [10<<20]byte; z = x155
- var x156 [10<<20]byte; z = x156
- var x157 [10<<20]byte; z = x157
- var x158 [10<<20]byte; z = x158
- var x159 [10<<20]byte; z = x159
- var x160 [10<<20]byte; z = x160
- var x161 [10<<20]byte; z = x161
- var x162 [10<<20]byte; z = x162
- var x163 [10<<20]byte; z = x163
- var x164 [10<<20]byte; z = x164
- var x165 [10<<20]byte; z = x165
- var x166 [10<<20]byte; z = x166
- var x167 [10<<20]byte; z = x167
- var x168 [10<<20]byte; z = x168
- var x169 [10<<20]byte; z = x169
- var x170 [10<<20]byte; z = x170
- var x171 [10<<20]byte; z = x171
- var x172 [10<<20]byte; z = x172
- var x173 [10<<20]byte; z = x173
- var x174 [10<<20]byte; z = x174
- var x175 [10<<20]byte; z = x175
- var x176 [10<<20]byte; z = x176
- var x177 [10<<20]byte; z = x177
- var x178 [10<<20]byte; z = x178
- var x179 [10<<20]byte; z = x179
- var x180 [10<<20]byte; z = x180
- var x181 [10<<20]byte; z = x181
- var x182 [10<<20]byte; z = x182
- var x183 [10<<20]byte; z = x183
- var x184 [10<<20]byte; z = x184
- var x185 [10<<20]byte; z = x185
- var x186 [10<<20]byte; z = x186
- var x187 [10<<20]byte; z = x187
- var x188 [10<<20]byte; z = x188
- var x189 [10<<20]byte; z = x189
- var x190 [10<<20]byte; z = x190
- var x191 [10<<20]byte; z = x191
- var x192 [10<<20]byte; z = x192
- var x193 [10<<20]byte; z = x193
- var x194 [10<<20]byte; z = x194
- var x195 [10<<20]byte; z = x195
- var x196 [10<<20]byte; z = x196
- var x197 [10<<20]byte; z = x197
- var x198 [10<<20]byte; z = x198
- var x199 [10<<20]byte; z = x199
- var x200 [10<<20]byte; z = x200
- var x201 [10<<20]byte; z = x201
- var x202 [10<<20]byte; z = x202
- var x203 [10<<20]byte; z = x203
- var x204 [10<<20]byte; z = x204
- var x205 [10<<20]byte; z = x205
- var x206 [10<<20]byte; z = x206
+ // seq 1 206 | sed 's/.*/ var x& [10<<20]byte/'
+ // seq 1 206 | sed 's/.*/ z = x&/'
+ var x1 [10<<20]byte
+ var x2 [10<<20]byte
+ var x3 [10<<20]byte
+ var x4 [10<<20]byte
+ var x5 [10<<20]byte
+ var x6 [10<<20]byte
+ var x7 [10<<20]byte
+ var x8 [10<<20]byte
+ var x9 [10<<20]byte
+ var x10 [10<<20]byte
+ var x11 [10<<20]byte
+ var x12 [10<<20]byte
+ var x13 [10<<20]byte
+ var x14 [10<<20]byte
+ var x15 [10<<20]byte
+ var x16 [10<<20]byte
+ var x17 [10<<20]byte
+ var x18 [10<<20]byte
+ var x19 [10<<20]byte
+ var x20 [10<<20]byte
+ var x21 [10<<20]byte
+ var x22 [10<<20]byte
+ var x23 [10<<20]byte
+ var x24 [10<<20]byte
+ var x25 [10<<20]byte
+ var x26 [10<<20]byte
+ var x27 [10<<20]byte
+ var x28 [10<<20]byte
+ var x29 [10<<20]byte
+ var x30 [10<<20]byte
+ var x31 [10<<20]byte
+ var x32 [10<<20]byte
+ var x33 [10<<20]byte
+ var x34 [10<<20]byte
+ var x35 [10<<20]byte
+ var x36 [10<<20]byte
+ var x37 [10<<20]byte
+ var x38 [10<<20]byte
+ var x39 [10<<20]byte
+ var x40 [10<<20]byte
+ var x41 [10<<20]byte
+ var x42 [10<<20]byte
+ var x43 [10<<20]byte
+ var x44 [10<<20]byte
+ var x45 [10<<20]byte
+ var x46 [10<<20]byte
+ var x47 [10<<20]byte
+ var x48 [10<<20]byte
+ var x49 [10<<20]byte
+ var x50 [10<<20]byte
+ var x51 [10<<20]byte
+ var x52 [10<<20]byte
+ var x53 [10<<20]byte
+ var x54 [10<<20]byte
+ var x55 [10<<20]byte
+ var x56 [10<<20]byte
+ var x57 [10<<20]byte
+ var x58 [10<<20]byte
+ var x59 [10<<20]byte
+ var x60 [10<<20]byte
+ var x61 [10<<20]byte
+ var x62 [10<<20]byte
+ var x63 [10<<20]byte
+ var x64 [10<<20]byte
+ var x65 [10<<20]byte
+ var x66 [10<<20]byte
+ var x67 [10<<20]byte
+ var x68 [10<<20]byte
+ var x69 [10<<20]byte
+ var x70 [10<<20]byte
+ var x71 [10<<20]byte
+ var x72 [10<<20]byte
+ var x73 [10<<20]byte
+ var x74 [10<<20]byte
+ var x75 [10<<20]byte
+ var x76 [10<<20]byte
+ var x77 [10<<20]byte
+ var x78 [10<<20]byte
+ var x79 [10<<20]byte
+ var x80 [10<<20]byte
+ var x81 [10<<20]byte
+ var x82 [10<<20]byte
+ var x83 [10<<20]byte
+ var x84 [10<<20]byte
+ var x85 [10<<20]byte
+ var x86 [10<<20]byte
+ var x87 [10<<20]byte
+ var x88 [10<<20]byte
+ var x89 [10<<20]byte
+ var x90 [10<<20]byte
+ var x91 [10<<20]byte
+ var x92 [10<<20]byte
+ var x93 [10<<20]byte
+ var x94 [10<<20]byte
+ var x95 [10<<20]byte
+ var x96 [10<<20]byte
+ var x97 [10<<20]byte
+ var x98 [10<<20]byte
+ var x99 [10<<20]byte
+ var x100 [10<<20]byte
+ var x101 [10<<20]byte
+ var x102 [10<<20]byte
+ var x103 [10<<20]byte
+ var x104 [10<<20]byte
+ var x105 [10<<20]byte
+ var x106 [10<<20]byte
+ var x107 [10<<20]byte
+ var x108 [10<<20]byte
+ var x109 [10<<20]byte
+ var x110 [10<<20]byte
+ var x111 [10<<20]byte
+ var x112 [10<<20]byte
+ var x113 [10<<20]byte
+ var x114 [10<<20]byte
+ var x115 [10<<20]byte
+ var x116 [10<<20]byte
+ var x117 [10<<20]byte
+ var x118 [10<<20]byte
+ var x119 [10<<20]byte
+ var x120 [10<<20]byte
+ var x121 [10<<20]byte
+ var x122 [10<<20]byte
+ var x123 [10<<20]byte
+ var x124 [10<<20]byte
+ var x125 [10<<20]byte
+ var x126 [10<<20]byte
+ var x127 [10<<20]byte
+ var x128 [10<<20]byte
+ var x129 [10<<20]byte
+ var x130 [10<<20]byte
+ var x131 [10<<20]byte
+ var x132 [10<<20]byte
+ var x133 [10<<20]byte
+ var x134 [10<<20]byte
+ var x135 [10<<20]byte
+ var x136 [10<<20]byte
+ var x137 [10<<20]byte
+ var x138 [10<<20]byte
+ var x139 [10<<20]byte
+ var x140 [10<<20]byte
+ var x141 [10<<20]byte
+ var x142 [10<<20]byte
+ var x143 [10<<20]byte
+ var x144 [10<<20]byte
+ var x145 [10<<20]byte
+ var x146 [10<<20]byte
+ var x147 [10<<20]byte
+ var x148 [10<<20]byte
+ var x149 [10<<20]byte
+ var x150 [10<<20]byte
+ var x151 [10<<20]byte
+ var x152 [10<<20]byte
+ var x153 [10<<20]byte
+ var x154 [10<<20]byte
+ var x155 [10<<20]byte
+ var x156 [10<<20]byte
+ var x157 [10<<20]byte
+ var x158 [10<<20]byte
+ var x159 [10<<20]byte
+ var x160 [10<<20]byte
+ var x161 [10<<20]byte
+ var x162 [10<<20]byte
+ var x163 [10<<20]byte
+ var x164 [10<<20]byte
+ var x165 [10<<20]byte
+ var x166 [10<<20]byte
+ var x167 [10<<20]byte
+ var x168 [10<<20]byte
+ var x169 [10<<20]byte
+ var x170 [10<<20]byte
+ var x171 [10<<20]byte
+ var x172 [10<<20]byte
+ var x173 [10<<20]byte
+ var x174 [10<<20]byte
+ var x175 [10<<20]byte
+ var x176 [10<<20]byte
+ var x177 [10<<20]byte
+ var x178 [10<<20]byte
+ var x179 [10<<20]byte
+ var x180 [10<<20]byte
+ var x181 [10<<20]byte
+ var x182 [10<<20]byte
+ var x183 [10<<20]byte
+ var x184 [10<<20]byte
+ var x185 [10<<20]byte
+ var x186 [10<<20]byte
+ var x187 [10<<20]byte
+ var x188 [10<<20]byte
+ var x189 [10<<20]byte
+ var x190 [10<<20]byte
+ var x191 [10<<20]byte
+ var x192 [10<<20]byte
+ var x193 [10<<20]byte
+ var x194 [10<<20]byte
+ var x195 [10<<20]byte
+ var x196 [10<<20]byte
+ var x197 [10<<20]byte
+ var x198 [10<<20]byte
+ var x199 [10<<20]byte
+ var x200 [10<<20]byte
+ var x201 [10<<20]byte
+ var x202 [10<<20]byte
+ var x203 [10<<20]byte
+ var x204 [10<<20]byte
+ var x205 [10<<20]byte
+ var x206 [10<<20]byte
+ var x207 [10<<20]byte
+ z = x1
+ z = x2
+ z = x3
+ z = x4
+ z = x5
+ z = x6
+ z = x7
+ z = x8
+ z = x9
+ z = x10
+ z = x11
+ z = x12
+ z = x13
+ z = x14
+ z = x15
+ z = x16
+ z = x17
+ z = x18
+ z = x19
+ z = x20
+ z = x21
+ z = x22
+ z = x23
+ z = x24
+ z = x25
+ z = x26
+ z = x27
+ z = x28
+ z = x29
+ z = x30
+ z = x31
+ z = x32
+ z = x33
+ z = x34
+ z = x35
+ z = x36
+ z = x37
+ z = x38
+ z = x39
+ z = x40
+ z = x41
+ z = x42
+ z = x43
+ z = x44
+ z = x45
+ z = x46
+ z = x47
+ z = x48
+ z = x49
+ z = x50
+ z = x51
+ z = x52
+ z = x53
+ z = x54
+ z = x55
+ z = x56
+ z = x57
+ z = x58
+ z = x59
+ z = x60
+ z = x61
+ z = x62
+ z = x63
+ z = x64
+ z = x65
+ z = x66
+ z = x67
+ z = x68
+ z = x69
+ z = x70
+ z = x71
+ z = x72
+ z = x73
+ z = x74
+ z = x75
+ z = x76
+ z = x77
+ z = x78
+ z = x79
+ z = x80
+ z = x81
+ z = x82
+ z = x83
+ z = x84
+ z = x85
+ z = x86
+ z = x87
+ z = x88
+ z = x89
+ z = x90
+ z = x91
+ z = x92
+ z = x93
+ z = x94
+ z = x95
+ z = x96
+ z = x97
+ z = x98
+ z = x99
+ z = x100
+ z = x101
+ z = x102
+ z = x103
+ z = x104
+ z = x105
+ z = x106
+ z = x107
+ z = x108
+ z = x109
+ z = x110
+ z = x111
+ z = x112
+ z = x113
+ z = x114
+ z = x115
+ z = x116
+ z = x117
+ z = x118
+ z = x119
+ z = x120
+ z = x121
+ z = x122
+ z = x123
+ z = x124
+ z = x125
+ z = x126
+ z = x127
+ z = x128
+ z = x129
+ z = x130
+ z = x131
+ z = x132
+ z = x133
+ z = x134
+ z = x135
+ z = x136
+ z = x137
+ z = x138
+ z = x139
+ z = x140
+ z = x141
+ z = x142
+ z = x143
+ z = x144
+ z = x145
+ z = x146
+ z = x147
+ z = x148
+ z = x149
+ z = x150
+ z = x151
+ z = x152
+ z = x153
+ z = x154
+ z = x155
+ z = x156
+ z = x157
+ z = x158
+ z = x159
+ z = x160
+ z = x161
+ z = x162
+ z = x163
+ z = x164
+ z = x165
+ z = x166
+ z = x167
+ z = x168
+ z = x169
+ z = x170
+ z = x171
+ z = x172
+ z = x173
+ z = x174
+ z = x175
+ z = x176
+ z = x177
+ z = x178
+ z = x179
+ z = x180
+ z = x181
+ z = x182
+ z = x183
+ z = x184
+ z = x185
+ z = x186
+ z = x187
+ z = x188
+ z = x189
+ z = x190
+ z = x191
+ z = x192
+ z = x193
+ z = x194
+ z = x195
+ z = x196
+ z = x197
+ z = x198
+ z = x199
+ z = x200
+ z = x201
+ z = x202
+ z = x203
+ z = x204
+ z = x205
+ z = x206
+ z = x207
}