aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/cmd/compile/internal/base/debug.go4
-rw-r--r--src/cmd/compile/internal/base/flag.go3
-rw-r--r--src/cmd/compile/internal/base/hashdebug.go7
-rw-r--r--src/cmd/compile/internal/ir/name.go3
-rw-r--r--src/cmd/compile/internal/liveness/mergelocals.go691
-rw-r--r--src/cmd/compile/internal/liveness/plive.go24
-rw-r--r--src/cmd/compile/internal/ssa/check.go5
-rw-r--r--src/cmd/compile/internal/ssa/func.go22
-rw-r--r--src/cmd/compile/internal/ssagen/pgen.go69
-rw-r--r--src/cmd/compile/internal/ssagen/ssa.go9
-rw-r--r--src/cmd/compile/internal/test/mergelocals_test.go184
-rw-r--r--src/cmd/compile/internal/test/testdata/mergelocals/integration.go83
-rw-r--r--src/cmd/compile/internal/walk/temp.go4
-rw-r--r--test/fixedbugs/bug385_64.go625
14 files changed, 1511 insertions, 222 deletions
diff --git a/src/cmd/compile/internal/base/debug.go b/src/cmd/compile/internal/base/debug.go
index 420ad1305e..08ccef3065 100644
--- a/src/cmd/compile/internal/base/debug.go
+++ b/src/cmd/compile/internal/base/debug.go
@@ -41,6 +41,10 @@ type DebugFlags struct {
LoopVarHash string `help:"for debugging changes in loop behavior. Overrides experiment and loopvar flag."`
LocationLists int `help:"print information about DWARF location list creation"`
MaxShapeLen int `help:"hash shape names longer than this threshold (default 500)" concurrent:"ok"`
+ MergeLocals int `help:"merge together non-interfering local stack slots" concurrent:"ok"`
+ MergeLocalsDumpFunc string `help:"dump specified func in merge locals"`
+ MergeLocalsHash string `help:"hash value for debugging stack slot merging of local variables" concurrent:"ok"`
+ MergeLocalsTrace int `help:"trace debug output for locals merging"`
Nil int `help:"print information about nil checks"`
NoOpenDefer int `help:"disable open-coded defers" concurrent:"ok"`
NoRefName int `help:"do not include referenced symbol names in object file" concurrent:"ok"`
diff --git a/src/cmd/compile/internal/base/flag.go b/src/cmd/compile/internal/base/flag.go
index 5b3c3ad8c6..0889c37b0d 100644
--- a/src/cmd/compile/internal/base/flag.go
+++ b/src/cmd/compile/internal/base/flag.go
@@ -260,6 +260,9 @@ func ParseFlags() {
if Debug.PGOHash != "" {
PGOHash = NewHashDebug("pgohash", Debug.PGOHash, nil)
}
+ if Debug.MergeLocalsHash != "" {
+ MergeLocalsHash = NewHashDebug("mergelocals", Debug.MergeLocalsHash, nil)
+ }
if Flag.MSan && !platform.MSanSupported(buildcfg.GOOS, buildcfg.GOARCH) {
log.Fatalf("%s/%s does not support -msan", buildcfg.GOOS, buildcfg.GOARCH)
diff --git a/src/cmd/compile/internal/base/hashdebug.go b/src/cmd/compile/internal/base/hashdebug.go
index 4e36c8d549..7a5cc42578 100644
--- a/src/cmd/compile/internal/base/hashdebug.go
+++ b/src/cmd/compile/internal/base/hashdebug.go
@@ -53,9 +53,10 @@ func (d *HashDebug) SetInlineSuffixOnly(b bool) *HashDebug {
// The default compiler-debugging HashDebug, for "-d=gossahash=..."
var hashDebug *HashDebug
-var FmaHash *HashDebug // for debugging fused-multiply-add floating point changes
-var LoopVarHash *HashDebug // for debugging shared/private loop variable changes
-var PGOHash *HashDebug // for debugging PGO optimization decisions
+var FmaHash *HashDebug // for debugging fused-multiply-add floating point changes
+var LoopVarHash *HashDebug // for debugging shared/private loop variable changes
+var PGOHash *HashDebug // for debugging PGO optimization decisions
+var MergeLocalsHash *HashDebug // for debugging local stack slot merging changes
// DebugHashMatchPkgFunc reports whether debug variable Gossahash
//
diff --git a/src/cmd/compile/internal/ir/name.go b/src/cmd/compile/internal/ir/name.go
index 758158651e..1ce6e43d0b 100644
--- a/src/cmd/compile/internal/ir/name.go
+++ b/src/cmd/compile/internal/ir/name.go
@@ -194,6 +194,7 @@ const (
nameLibfuzzer8BitCounter // if PEXTERN should be assigned to __sancov_cntrs section
nameCoverageAuxVar // instrumentation counter var or pkg ID for cmd/cover
nameAlias // is type name an alias
+ nameNonMergeable // not a candidate for stack slot merging
)
func (n *Name) Readonly() bool { return n.flags&nameReadonly != 0 }
@@ -209,6 +210,7 @@ func (n *Name) InlLocal() bool { return n.flags&nameInlLocal !=
func (n *Name) OpenDeferSlot() bool { return n.flags&nameOpenDeferSlot != 0 }
func (n *Name) Libfuzzer8BitCounter() bool { return n.flags&nameLibfuzzer8BitCounter != 0 }
func (n *Name) CoverageAuxVar() bool { return n.flags&nameCoverageAuxVar != 0 }
+func (n *Name) NonMergeable() bool { return n.flags&nameNonMergeable != 0 }
func (n *Name) setReadonly(b bool) { n.flags.set(nameReadonly, b) }
func (n *Name) SetNeedzero(b bool) { n.flags.set(nameNeedzero, b) }
@@ -223,6 +225,7 @@ func (n *Name) SetInlLocal(b bool) { n.flags.set(nameInlLocal, b
func (n *Name) SetOpenDeferSlot(b bool) { n.flags.set(nameOpenDeferSlot, b) }
func (n *Name) SetLibfuzzer8BitCounter(b bool) { n.flags.set(nameLibfuzzer8BitCounter, b) }
func (n *Name) SetCoverageAuxVar(b bool) { n.flags.set(nameCoverageAuxVar, b) }
+func (n *Name) SetNonMergeable(b bool) { n.flags.set(nameNonMergeable, b) }
// OnStack reports whether variable n may reside on the stack.
func (n *Name) OnStack() bool {
diff --git a/src/cmd/compile/internal/liveness/mergelocals.go b/src/cmd/compile/internal/liveness/mergelocals.go
new file mode 100644
index 0000000000..a1342efce6
--- /dev/null
+++ b/src/cmd/compile/internal/liveness/mergelocals.go
@@ -0,0 +1,691 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package liveness
+
+import (
+ "cmd/compile/internal/base"
+ "cmd/compile/internal/bitvec"
+ "cmd/compile/internal/ir"
+ "cmd/compile/internal/reflectdata"
+ "cmd/compile/internal/ssa"
+ "cmd/internal/obj"
+ "cmd/internal/src"
+ "fmt"
+ "os"
+ "path/filepath"
+ "sort"
+ "strings"
+)
+
+// MergeLocalsState encapsulates information about which AUTO
+// (stack-allocated) variables within a function can be safely
+// merged/overlapped, i.e. share a stack slot with some other auto.
+// An instance of MergeLocalsState is produced by MergeLocals() below
+// and then consumed in ssagen.AllocFrame. The map 'partition' contains
+// entries of the form <N,SL> where N is an *ir.Name and SL is a slice
+// holding the indices (within 'vars') of all variables in N's partition
+// (N included), with the partition leader at SL[0]. For example, if a
+// function contains five variables where v1/v2/v3 are safe to overlap and
+// v4/v5 are safe to overlap, the MergeLocalsState content might look like
+//
+// vars: [v1, v2, v3, v4, v5]
+// partition: v1 -> [1, 0, 2], v2 -> [1, 0, 2], v3 -> [1, 0, 2]
+// v4 -> [3, 4], v5 -> [3, 4]
+//
+// A nil MergeLocalsState indicates that no local variables meet the
+// necessary criteria for overlap.
+type MergeLocalsState struct {
+ // vars holds the autos that participate in overlapping; partition slices index into it
+ vars []*ir.Name
+ // partition maps each participating auto to the index slice of its overlap partition
+ partition map[*ir.Name][]int
+}
+
+// candRegion is a sub-range of the candidate variable list: the inclusive
+// interval [st,en] of slice positions, as delimited by nextRegion.
+type candRegion struct {
+ st, en int
+}
+
+// MergeLocals examines the auto variables of fn (whose SSA form is f)
+// and decides which of them may safely occupy the same stack slot. It
+// returns a state object describing the overlaps, or nil when no
+// candidate regions exist or nothing could be merged. An inconsistent
+// result is reported as a fatal compiler error.
+func MergeLocals(fn *ir.Func, f *ssa.Func) *MergeLocalsState {
+	candidates, slotOf, regions := collectMergeCandidates(fn)
+	if len(regions) == 0 {
+		return nil
+	}
+
+	lv := newliveness(fn, f, candidates, slotOf, 0)
+
+	// Consider a local such as "r2" below that is written but never
+	// read afterwards:
+	//
+	// vardef r1
+	// r1.x = ...
+	// vardef r2
+	// r2.x = 0
+	// r2.y = ...
+	// <call foo>
+	// // no subsequent use of r2
+	// ... = r1.x
+	//
+	// When computing stack maps at the call, liveness analysis may
+	// ignore "r2" entirely. For stack slot merging, however, those
+	// writes must count as "uses", otherwise r2 could be overlapped
+	// with a variable that is still live while r2 is being written.
+	lv.conservativeWrites = true
+
+	lv.prologue()
+	lv.solve()
+
+	cs := new(cstate)
+	cs.fn = fn
+	cs.ibuilders = make([]IntervalsBuilder, len(candidates))
+	computeIntervals(lv, cs)
+
+	state := performMerging(lv, cs, regions)
+	if err := state.check(); err != nil {
+		base.FatalfAt(fn.Pos(), "invalid mergelocals state: %v", err)
+	}
+	return state
+}
+
+// Subsumed reports whether variable n is subsumed, i.e. it belongs to
+// an overlap partition but is not that partition's leader.
+func (mls *MergeLocalsState) Subsumed(n *ir.Name) bool {
+	part, found := mls.partition[n]
+	return found && mls.vars[part[0]] != n
+}
+
+// IsLeader reports whether variable n is the leader (first element)
+// of a sharing partition.
+func (mls *MergeLocalsState) IsLeader(n *ir.Name) bool {
+	part, found := mls.partition[n]
+	return found && mls.vars[part[0]] == n
+}
+
+// Leader returns the leader variable of the partition containing the
+// subsumed variable n. It panics if n is not in any partition or if n
+// is itself a leader.
+func (mls *MergeLocalsState) Leader(n *ir.Name) *ir.Name {
+	part, found := mls.partition[n]
+	if !found {
+		panic("not a merge candidate")
+	}
+	lead := mls.vars[part[0]]
+	if lead == n {
+		panic("variable is not subsumed")
+	}
+	return lead
+}
+
+// Followers returns the followers of leader n, sorted by symbol name.
+// The result reuses the storage of tmp (truncated to zero length). It
+// panics if n has no partition entry or is not a leader.
+func (mls *MergeLocalsState) Followers(n *ir.Name, tmp []*ir.Name) []*ir.Name {
+	part, found := mls.partition[n]
+	switch {
+	case !found:
+		panic("no entry for leader")
+	case mls.vars[part[0]] != n:
+		panic("followers invoked on subsumed var")
+	}
+	res := tmp[:0]
+	for _, slot := range part[1:] {
+		res = append(res, mls.vars[slot])
+	}
+	sort.SliceStable(res, func(a, b int) bool {
+		return res[a].Sym().Name < res[b].Sym().Name
+	})
+	return res
+}
+
+// EstSavings returns the estimated reduction in stack size for
+// the given merge locals state.
+func (mls *MergeLocalsState) EstSavings() int {
+ tot := 0
+ for n := range mls.partition {
+ if mls.Subsumed(n) {
+ tot += int(n.Type().Size())
+ }
+ }
+ return tot
+}
+
+// check tests for various inconsistencies and problems in mls,
+// returning an error describing the first problem found, or nil if
+// the state is consistent. A nil receiver is considered valid.
+//
+// The invariants verified are: 'vars' has no duplicates; every
+// partition slice has at least two entries, all valid indices into
+// 'vars'; the first entry of each slice is a leader and the rest are
+// subsumed by that leader; each key appears in its own slice; and every
+// entry of 'vars' is claimed by some partition key.
+func (mls *MergeLocalsState) check() error {
+	if mls == nil {
+		return nil
+	}
+	used := make(map[int]bool)
+	seenv := make(map[*ir.Name]int)
+	for ii, v := range mls.vars {
+		if prev, ok := seenv[v]; ok {
+			return fmt.Errorf("duplicate var %q in vslots: %d and %d\n",
+				v.Sym().Name, ii, prev)
+		}
+		seenv[v] = ii
+	}
+	for k, sl := range mls.partition {
+		// length of slice value needs to be more than 1
+		if len(sl) < 2 {
+			return fmt.Errorf("k=%q v=%+v slice len %d invalid",
+				k.Sym().Name, sl, len(sl))
+		}
+		// values in the slice need to be var indices
+		for i, v := range sl {
+			if v < 0 || v > len(mls.vars)-1 {
+				return fmt.Errorf("k=%q v=+%v slpos %d vslot %d out of range of m.v", k.Sym().Name, sl, i, v)
+			}
+		}
+	}
+	for k, sl := range mls.partition {
+		foundk := false
+		for i, v := range sl {
+			vv := mls.vars[v]
+			if i == 0 {
+				if !mls.IsLeader(vv) {
+					return fmt.Errorf("k=%s v=+%v slpos 0 vslot %d IsLeader(%q) is false should be true", k.Sym().Name, sl, v, vv.Sym().Name)
+				}
+			} else {
+				if !mls.Subsumed(vv) {
+					return fmt.Errorf("k=%s v=+%v slpos %d vslot %d Subsumed(%q) is false should be true", k.Sym().Name, sl, i, v, vv.Sym().Name)
+				}
+				if mls.Leader(vv) != mls.vars[sl[0]] {
+					return fmt.Errorf("k=%s v=+%v slpos %d vslot %d Leader(%q) got %v want %v", k.Sym().Name, sl, i, v, vv.Sym().Name, mls.Leader(vv), mls.vars[sl[0]])
+				}
+			}
+			if vv == k {
+				foundk = true
+				if used[v] {
+					return fmt.Errorf("k=%s v=+%v val slice used violation at slpos %d vslot %d", k.Sym().Name, sl, i, v)
+				}
+				used[v] = true
+			}
+		}
+		if !foundk {
+			return fmt.Errorf("k=%s v=+%v slice value missing k", k.Sym().Name, sl)
+		}
+	}
+	// Every slot of 'vars' must have been claimed above. Walk the vars
+	// slice itself: ranging over the 'used' map would only visit keys
+	// already set to true, so an unclaimed slot could never be reported.
+	for i, v := range mls.vars {
+		if !used[i] {
+			return fmt.Errorf("pos %d var %q unused", i, v.Sym().Name)
+		}
+	}
+	return nil
+}
+
+// String renders the state as one line per partition, in the form
+// "leader: follower follower ...", with leaders ordered by symbol name.
+func (mls *MergeLocalsState) String() string {
+	var leaders []*ir.Name
+	for n, part := range mls.partition {
+		if mls.vars[part[0]] == n {
+			leaders = append(leaders, n)
+		}
+	}
+	sort.Slice(leaders, func(a, b int) bool {
+		return leaders[a].Sym().Name < leaders[b].Sym().Name
+	})
+
+	var out strings.Builder
+	for _, lead := range leaders {
+		out.WriteString(lead.Sym().Name)
+		out.WriteString(":")
+		for _, slot := range mls.partition[lead][1:] {
+			out.WriteString(" ")
+			out.WriteString(mls.vars[slot].Sym().Name)
+		}
+		out.WriteString("\n")
+	}
+	return out.String()
+}
+
+// collectMergeCandidates visits all of the AUTO vars declared in
+// function fn and returns a list of candidate variables for merging /
+// overlapping. Return values are: 1) a slice of ir.Name's
+// corresponding to the candidates, 2) a map that maps ir.Name to slot
+// in the slice, and 3) a slice containing regions (inclusive start/end
+// pairs indexing the returned slice) corresponding to variables that
+// could be overlapped provided that their lifetimes are disjoint.
+// All three results are nil when fewer than two variables could
+// possibly be overlapped.
+func collectMergeCandidates(fn *ir.Func) ([]*ir.Name, map[*ir.Name]int32, []candRegion) {
+	m := make(map[*ir.Name]int32)
+	var cands []*ir.Name
+	var regions []candRegion
+
+	// Collect up the available set of appropriate AUTOs in the
+	// function as a first step.
+	for _, n := range fn.Dcl {
+		if !n.Used() {
+			continue
+		}
+		if !ssa.IsMergeCandidate(n) {
+			continue
+		}
+		cands = append(cands, n)
+	}
+	if len(cands) < 2 {
+		return nil, nil, nil
+	}
+
+	// Sort by pointerness, size, GC symbol (for pointerful types),
+	// name, and finally declaration position, so that compatible
+	// variables end up adjacent and the order is deterministic.
+	sort.SliceStable(cands, func(i, j int) bool {
+		ci, cj := cands[i], cands[j]
+		ihp, jhp := 0, 0
+		var ilsym, jlsym *obj.LSym
+		if ci.Type().HasPointers() {
+			ihp = 1
+			ilsym, _, _ = reflectdata.GCSym(ci.Type())
+		}
+		if cj.Type().HasPointers() {
+			jhp = 1
+			jlsym, _, _ = reflectdata.GCSym(cj.Type())
+		}
+		if ihp != jhp {
+			return ihp < jhp
+		}
+		if ci.Type().Size() != cj.Type().Size() {
+			return ci.Type().Size() < cj.Type().Size()
+		}
+		if ihp != 0 && jhp != 0 && ilsym != jlsym {
+			// FIXME: find less clunky way to do this
+			return fmt.Sprintf("%v", ilsym) < fmt.Sprintf("%v", jlsym)
+		}
+		if ci.Sym().Name != cj.Sym().Name {
+			return ci.Sym().Name < cj.Sym().Name
+		}
+		// Same name: break the tie on position. Both operands must be
+		// consulted; comparing ci with itself would always yield false.
+		return fmt.Sprintf("%v", ci.Pos()) < fmt.Sprintf("%v", cj.Pos())
+	})
+
+	if base.Debug.MergeLocalsTrace > 1 {
+		fmt.Fprintf(os.Stderr, "=-= raw cand list for func %v:\n", fn)
+		for i := range cands {
+			dumpCand(cands[i], i)
+		}
+	}
+
+	// Now generate a pruned candidate list-- we only want to return a
+	// non-empty list if there is some possibility of overlapping two
+	// vars.
+	var pruned []*ir.Name
+	st := 0
+	for {
+		en := nextRegion(cands, st)
+		if en == -1 {
+			break
+		}
+		if st == en {
+			// region has just one element, we can skip it
+			st++
+			continue
+		}
+		// Region bounds are re-expressed relative to the pruned list.
+		pst := len(pruned)
+		pen := pst + (en - st)
+		if base.Debug.MergeLocalsTrace > 1 {
+			fmt.Fprintf(os.Stderr, "=-= add part %d -> %d\n", pst, pen)
+		}
+
+		// non-empty region, add to pruned
+		pruned = append(pruned, cands[st:en+1]...)
+		regions = append(regions, candRegion{st: pst, en: pen})
+		st = en + 1
+	}
+	if len(pruned) < 2 {
+		return nil, nil, nil
+	}
+	for i, n := range pruned {
+		m[n] = int32(i)
+	}
+
+	if base.Debug.MergeLocalsTrace > 1 {
+		fmt.Fprintf(os.Stderr, "=-= pruned candidate list for func %v:\n", fn)
+		for i := range pruned {
+			dumpCand(pruned[i], i)
+		}
+	}
+	return pruned, m, regions
+}
+
+// nextRegion scans forward from position idx in cands for variables
+// that are "compatible" (overlappable) with cands[idx] and returns the
+// index of the last element of the region beginning at idx. Two
+// variables are compatible when they agree on pointerness and size and,
+// if they contain pointers, on GC symbol. It returns -1 when idx is
+// past the end of cands.
+func nextRegion(cands []*ir.Name, idx int) int {
+	total := len(cands)
+	if idx >= total {
+		return -1
+	}
+	first := cands[idx]
+	firstHasPtrs := first.Type().HasPointers()
+	for j := idx + 1; j < total; j++ {
+		other := cands[j]
+		// Pointerness and size must match in every case.
+		if firstHasPtrs != other.Type().HasPointers() || first.Type().Size() != other.Type().Size() {
+			return j - 1
+		}
+		if !firstHasPtrs {
+			// If no pointers, matching size is sufficient.
+			continue
+		}
+		// GC shape must match if both types have pointers.
+		firstSym, _, _ := reflectdata.GCSym(first.Type())
+		otherSym, _, _ := reflectdata.GCSym(other.Type())
+		if firstSym != otherSym {
+			return j - 1
+		}
+	}
+	return total - 1
+}
+
+type cstate struct { // per-function scratch state shared by computeIntervals and performMerging
+ fn *ir.Func // function being analyzed; referenced by trace and fatal messages
+ ibuilders []IntervalsBuilder // one lifetime-interval builder per candidate, indexed like lv.vars
+}
+
+// mergeVisitRegion tries to perform overlapping of variables within a
+// given subrange of cands described by st and en (inclusive indices into
+// our candidate var list), where the variables within this range have
+// already been determined to be compatible with respect to type,
+// size, etc. Overlapping is done in a greedy fashion: we select the
+// first unused element in the st->en range as leader, then walk the rest
+// of the elements adding in vars whose lifetimes don't overlap with the
+// group built so far, then repeat until fewer than two vars remain.
+func (mls *MergeLocalsState) mergeVisitRegion(lv *liveness, ivs []Intervals, st, en int) {
+ if base.Debug.MergeLocalsTrace > 1 {
+ fmt.Fprintf(os.Stderr, "=-= mergeVisitRegion(st=%d, en=%d)\n", st, en)
+ }
+ n := en - st + 1
+ used := bitvec.New(int32(n)) // bit c set => candidate st+c already consumed
+
+ nxt := func(slot int) int { // first unused candidate index >= slot, or -1 if none left in region
+ for c := slot - st; c < n; c++ {
+ if used.Get(int32(c)) {
+ continue
+ }
+ return c + st
+ }
+ return -1
+ }
+
+ navail := n // number of region candidates not yet consumed
+ cands := lv.vars
+ if base.Debug.MergeLocalsTrace > 1 {
+ fmt.Fprintf(os.Stderr, " =-= navail = %d\n", navail)
+ }
+ for navail >= 2 {
+ leader := nxt(st)
+ used.Set(int32(leader - st))
+ navail--
+
+ if base.Debug.MergeLocalsTrace > 1 {
+ fmt.Fprintf(os.Stderr, " =-= begin leader %d used=%s\n", leader,
+ used.String())
+ }
+ elems := []int{leader}
+ lints := ivs[leader] // combined intervals of the group built so far
+
+ for succ := nxt(leader + 1); succ != -1; succ = nxt(succ + 1) {
+
+ // Skip if de-selected by merge locals hash (only consulted when the hash debug flag is set).
+ if base.Debug.MergeLocalsHash != "" {
+ if !base.MergeLocalsHash.MatchPosWithInfo(cands[succ].Pos(), "mergelocals", nil) {
+ continue
+ }
+ }
+ // Skip if already used (nxt never returns a used slot, so this is a defensive re-check).
+ if used.Get(int32(succ - st)) {
+ continue
+ }
+ if base.Debug.MergeLocalsTrace > 1 {
+ fmt.Fprintf(os.Stderr, " =-= overlap of %d[%v] {%s} with %d[%v] {%s} is: %v\n", leader, cands[leader], lints.String(), succ, cands[succ], ivs[succ].String(), lints.Overlaps(ivs[succ]))
+ }
+
+ // Can we overlap the group (leader plus vars added so far) with this var?
+ if lints.Overlaps(ivs[succ]) {
+ continue
+ } else {
+ // Add to overlap set and fold its intervals into lints for later comparisons.
+ elems = append(elems, succ)
+ lints = lints.Merge(ivs[succ])
+ }
+ }
+ if len(elems) > 1 {
+ // We found some things to overlap with leader. Append the candidate
+ // elements to "vars" and point each one's "partition" entry at the same index slice.
+ off := len(mls.vars)
+ sl := make([]int, len(elems))
+ for i, candslot := range elems {
+ sl[i] = off + i
+ mls.vars = append(mls.vars, cands[candslot])
+ mls.partition[cands[candslot]] = sl
+ }
+ navail -= (len(elems) - 1) // the leader itself was already subtracted above
+ for i := range elems {
+ used.Set(int32(elems[i] - st))
+ }
+ if base.Debug.MergeLocalsTrace > 1 {
+ fmt.Fprintf(os.Stderr, "=-= overlapping %+v:\n", sl)
+ for i := range sl {
+ dumpCand(mls.vars[sl[i]], sl[i])
+ }
+ for i, v := range elems {
+ fmt.Fprintf(os.Stderr, "=-= %d: sl=%d %s\n", i, v, ivs[v])
+ }
+ }
+ }
+ }
+}
+
+// performMerging carries out variable merging within each of the
+// candidate ranges in regions, returning a state object
+// that describes the variable overlaps.
+func performMerging(lv *liveness, cs *cstate, regions []candRegion) *MergeLocalsState {
+ cands := lv.vars
+ mls := &MergeLocalsState{
+ partition: make(map[*ir.Name][]int),
+ }
+
+ // Finish intervals construction.
+ ivs := make([]Intervals, len(cands))
+ for i := range cands {
+ var err error
+ ivs[i], err = cs.ibuilders[i].Finish()
+ if err != nil {
+ ninstr := 0
+ if base.Debug.MergeLocalsTrace != 0 {
+ iidx := 0
+ for k := 0; k < len(lv.f.Blocks); k++ {
+ b := lv.f.Blocks[k]
+ fmt.Fprintf(os.Stderr, "\n")
+ for _, v := range b.Values {
+ fmt.Fprintf(os.Stderr, " b%d %d: %s\n", k, iidx, v.LongString())
+ iidx++
+ ninstr++
+ }
+ }
+ }
+ base.FatalfAt(cands[i].Pos(), "interval construct error for var %q in func %q (%d instrs): %v", cands[i].Sym().Name, ir.FuncName(cs.fn), ninstr, err)
+ return nil
+ }
+ }
+
+ // Dump state before attempting overlap.
+ if base.Debug.MergeLocalsTrace > 1 {
+ fmt.Fprintf(os.Stderr, "=-= cands live before overlap:\n")
+ for i := range cands {
+ c := cands[i]
+ fmt.Fprintf(os.Stderr, "%d: %v sz=%d ivs=%s\n",
+ i, c.Sym().Name, c.Type().Size(), ivs[i].String())
+ }
+ fmt.Fprintf(os.Stderr, "=-= regions (%d): ", len(regions))
+ for _, cr := range regions {
+ fmt.Fprintf(os.Stderr, " [%d,%d]", cr.st, cr.en)
+ }
+ fmt.Fprintf(os.Stderr, "\n")
+ }
+
+ if base.Debug.MergeLocalsTrace > 1 {
+ fmt.Fprintf(os.Stderr, "=-= len(regions) = %d\n", len(regions))
+ }
+
+ // Apply a greedy merge/overlap strategy within each region
+ // of compatible variables.
+ for _, cr := range regions {
+ mls.mergeVisitRegion(lv, ivs, cr.st, cr.en)
+ }
+ if len(mls.vars) == 0 {
+ return nil
+ }
+ return mls
+}
+
+// computeIntervals performs a backwards sweep over the instructions
+// of the function we're compiling (last block to first, last value to
+// first within each block), feeding cs.ibuilders[j] a Live or Kill event
+// whenever candidate j changes liveness (upwards exposed uses and kills).
+func computeIntervals(lv *liveness, cs *cstate) {
+ nvars := int32(len(lv.vars))
+ liveout := bitvec.New(nvars) // working "currently live" set, one bit per candidate
+
+ if base.Debug.MergeLocalsDumpFunc != "" &&
+ strings.HasSuffix(fmt.Sprintf("%v", cs.fn), base.Debug.MergeLocalsDumpFunc) {
+ fmt.Fprintf(os.Stderr, "=-= mergelocalsdumpfunc %v:\n", cs.fn)
+ ii := 0
+ for k, b := range lv.f.Blocks {
+ fmt.Fprintf(os.Stderr, "b%d:\n", k)
+ for _, v := range b.Values {
+ pos := base.Ctxt.PosTable.Pos(v.Pos)
+ fmt.Fprintf(os.Stderr, "=-= %d L%d|C%d %s\n", ii, pos.RelLine(), pos.RelCol(), v.LongString())
+ ii++
+ }
+ }
+ }
+
+ // Count instructions (values) across all blocks.
+ ninstr := 0
+ for _, b := range lv.f.Blocks {
+ ninstr += len(b.Values)
+ }
+ // current instruction index during backwards walk; starts at the last value
+ iidx := ninstr - 1
+
+ // Make a backwards pass over all blocks, in reverse lv.f.Blocks order
+ for k := len(lv.f.Blocks) - 1; k >= 0; k-- {
+ b := lv.f.Blocks[k]
+ be := lv.blockEffects(b)
+
+ if base.Debug.MergeLocalsTrace > 2 {
+ fmt.Fprintf(os.Stderr, "=-= liveout from tail of b%d: ", k)
+ for j := range lv.vars {
+ if be.liveout.Get(int32(j)) {
+ fmt.Fprintf(os.Stderr, " %q", lv.vars[j].Sym().Name)
+ }
+ }
+ fmt.Fprintf(os.Stderr, "\n")
+ }
+
+ // Take into account effects taking place at end of this basic
+ // block by comparing our current live set with liveout for
+ // the block. If a given var was not live before and is now
+ // becoming live we need to mark this transition with a
+ // builder "Live" call; similarly if a var was live before and
+ // is now no longer live, we need a "Kill" call.
+ for j := range lv.vars {
+ isLive := liveout.Get(int32(j)) // live in the working set carried over from the block walked previously
+ blockLiveOut := be.liveout.Get(int32(j)) // live at the tail of this block
+ if isLive {
+ if !blockLiveOut {
+ if base.Debug.MergeLocalsTrace > 2 {
+ fmt.Fprintf(os.Stderr, "=+= at instr %d block boundary kill of %v\n", iidx, lv.vars[j])
+ }
+ cs.ibuilders[j].Kill(iidx)
+ }
+ } else if blockLiveOut {
+ if base.Debug.MergeLocalsTrace > 2 {
+ fmt.Fprintf(os.Stderr, "=+= at block-end instr %d %v becomes live\n",
+ iidx, lv.vars[j])
+ }
+ cs.ibuilders[j].Live(iidx)
+ }
+ }
+
+ // Set our working "currently live" set to the previously
+ // computed live out set for the block.
+ liveout.Copy(be.liveout)
+
+ // Now walk backwards through the values of this block.
+ for i := len(b.Values) - 1; i >= 0; i-- {
+ v := b.Values[i]
+
+ if base.Debug.MergeLocalsTrace > 2 {
+ fmt.Fprintf(os.Stderr, "=-= b%d instr %d: %s\n", k, iidx, v.LongString())
+ }
+
+ // Update liveness based on what we see happening in this
+ // instruction (pos is the candidate index, e its effect bits).
+ pos, e := lv.valueEffects(v)
+ becomeslive := e&uevar != 0
+ iskilled := e&varkill != 0
+ if becomeslive && iskilled {
+ // we do not ever expect to see both a kill and an
+ // upwards exposed use given our size constraints.
+ panic("should never happen")
+ }
+ if iskilled && liveout.Get(pos) {
+ cs.ibuilders[pos].Kill(iidx)
+ liveout.Unset(pos)
+ if base.Debug.MergeLocalsTrace > 2 {
+ fmt.Fprintf(os.Stderr, "=+= at instr %d kill of %v\n",
+ iidx, lv.vars[pos])
+ }
+ } else if becomeslive && !liveout.Get(pos) {
+ cs.ibuilders[pos].Live(iidx)
+ liveout.Set(pos)
+ if base.Debug.MergeLocalsTrace > 2 {
+ fmt.Fprintf(os.Stderr, "=+= at instr %d upwards-exposed use of %v\n",
+ iidx, lv.vars[pos])
+ }
+ }
+ iidx--
+ }
+
+ if b == lv.f.Entry { // no candidate may remain live at the top of the entry block
+ for j, v := range lv.vars {
+ if liveout.Get(int32(j)) {
+ lv.f.Fatalf("%v %L recorded as live on entry",
+ lv.fn.Nname, v)
+ }
+ }
+ }
+ }
+ if iidx != -1 { // sanity check: the walk must have consumed exactly ninstr values
+ panic("iidx underflow")
+ }
+}
+
+// dumpCand prints a one-line description of candidate c to stderr for
+// tracing: its index i, its source position(s), name, size, whether
+// its type has pointers, and the type itself.
+func dumpCand(c *ir.Name, i int) {
+	// fmtFullPos renders every position that base.Ctxt.AllPos reports
+	// for p as "file:line:col", separated by "|".
+	fmtFullPos := func(p src.XPos) string {
+		var sb strings.Builder
+		sep := ""
+		base.Ctxt.AllPos(p, func(pos src.Pos) {
+			// sep is plain data, not a format string: write it
+			// verbatim rather than passing it to Fprintf.
+			sb.WriteString(sep)
+			sep = "|"
+			file := filepath.Base(pos.Filename())
+			fmt.Fprintf(&sb, "%s:%d:%d", file, pos.Line(), pos.Col())
+		})
+		return sb.String()
+	}
+	fmt.Fprintf(os.Stderr, " %d: %s %q sz=%d hp=%v t=%v\n",
+		i, fmtFullPos(c.Pos()), c.Sym().Name, c.Type().Size(),
+		c.Type().HasPointers(), c.Type())
+}
+
+// MakeMergeLocalsState builds a MergeLocalsState directly from the
+// supplied partition map and vars slice, returning an error instead of
+// a state if the consistency check rejects them. For unit testing only.
+func MakeMergeLocalsState(partition map[*ir.Name][]int, vars []*ir.Name) (*MergeLocalsState, error) {
+	state := new(MergeLocalsState)
+	state.vars = vars
+	state.partition = partition
+	err := state.check()
+	if err != nil {
+		return nil, err
+	}
+	return state, nil
+}
diff --git a/src/cmd/compile/internal/liveness/plive.go b/src/cmd/compile/internal/liveness/plive.go
index e4dbfa9fa3..ab1a7df930 100644
--- a/src/cmd/compile/internal/liveness/plive.go
+++ b/src/cmd/compile/internal/liveness/plive.go
@@ -143,6 +143,11 @@ type liveness struct {
doClobber bool // Whether to clobber dead stack slots in this function.
noClobberArgs bool // Do not clobber function arguments
+
+ // treat "dead" writes as equivalent to reads during the analysis;
+ // used only during liveness analysis for stack slot merging (doesn't
+ // make sense for stackmap analysis).
+ conservativeWrites bool
}
// Map maps from *ssa.Value to StackMapIndex.
@@ -312,8 +317,12 @@ func (lv *liveness) valueEffects(v *ssa.Value) (int32, liveEffect) {
if e&(ssa.SymRead|ssa.SymAddr) != 0 {
effect |= uevar
}
- if e&ssa.SymWrite != 0 && (!isfat(n.Type()) || v.Op == ssa.OpVarDef) {
- effect |= varkill
+ if e&ssa.SymWrite != 0 {
+ if !isfat(n.Type()) || v.Op == ssa.OpVarDef {
+ effect |= varkill
+ } else if lv.conservativeWrites {
+ effect |= uevar
+ }
}
if effect == 0 {
@@ -450,6 +459,11 @@ func (lv *liveness) blockEffects(b *ssa.Block) *blockEffects {
// this argument and the in arguments are always assumed live. The vars
// argument is a slice of *Nodes.
func (lv *liveness) pointerMap(liveout bitvec.BitVec, vars []*ir.Name, args, locals bitvec.BitVec) {
+ var slotsSeen map[int64]*ir.Name
+ checkForDuplicateSlots := base.Debug.MergeLocals != 0
+ if checkForDuplicateSlots {
+ slotsSeen = make(map[int64]*ir.Name)
+ }
for i := int32(0); ; i++ {
i = liveout.Next(i)
if i < 0 {
@@ -468,6 +482,12 @@ func (lv *liveness) pointerMap(liveout bitvec.BitVec, vars []*ir.Name, args, loc
fallthrough // PPARAMOUT in registers acts memory-allocates like an AUTO
case ir.PAUTO:
typebits.Set(node.Type(), node.FrameOffset()+lv.stkptrsize, locals)
+ if checkForDuplicateSlots {
+ if prev, ok := slotsSeen[node.FrameOffset()]; ok {
+ base.FatalfAt(node.Pos(), "two vars live at pointerMap generation: %q and %q", prev.Sym().Name, node.Sym().Name)
+ }
+ slotsSeen[node.FrameOffset()] = node
+ }
}
}
}
diff --git a/src/cmd/compile/internal/ssa/check.go b/src/cmd/compile/internal/ssa/check.go
index bbfdaceaad..cb6788cd95 100644
--- a/src/cmd/compile/internal/ssa/check.go
+++ b/src/cmd/compile/internal/ssa/check.go
@@ -314,8 +314,9 @@ func checkFunc(f *Func) {
f.Fatalf("bad arg 1 type to %s: want integer, have %s", v.Op, v.Args[1].LongString())
}
case OpVarDef:
- if !v.Aux.(*ir.Name).Type().HasPointers() {
- f.Fatalf("vardef must have pointer type %s", v.Aux.(*ir.Name).Type().String())
+ n := v.Aux.(*ir.Name)
+ if !n.Type().HasPointers() && !IsMergeCandidate(n) {
+ f.Fatalf("vardef must be merge candidate or have pointer type %s", v.Aux.(*ir.Name).Type().String())
}
case OpNilCheck:
// nil checks have pointer type before scheduling, and
diff --git a/src/cmd/compile/internal/ssa/func.go b/src/cmd/compile/internal/ssa/func.go
index 031d94f90c..38b459a2ff 100644
--- a/src/cmd/compile/internal/ssa/func.go
+++ b/src/cmd/compile/internal/ssa/func.go
@@ -838,5 +838,25 @@ func (f *Func) useFMA(v *Value) bool {
// NewLocal returns a new anonymous local variable of the given type, flagged as non-mergeable.
func (f *Func) NewLocal(pos src.XPos, typ *types.Type) *ir.Name {
- return typecheck.TempAt(pos, f.fe.Func(), typ) // Note: adds new auto to fn.Dcl list
+ nn := typecheck.TempAt(pos, f.fe.Func(), typ) // Note: adds new auto to fn.Dcl list
+ nn.SetNonMergeable(true) // IsMergeCandidate rejects names carrying this flag
+ return nn
+}
+
+// IsMergeCandidate reports whether variable n could participate in
+// stack slot merging. For now the set is restricted to autos larger
+// than three pointer words (approximately: larger than what CanSSA
+// would allow) that are not address-taken and not flagged
+// non-mergeable. Open defer slots are also excluded so as to avoid
+// complicating liveness analysis. Merging is disabled entirely unless
+// the MergeLocals debug flag is set, and whenever the N flag is set.
+func IsMergeCandidate(n *ir.Name) bool {
+	switch {
+	case base.Debug.MergeLocals == 0, base.Flag.N != 0:
+		return false
+	case n.Class != ir.PAUTO:
+		return false
+	case n.Type().Size() <= int64(3*types.PtrSize):
+		return false
+	case n.Addrtaken(), n.NonMergeable(), n.OpenDeferSlot():
+		return false
+	}
+	return true
}
diff --git a/src/cmd/compile/internal/ssagen/pgen.go b/src/cmd/compile/internal/ssagen/pgen.go
index c3d9ec3091..d0045e7ee3 100644
--- a/src/cmd/compile/internal/ssagen/pgen.go
+++ b/src/cmd/compile/internal/ssagen/pgen.go
@@ -13,6 +13,7 @@ import (
"cmd/compile/internal/base"
"cmd/compile/internal/ir"
+ "cmd/compile/internal/liveness"
"cmd/compile/internal/objw"
"cmd/compile/internal/ssa"
"cmd/compile/internal/types"
@@ -151,6 +152,18 @@ func (s *ssafn) AllocFrame(f *ssa.Func) {
}
}
+ var mls *liveness.MergeLocalsState
+ if base.Debug.MergeLocals != 0 {
+ mls = liveness.MergeLocals(fn, f)
+ if base.Debug.MergeLocalsTrace == 1 && mls != nil {
+ fmt.Fprintf(os.Stderr, "%s: %d bytes of stack space saved via stack slot merging\n", ir.FuncName(fn), mls.EstSavings())
+ if base.Debug.MergeLocalsTrace > 1 {
+ fmt.Fprintf(os.Stderr, "=-= merge locals state for %v:\n%v",
+ fn, mls)
+ }
+ }
+ }
+
// Use sort.SliceStable instead of sort.Slice so stack layout (and thus
// compiler output) is less sensitive to frontend changes that
// introduce or remove unused variables.
@@ -158,6 +171,22 @@ func (s *ssafn) AllocFrame(f *ssa.Func) {
return cmpstackvarlt(fn.Dcl[i], fn.Dcl[j])
})
+ if base.Debug.MergeLocalsTrace > 1 && mls != nil {
+ fmt.Fprintf(os.Stderr, "=-= sorted DCL for %v:\n", fn)
+ for i, v := range fn.Dcl {
+ if !ssa.IsMergeCandidate(v) {
+ continue
+ }
+ fmt.Fprintf(os.Stderr, " %d: %q isleader=%v subsumed=%v used=%v\n", i, v.Sym().Name, mls.IsLeader(v), mls.Subsumed(v), v.Used())
+
+ }
+ }
+
+ var leaders map[*ir.Name]int64
+ if mls != nil {
+ leaders = make(map[*ir.Name]int64)
+ }
+
// Reassign stack offsets of the locals that are used.
lastHasPtr := false
for i, n := range fn.Dcl {
@@ -165,12 +194,14 @@ func (s *ssafn) AllocFrame(f *ssa.Func) {
// i.e., stack assign if AUTO, or if PARAMOUT in registers (which has no predefined spill locations)
continue
}
+ if mls != nil && mls.Subsumed(n) {
+ continue
+ }
if !n.Used() {
fn.DebugInfo.(*ssa.FuncDebug).OptDcl = fn.Dcl[i:]
fn.Dcl = fn.Dcl[:i]
break
}
-
types.CalcSize(n.Type())
w := n.Type().Size()
if w >= types.MaxWidth || w < 0 {
@@ -195,6 +226,42 @@ func (s *ssafn) AllocFrame(f *ssa.Func) {
lastHasPtr = false
}
n.SetFrameOffset(-s.stksize)
+ if mls != nil && mls.IsLeader(n) {
+ leaders[n] = -s.stksize
+ }
+ }
+
+	if mls != nil {
+		// Copy fn.Dcl minus its subsumed entries. After each recorded
+		// leader, append the followers reported by mls.Followers, each
+		// assigned the leader's frame offset.
+		scratch := []*ir.Name{}
+		rebuilt := make([]*ir.Name, 0, len(fn.Dcl))
+		for _, decl := range fn.Dcl {
+			if mls.Subsumed(decl) {
+				continue
+			}
+			rebuilt = append(rebuilt, decl)
+			leaderOff, isLeader := leaders[decl]
+			if !isLeader {
+				continue
+			}
+			scratch = mls.Followers(decl, scratch)
+			for _, fol := range scratch {
+				// Followers share the leader's stack slot.
+				fol.SetFrameOffset(leaderOff)
+			}
+			// Followers sit immediately after their leader.
+			rebuilt = append(rebuilt, scratch...)
+		}
+		fn.Dcl = rebuilt
+	}
+
+ if base.Debug.MergeLocalsTrace > 1 {
+ fmt.Fprintf(os.Stderr, "=-= stack layout for %v:\n", fn)
+ for i, v := range fn.Dcl {
+ if v.Op() != ir.ONAME || (v.Class != ir.PAUTO && !(v.Class == ir.PPARAMOUT && v.IsOutputParamInRegisters())) {
+ continue
+ }
+ fmt.Fprintf(os.Stderr, " %d: %q frameoff %d used=%v\n", i, v.Sym().Name, v.FrameOffset(), v.Used())
+ }
}
s.stksize = types.RoundUp(s.stksize, s.stkalign)
diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go
index 59b4c88089..9e384fe016 100644
--- a/src/cmd/compile/internal/ssagen/ssa.go
+++ b/src/cmd/compile/internal/ssagen/ssa.go
@@ -633,7 +633,7 @@ func (s *state) zeroResults() {
if typ := n.Type(); ssa.CanSSA(typ) {
s.assign(n, s.zeroVal(typ), false, 0)
} else {
- if typ.HasPointers() {
+ if typ.HasPointers() || ssa.IsMergeCandidate(n) {
s.vars[memVar] = s.newValue1A(ssa.OpVarDef, types.TypeMem, n, s.mem())
}
s.zero(n.Type(), s.decladdrs[n])
@@ -3949,7 +3949,7 @@ func (s *state) assignWhichMayOverlap(left ir.Node, right *ssa.Value, deref bool
// If this assignment clobbers an entire local variable, then emit
// OpVarDef so liveness analysis knows the variable is redefined.
- if base, ok := clobberBase(left).(*ir.Name); ok && base.OnStack() && skip == 0 && t.HasPointers() {
+ if base, ok := clobberBase(left).(*ir.Name); ok && base.OnStack() && skip == 0 && (t.HasPointers() || ssa.IsMergeCandidate(base)) {
s.vars[memVar] = s.newValue1Apos(ssa.OpVarDef, types.TypeMem, base, s.mem(), !ir.IsAutoTmp(base))
}
@@ -5389,7 +5389,8 @@ func (s *state) call(n *ir.CallExpr, k callKind, returnResultAddr bool, deferExt
}
// Make a defer struct on the stack.
t := deferstruct()
- _, addr := s.temp(n.Pos(), t)
+ n, addr := s.temp(n.Pos(), t)
+ n.SetNonMergeable(true)
s.store(closure.Type,
s.newValue1I(ssa.OpOffPtr, closure.Type.PtrTo(), t.FieldOff(deferStructFnField), addr),
closure)
@@ -6893,7 +6894,7 @@ func (s *state) dottype1(pos src.XPos, src, dst *types.Type, iface, source, targ
// temp allocates a temp of type t at position pos
func (s *state) temp(pos src.XPos, t *types.Type) (*ir.Name, *ssa.Value) {
tmp := typecheck.TempAt(pos, s.curfn, t)
- if t.HasPointers() {
+ if t.HasPointers() || (ssa.IsMergeCandidate(tmp) && t != deferstruct()) {
s.vars[memVar] = s.newValue1A(ssa.OpVarDef, types.TypeMem, tmp, s.mem())
}
addr := s.addr(tmp)
diff --git a/src/cmd/compile/internal/test/mergelocals_test.go b/src/cmd/compile/internal/test/mergelocals_test.go
new file mode 100644
index 0000000000..f070197c80
--- /dev/null
+++ b/src/cmd/compile/internal/test/mergelocals_test.go
@@ -0,0 +1,184 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package test
+
+import (
+ "cmd/compile/internal/ir"
+ "cmd/compile/internal/liveness"
+ "cmd/compile/internal/typecheck"
+ "cmd/compile/internal/types"
+ "cmd/internal/src"
+ "internal/testenv"
+ "path/filepath"
+ "slices"
+ "sort"
+ "strings"
+ "testing"
+)
+
+// TestMergeLocalState feeds hand-built partitions to
+// liveness.MakeMergeLocalsState and checks that the well-formed input
+// is accepted and that each malformed variant produces an error.
+func TestMergeLocalState(t *testing.T) {
+	// mkiv creates an int32-typed name for use as a test variable.
+	mkiv := func(name string) *ir.Name {
+		i32 := types.Types[types.TINT32]
+		s := typecheck.Lookup(name)
+		v := ir.NewNameAt(src.NoXPos, s, i32)
+		return v
+	}
+	v1 := mkiv("v1")
+	v2 := mkiv("v2")
+	v3 := mkiv("v3")
+
+	testcases := []struct {
+		vars      []*ir.Name
+		partition map[*ir.Name][]int
+		experr    bool
+	}{
+		{
+			vars: []*ir.Name{v1, v2, v3},
+			partition: map[*ir.Name][]int{
+				v1: {0, 1, 2},
+				v2: {0, 1, 2},
+				v3: {0, 1, 2},
+			},
+			experr: false,
+		},
+		{
+			// invalid mls.v slot -1
+			vars: []*ir.Name{v1, v2, v3},
+			partition: map[*ir.Name][]int{
+				v1: {-1, 0},
+				v2: {0, 1, 2},
+				v3: {0, 1, 2},
+			},
+			experr: true,
+		},
+		{
+			// duplicate var in v
+			vars: []*ir.Name{v1, v2, v2},
+			partition: map[*ir.Name][]int{
+				v1: {0, 1, 2},
+				v2: {0, 1, 2},
+				v3: {0, 1, 2},
+			},
+			experr: true,
+		},
+		{
+			// single element in partition
+			vars: []*ir.Name{v1, v2, v3},
+			partition: map[*ir.Name][]int{
+				v1: {0},
+				v2: {0, 1, 2},
+				v3: {0, 1, 2},
+			},
+			experr: true,
+		},
+		{
+			// missing element 2
+			vars: []*ir.Name{v1, v2, v3},
+			partition: map[*ir.Name][]int{
+				v1: {0, 1},
+				v2: {0, 1},
+				v3: {0, 1},
+			},
+			experr: true,
+		},
+		{
+			// partitions disagree for v1 vs v2
+			vars: []*ir.Name{v1, v2, v3},
+			partition: map[*ir.Name][]int{
+				v1: {0, 1, 2},
+				v2: {1, 0, 2},
+				v3: {0, 1, 2},
+			},
+			experr: true,
+		},
+	}
+
+	for k, testcase := range testcases {
+		mls, err := liveness.MakeMergeLocalsState(testcase.partition, testcase.vars)
+		t.Logf("tc %d err is %v\n", k, err)
+		// Report with Errorf so one failing case does not hide the
+		// results of the cases that follow it.
+		switch {
+		case testcase.experr && err == nil:
+			t.Errorf("tc:%d missing error mls %v", k, mls)
+		case !testcase.experr && err != nil:
+			t.Errorf("tc:%d unexpected error: %v", k, err)
+		}
+		if mls != nil {
+			t.Logf("tc %d: mls: %v\n", k, mls.String())
+		}
+	}
+}
+
+// TestMergeLocalsIntegration compiles testdata/mergelocals/integration.go
+// with merge-locals tracing turned on and checks how the traced
+// variables of function ABC are grouped by frame offset.
+func TestMergeLocalsIntegration(t *testing.T) {
+	testenv.MustHaveGoBuild(t)
+
+	// This test does a build of a specific canned package to
+	// check whether merging of stack slots is taking place.
+	// The idea is to do the compile with a trace option turned
+	// on and then pick up on the frame offsets of specific
+	// variables.
+	//
+	// Stack slot merging is a greedy algorithm, and there can
+	// be many possible ways to overlap a given set of candidate
+	// variables, all of them legal. Rather than locking down
+	// a specific set of overlappings or frame offsets, this
+	// test just verifies that there is one clump of 3 vars that
+	// get overlapped, then another clump of 2 that share the same
+	// frame offset.
+	//
+	// The expected output blob we're interested in looks like this:
+	//
+	// =-= stack layout for ABC:
+	// 2: "p1" frameoff -8200 used=true
+	// 3: "xp3" frameoff -8200 used=true
+	// 4: "xp4" frameoff -8200 used=true
+	// 5: "p2" frameoff -16400 used=true
+	// 6: "s" frameoff -24592 used=true
+	// 7: "v1" frameoff -32792 used=true
+	// 8: "v3" frameoff -32792 used=true
+	// 9: "v2" frameoff -40992 used=true
+	//
+	tmpdir := t.TempDir()
+	// Named srcPath rather than src so the imported cmd/internal/src
+	// package is not shadowed inside this function.
+	srcPath := filepath.Join("testdata", "mergelocals", "integration.go")
+	obj := filepath.Join(tmpdir, "p.a")
+	out, err := testenv.Command(t, testenv.GoToolPath(t), "tool", "compile", "-p=p", "-c", "1", "-o", obj, "-d=mergelocalstrace=2,mergelocals=1", srcPath).CombinedOutput()
+	if err != nil {
+		t.Fatalf("failed to compile: %v\n%s", err, out)
+	}
+
+	// vars maps each traced variable name to its frame offset;
+	// varsAtFrameOffset counts how many variables share each offset.
+	vars := make(map[string]string)
+	varsAtFrameOffset := make(map[string]int)
+	inLayout := false
+	for _, line := range strings.Split(string(out), "\n") {
+		switch {
+		case line == "=-= stack layout for ABC:":
+			inLayout = true
+			continue
+		case strings.HasPrefix(line, "=-="):
+			// Some other trace section starts here; its lines are
+			// not part of ABC's stack layout.
+			inLayout = false
+			continue
+		case !inLayout || line == "":
+			continue
+		}
+		fields := strings.Fields(line)
+		if len(fields) != 5 {
+			t.Fatalf("bad trace output line: %s", line)
+		}
+		vname := fields[1]
+		frameoff := fields[3]
+		varsAtFrameOffset[frameoff]++
+		vars[vname] = frameoff
+	}
+	wantvnum := 8
+	gotvnum := len(vars)
+	if wantvnum != gotvnum {
+		t.Fatalf("expected trace output on %d vars got %d\n", wantvnum, gotvnum)
+	}
+
+	// We expect one clump of 3, another clump of 2, and the rest singletons.
+	expected := []int{1, 1, 1, 2, 3}
+	got := make([]int, 0, len(varsAtFrameOffset))
+	for _, v := range varsAtFrameOffset {
+		got = append(got, v)
+	}
+	// Map iteration order is random, so sort before comparing.
+	sort.Ints(got)
+	if !slices.Equal(got, expected) {
+		t.Fatalf("expected variable clumps %+v not equal to what we got: %+v", expected, got)
+	}
+}
diff --git a/src/cmd/compile/internal/test/testdata/mergelocals/integration.go b/src/cmd/compile/internal/test/testdata/mergelocals/integration.go
new file mode 100644
index 0000000000..d640c6fce8
--- /dev/null
+++ b/src/cmd/compile/internal/test/testdata/mergelocals/integration.go
@@ -0,0 +1,83 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package p
+
+// Pointery and Pointery2 (below) have the same field layout: one
+// leading pointer followed by a [1024]int array. As a result they
+// share the same GC shape and size.
+type Pointery struct {
+ p *Pointery
+ x [1024]int
+}
+
+// Pointery2 mirrors Pointery field for field; only the element type of
+// the pointer differs.
+type Pointery2 struct {
+ p *Pointery2
+ x [1024]int
+}
+
+// Vanilla and Vanilla2 (below) contain no pointer fields and have the
+// same size.
+type Vanilla struct {
+ np uintptr
+ x [1024]int
+}
+
+// Vanilla2 holds the same number of ints as Vanilla, but the last one
+// is a separate field instead of an array element.
+type Vanilla2 struct {
+ np uintptr
+ x [1023]int
+ y int
+}
+
+// Single has one int fewer than Vanilla and Vanilla2, so its size
+// matches none of the other types in this file.
+type Single struct {
+ np uintptr
+ x [1023]int
+}
+
+// ABC declares several large local variables with differing scopes.
+// The comments in the body note which of them interfere and which
+// could share a stack slot. It returns the sum of the values read back.
+func ABC(i, j int) int {
+ r := 0
+
+ // Here v1 interferes with v2 (v2 is declared before v1's last use)
+ // but v1 could be overlapped with v3.
+ // NOTE(review): v2 also looks overlappable with v3, since v2 is
+ // only used inside the if body; confirm which pairing was intended.
+ var v1 Vanilla
+ if i < 101 {
+ var v2 Vanilla
+ v1.x[i] = j
+ r += v1.x[j]
+ v2.x[i] = j
+ r += v2.x[j]
+ }
+
+ {
+ var v3 Vanilla2
+ v3.x[i] = j
+ r += v3.x[j]
+ }
+
+ // s is the only variable of type Single in this function.
+ var s Single
+ s.x[i] = j
+ r += s.x[j]
+
+ // Here p1 and p2 interfere, but p1 could be overlapped with xp3.
+ var p1, p2 Pointery
+ p1.x[i] = j
+ r += p1.x[j]
+ p2.x[i] = j
+ r += p2.x[j]
+ {
+ var xp3 Pointery2
+ xp3.x[i] = j
+ r += xp3.x[j]
+ }
+
+ if i == j*2 {
+ // p2 live on this path
+ p2.x[i] += j
+ r += p2.x[j]
+ } else {
+ // p2 not live on this path
+ var xp4 Pointery2
+ xp4.x[i] = j
+ r += xp4.x[j]
+ }
+
+ return r
+}
diff --git a/src/cmd/compile/internal/walk/temp.go b/src/cmd/compile/internal/walk/temp.go
index 886b5beec3..604ac17367 100644
--- a/src/cmd/compile/internal/walk/temp.go
+++ b/src/cmd/compile/internal/walk/temp.go
@@ -25,7 +25,9 @@ func initStackTemp(init *ir.Nodes, tmp *ir.Name, val ir.Node) *ir.AddrExpr {
// allocated temporary variable of the given type. Statements to
// zero-initialize tmp are appended to init.
func stackTempAddr(init *ir.Nodes, typ *types.Type) *ir.AddrExpr {
- return initStackTemp(init, typecheck.TempAt(base.Pos, ir.CurFunc, typ), nil)
+ n := typecheck.TempAt(base.Pos, ir.CurFunc, typ)
+ n.SetNonMergeable(true)
+ return initStackTemp(init, n, nil)
}
// stackBufAddr returns the expression &tmp, where tmp is a newly
diff --git a/test/fixedbugs/bug385_64.go b/test/fixedbugs/bug385_64.go
index 3240960f1a..deba9c9fae 100644
--- a/test/fixedbugs/bug385_64.go
+++ b/test/fixedbugs/bug385_64.go
@@ -11,214 +11,423 @@
package main
-var z [10<<20]byte
+var z [10 << 20]byte
func main() { // GC_ERROR "stack frame too large"
- // seq 1 206 | sed 's/.*/ var x& [10<<20]byte; z = x&/'
- var x1 [10<<20]byte; z = x1
- var x2 [10<<20]byte; z = x2
- var x3 [10<<20]byte; z = x3
- var x4 [10<<20]byte; z = x4
- var x5 [10<<20]byte; z = x5
- var x6 [10<<20]byte; z = x6
- var x7 [10<<20]byte; z = x7
- var x8 [10<<20]byte; z = x8
- var x9 [10<<20]byte; z = x9
- var x10 [10<<20]byte; z = x10
- var x11 [10<<20]byte; z = x11
- var x12 [10<<20]byte; z = x12
- var x13 [10<<20]byte; z = x13
- var x14 [10<<20]byte; z = x14
- var x15 [10<<20]byte; z = x15
- var x16 [10<<20]byte; z = x16
- var x17 [10<<20]byte; z = x17
- var x18 [10<<20]byte; z = x18
- var x19 [10<<20]byte; z = x19
- var x20 [10<<20]byte; z = x20
- var x21 [10<<20]byte; z = x21
- var x22 [10<<20]byte; z = x22
- var x23 [10<<20]byte; z = x23
- var x24 [10<<20]byte; z = x24
- var x25 [10<<20]byte; z = x25
- var x26 [10<<20]byte; z = x26
- var x27 [10<<20]byte; z = x27
- var x28 [10<<20]byte; z = x28
- var x29 [10<<20]byte; z = x29
- var x30 [10<<20]byte; z = x30
- var x31 [10<<20]byte; z = x31
- var x32 [10<<20]byte; z = x32
- var x33 [10<<20]byte; z = x33
- var x34 [10<<20]byte; z = x34
- var x35 [10<<20]byte; z = x35
- var x36 [10<<20]byte; z = x36
- var x37 [10<<20]byte; z = x37
- var x38 [10<<20]byte; z = x38
- var x39 [10<<20]byte; z = x39
- var x40 [10<<20]byte; z = x40
- var x41 [10<<20]byte; z = x41
- var x42 [10<<20]byte; z = x42
- var x43 [10<<20]byte; z = x43
- var x44 [10<<20]byte; z = x44
- var x45 [10<<20]byte; z = x45
- var x46 [10<<20]byte; z = x46
- var x47 [10<<20]byte; z = x47
- var x48 [10<<20]byte; z = x48
- var x49 [10<<20]byte; z = x49
- var x50 [10<<20]byte; z = x50
- var x51 [10<<20]byte; z = x51
- var x52 [10<<20]byte; z = x52
- var x53 [10<<20]byte; z = x53
- var x54 [10<<20]byte; z = x54
- var x55 [10<<20]byte; z = x55
- var x56 [10<<20]byte; z = x56
- var x57 [10<<20]byte; z = x57
- var x58 [10<<20]byte; z = x58
- var x59 [10<<20]byte; z = x59
- var x60 [10<<20]byte; z = x60
- var x61 [10<<20]byte; z = x61
- var x62 [10<<20]byte; z = x62
- var x63 [10<<20]byte; z = x63
- var x64 [10<<20]byte; z = x64
- var x65 [10<<20]byte; z = x65
- var x66 [10<<20]byte; z = x66
- var x67 [10<<20]byte; z = x67
- var x68 [10<<20]byte; z = x68
- var x69 [10<<20]byte; z = x69
- var x70 [10<<20]byte; z = x70
- var x71 [10<<20]byte; z = x71
- var x72 [10<<20]byte; z = x72
- var x73 [10<<20]byte; z = x73
- var x74 [10<<20]byte; z = x74
- var x75 [10<<20]byte; z = x75
- var x76 [10<<20]byte; z = x76
- var x77 [10<<20]byte; z = x77
- var x78 [10<<20]byte; z = x78
- var x79 [10<<20]byte; z = x79
- var x80 [10<<20]byte; z = x80
- var x81 [10<<20]byte; z = x81
- var x82 [10<<20]byte; z = x82
- var x83 [10<<20]byte; z = x83
- var x84 [10<<20]byte; z = x84
- var x85 [10<<20]byte; z = x85
- var x86 [10<<20]byte; z = x86
- var x87 [10<<20]byte; z = x87
- var x88 [10<<20]byte; z = x88
- var x89 [10<<20]byte; z = x89
- var x90 [10<<20]byte; z = x90
- var x91 [10<<20]byte; z = x91
- var x92 [10<<20]byte; z = x92
- var x93 [10<<20]byte; z = x93
- var x94 [10<<20]byte; z = x94
- var x95 [10<<20]byte; z = x95
- var x96 [10<<20]byte; z = x96
- var x97 [10<<20]byte; z = x97
- var x98 [10<<20]byte; z = x98
- var x99 [10<<20]byte; z = x99
- var x100 [10<<20]byte; z = x100
- var x101 [10<<20]byte; z = x101
- var x102 [10<<20]byte; z = x102
- var x103 [10<<20]byte; z = x103
- var x104 [10<<20]byte; z = x104
- var x105 [10<<20]byte; z = x105
- var x106 [10<<20]byte; z = x106
- var x107 [10<<20]byte; z = x107
- var x108 [10<<20]byte; z = x108
- var x109 [10<<20]byte; z = x109
- var x110 [10<<20]byte; z = x110
- var x111 [10<<20]byte; z = x111
- var x112 [10<<20]byte; z = x112
- var x113 [10<<20]byte; z = x113
- var x114 [10<<20]byte; z = x114
- var x115 [10<<20]byte; z = x115
- var x116 [10<<20]byte; z = x116
- var x117 [10<<20]byte; z = x117
- var x118 [10<<20]byte; z = x118
- var x119 [10<<20]byte; z = x119
- var x120 [10<<20]byte; z = x120
- var x121 [10<<20]byte; z = x121
- var x122 [10<<20]byte; z = x122
- var x123 [10<<20]byte; z = x123
- var x124 [10<<20]byte; z = x124
- var x125 [10<<20]byte; z = x125
- var x126 [10<<20]byte; z = x126
- var x127 [10<<20]byte; z = x127
- var x128 [10<<20]byte; z = x128
- var x129 [10<<20]byte; z = x129
- var x130 [10<<20]byte; z = x130
- var x131 [10<<20]byte; z = x131
- var x132 [10<<20]byte; z = x132
- var x133 [10<<20]byte; z = x133
- var x134 [10<<20]byte; z = x134
- var x135 [10<<20]byte; z = x135
- var x136 [10<<20]byte; z = x136
- var x137 [10<<20]byte; z = x137
- var x138 [10<<20]byte; z = x138
- var x139 [10<<20]byte; z = x139
- var x140 [10<<20]byte; z = x140
- var x141 [10<<20]byte; z = x141
- var x142 [10<<20]byte; z = x142
- var x143 [10<<20]byte; z = x143
- var x144 [10<<20]byte; z = x144
- var x145 [10<<20]byte; z = x145
- var x146 [10<<20]byte; z = x146
- var x147 [10<<20]byte; z = x147
- var x148 [10<<20]byte; z = x148
- var x149 [10<<20]byte; z = x149
- var x150 [10<<20]byte; z = x150
- var x151 [10<<20]byte; z = x151
- var x152 [10<<20]byte; z = x152
- var x153 [10<<20]byte; z = x153
- var x154 [10<<20]byte; z = x154
- var x155 [10<<20]byte; z = x155
- var x156 [10<<20]byte; z = x156
- var x157 [10<<20]byte; z = x157
- var x158 [10<<20]byte; z = x158
- var x159 [10<<20]byte; z = x159
- var x160 [10<<20]byte; z = x160
- var x161 [10<<20]byte; z = x161
- var x162 [10<<20]byte; z = x162
- var x163 [10<<20]byte; z = x163
- var x164 [10<<20]byte; z = x164
- var x165 [10<<20]byte; z = x165
- var x166 [10<<20]byte; z = x166
- var x167 [10<<20]byte; z = x167
- var x168 [10<<20]byte; z = x168
- var x169 [10<<20]byte; z = x169
- var x170 [10<<20]byte; z = x170
- var x171 [10<<20]byte; z = x171
- var x172 [10<<20]byte; z = x172
- var x173 [10<<20]byte; z = x173
- var x174 [10<<20]byte; z = x174
- var x175 [10<<20]byte; z = x175
- var x176 [10<<20]byte; z = x176
- var x177 [10<<20]byte; z = x177
- var x178 [10<<20]byte; z = x178
- var x179 [10<<20]byte; z = x179
- var x180 [10<<20]byte; z = x180
- var x181 [10<<20]byte; z = x181
- var x182 [10<<20]byte; z = x182
- var x183 [10<<20]byte; z = x183
- var x184 [10<<20]byte; z = x184
- var x185 [10<<20]byte; z = x185
- var x186 [10<<20]byte; z = x186
- var x187 [10<<20]byte; z = x187
- var x188 [10<<20]byte; z = x188
- var x189 [10<<20]byte; z = x189
- var x190 [10<<20]byte; z = x190
- var x191 [10<<20]byte; z = x191
- var x192 [10<<20]byte; z = x192
- var x193 [10<<20]byte; z = x193
- var x194 [10<<20]byte; z = x194
- var x195 [10<<20]byte; z = x195
- var x196 [10<<20]byte; z = x196
- var x197 [10<<20]byte; z = x197
- var x198 [10<<20]byte; z = x198
- var x199 [10<<20]byte; z = x199
- var x200 [10<<20]byte; z = x200
- var x201 [10<<20]byte; z = x201
- var x202 [10<<20]byte; z = x202
- var x203 [10<<20]byte; z = x203
- var x204 [10<<20]byte; z = x204
- var x205 [10<<20]byte; z = x205
- var x206 [10<<20]byte; z = x206
+ // seq 1 207 | sed 's/.*/ var x& [10<<20]byte/'
+ // seq 1 207 | sed 's/.*/ z = x&/'
+ var x1 [10<<20]byte
+ var x2 [10<<20]byte
+ var x3 [10<<20]byte
+ var x4 [10<<20]byte
+ var x5 [10<<20]byte
+ var x6 [10<<20]byte
+ var x7 [10<<20]byte
+ var x8 [10<<20]byte
+ var x9 [10<<20]byte
+ var x10 [10<<20]byte
+ var x11 [10<<20]byte
+ var x12 [10<<20]byte
+ var x13 [10<<20]byte
+ var x14 [10<<20]byte
+ var x15 [10<<20]byte
+ var x16 [10<<20]byte
+ var x17 [10<<20]byte
+ var x18 [10<<20]byte
+ var x19 [10<<20]byte
+ var x20 [10<<20]byte
+ var x21 [10<<20]byte
+ var x22 [10<<20]byte
+ var x23 [10<<20]byte
+ var x24 [10<<20]byte
+ var x25 [10<<20]byte
+ var x26 [10<<20]byte
+ var x27 [10<<20]byte
+ var x28 [10<<20]byte
+ var x29 [10<<20]byte
+ var x30 [10<<20]byte
+ var x31 [10<<20]byte
+ var x32 [10<<20]byte
+ var x33 [10<<20]byte
+ var x34 [10<<20]byte
+ var x35 [10<<20]byte
+ var x36 [10<<20]byte
+ var x37 [10<<20]byte
+ var x38 [10<<20]byte
+ var x39 [10<<20]byte
+ var x40 [10<<20]byte
+ var x41 [10<<20]byte
+ var x42 [10<<20]byte
+ var x43 [10<<20]byte
+ var x44 [10<<20]byte
+ var x45 [10<<20]byte
+ var x46 [10<<20]byte
+ var x47 [10<<20]byte
+ var x48 [10<<20]byte
+ var x49 [10<<20]byte
+ var x50 [10<<20]byte
+ var x51 [10<<20]byte
+ var x52 [10<<20]byte
+ var x53 [10<<20]byte
+ var x54 [10<<20]byte
+ var x55 [10<<20]byte
+ var x56 [10<<20]byte
+ var x57 [10<<20]byte
+ var x58 [10<<20]byte
+ var x59 [10<<20]byte
+ var x60 [10<<20]byte
+ var x61 [10<<20]byte
+ var x62 [10<<20]byte
+ var x63 [10<<20]byte
+ var x64 [10<<20]byte
+ var x65 [10<<20]byte
+ var x66 [10<<20]byte
+ var x67 [10<<20]byte
+ var x68 [10<<20]byte
+ var x69 [10<<20]byte
+ var x70 [10<<20]byte
+ var x71 [10<<20]byte
+ var x72 [10<<20]byte
+ var x73 [10<<20]byte
+ var x74 [10<<20]byte
+ var x75 [10<<20]byte
+ var x76 [10<<20]byte
+ var x77 [10<<20]byte
+ var x78 [10<<20]byte
+ var x79 [10<<20]byte
+ var x80 [10<<20]byte
+ var x81 [10<<20]byte
+ var x82 [10<<20]byte
+ var x83 [10<<20]byte
+ var x84 [10<<20]byte
+ var x85 [10<<20]byte
+ var x86 [10<<20]byte
+ var x87 [10<<20]byte
+ var x88 [10<<20]byte
+ var x89 [10<<20]byte
+ var x90 [10<<20]byte
+ var x91 [10<<20]byte
+ var x92 [10<<20]byte
+ var x93 [10<<20]byte
+ var x94 [10<<20]byte
+ var x95 [10<<20]byte
+ var x96 [10<<20]byte
+ var x97 [10<<20]byte
+ var x98 [10<<20]byte
+ var x99 [10<<20]byte
+ var x100 [10<<20]byte
+ var x101 [10<<20]byte
+ var x102 [10<<20]byte
+ var x103 [10<<20]byte
+ var x104 [10<<20]byte
+ var x105 [10<<20]byte
+ var x106 [10<<20]byte
+ var x107 [10<<20]byte
+ var x108 [10<<20]byte
+ var x109 [10<<20]byte
+ var x110 [10<<20]byte
+ var x111 [10<<20]byte
+ var x112 [10<<20]byte
+ var x113 [10<<20]byte
+ var x114 [10<<20]byte
+ var x115 [10<<20]byte
+ var x116 [10<<20]byte
+ var x117 [10<<20]byte
+ var x118 [10<<20]byte
+ var x119 [10<<20]byte
+ var x120 [10<<20]byte
+ var x121 [10<<20]byte
+ var x122 [10<<20]byte
+ var x123 [10<<20]byte
+ var x124 [10<<20]byte
+ var x125 [10<<20]byte
+ var x126 [10<<20]byte
+ var x127 [10<<20]byte
+ var x128 [10<<20]byte
+ var x129 [10<<20]byte
+ var x130 [10<<20]byte
+ var x131 [10<<20]byte
+ var x132 [10<<20]byte
+ var x133 [10<<20]byte
+ var x134 [10<<20]byte
+ var x135 [10<<20]byte
+ var x136 [10<<20]byte
+ var x137 [10<<20]byte
+ var x138 [10<<20]byte
+ var x139 [10<<20]byte
+ var x140 [10<<20]byte
+ var x141 [10<<20]byte
+ var x142 [10<<20]byte
+ var x143 [10<<20]byte
+ var x144 [10<<20]byte
+ var x145 [10<<20]byte
+ var x146 [10<<20]byte
+ var x147 [10<<20]byte
+ var x148 [10<<20]byte
+ var x149 [10<<20]byte
+ var x150 [10<<20]byte
+ var x151 [10<<20]byte
+ var x152 [10<<20]byte
+ var x153 [10<<20]byte
+ var x154 [10<<20]byte
+ var x155 [10<<20]byte
+ var x156 [10<<20]byte
+ var x157 [10<<20]byte
+ var x158 [10<<20]byte
+ var x159 [10<<20]byte
+ var x160 [10<<20]byte
+ var x161 [10<<20]byte
+ var x162 [10<<20]byte
+ var x163 [10<<20]byte
+ var x164 [10<<20]byte
+ var x165 [10<<20]byte
+ var x166 [10<<20]byte
+ var x167 [10<<20]byte
+ var x168 [10<<20]byte
+ var x169 [10<<20]byte
+ var x170 [10<<20]byte
+ var x171 [10<<20]byte
+ var x172 [10<<20]byte
+ var x173 [10<<20]byte
+ var x174 [10<<20]byte
+ var x175 [10<<20]byte
+ var x176 [10<<20]byte
+ var x177 [10<<20]byte
+ var x178 [10<<20]byte
+ var x179 [10<<20]byte
+ var x180 [10<<20]byte
+ var x181 [10<<20]byte
+ var x182 [10<<20]byte
+ var x183 [10<<20]byte
+ var x184 [10<<20]byte
+ var x185 [10<<20]byte
+ var x186 [10<<20]byte
+ var x187 [10<<20]byte
+ var x188 [10<<20]byte
+ var x189 [10<<20]byte
+ var x190 [10<<20]byte
+ var x191 [10<<20]byte
+ var x192 [10<<20]byte
+ var x193 [10<<20]byte
+ var x194 [10<<20]byte
+ var x195 [10<<20]byte
+ var x196 [10<<20]byte
+ var x197 [10<<20]byte
+ var x198 [10<<20]byte
+ var x199 [10<<20]byte
+ var x200 [10<<20]byte
+ var x201 [10<<20]byte
+ var x202 [10<<20]byte
+ var x203 [10<<20]byte
+ var x204 [10<<20]byte
+ var x205 [10<<20]byte
+ var x206 [10<<20]byte
+ var x207 [10<<20]byte
+ z = x1
+ z = x2
+ z = x3
+ z = x4
+ z = x5
+ z = x6
+ z = x7
+ z = x8
+ z = x9
+ z = x10
+ z = x11
+ z = x12
+ z = x13
+ z = x14
+ z = x15
+ z = x16
+ z = x17
+ z = x18
+ z = x19
+ z = x20
+ z = x21
+ z = x22
+ z = x23
+ z = x24
+ z = x25
+ z = x26
+ z = x27
+ z = x28
+ z = x29
+ z = x30
+ z = x31
+ z = x32
+ z = x33
+ z = x34
+ z = x35
+ z = x36
+ z = x37
+ z = x38
+ z = x39
+ z = x40
+ z = x41
+ z = x42
+ z = x43
+ z = x44
+ z = x45
+ z = x46
+ z = x47
+ z = x48
+ z = x49
+ z = x50
+ z = x51
+ z = x52
+ z = x53
+ z = x54
+ z = x55
+ z = x56
+ z = x57
+ z = x58
+ z = x59
+ z = x60
+ z = x61
+ z = x62
+ z = x63
+ z = x64
+ z = x65
+ z = x66
+ z = x67
+ z = x68
+ z = x69
+ z = x70
+ z = x71
+ z = x72
+ z = x73
+ z = x74
+ z = x75
+ z = x76
+ z = x77
+ z = x78
+ z = x79
+ z = x80
+ z = x81
+ z = x82
+ z = x83
+ z = x84
+ z = x85
+ z = x86
+ z = x87
+ z = x88
+ z = x89
+ z = x90
+ z = x91
+ z = x92
+ z = x93
+ z = x94
+ z = x95
+ z = x96
+ z = x97
+ z = x98
+ z = x99
+ z = x100
+ z = x101
+ z = x102
+ z = x103
+ z = x104
+ z = x105
+ z = x106
+ z = x107
+ z = x108
+ z = x109
+ z = x110
+ z = x111
+ z = x112
+ z = x113
+ z = x114
+ z = x115
+ z = x116
+ z = x117
+ z = x118
+ z = x119
+ z = x120
+ z = x121
+ z = x122
+ z = x123
+ z = x124
+ z = x125
+ z = x126
+ z = x127
+ z = x128
+ z = x129
+ z = x130
+ z = x131
+ z = x132
+ z = x133
+ z = x134
+ z = x135
+ z = x136
+ z = x137
+ z = x138
+ z = x139
+ z = x140
+ z = x141
+ z = x142
+ z = x143
+ z = x144
+ z = x145
+ z = x146
+ z = x147
+ z = x148
+ z = x149
+ z = x150
+ z = x151
+ z = x152
+ z = x153
+ z = x154
+ z = x155
+ z = x156
+ z = x157
+ z = x158
+ z = x159
+ z = x160
+ z = x161
+ z = x162
+ z = x163
+ z = x164
+ z = x165
+ z = x166
+ z = x167
+ z = x168
+ z = x169
+ z = x170
+ z = x171
+ z = x172
+ z = x173
+ z = x174
+ z = x175
+ z = x176
+ z = x177
+ z = x178
+ z = x179
+ z = x180
+ z = x181
+ z = x182
+ z = x183
+ z = x184
+ z = x185
+ z = x186
+ z = x187
+ z = x188
+ z = x189
+ z = x190
+ z = x191
+ z = x192
+ z = x193
+ z = x194
+ z = x195
+ z = x196
+ z = x197
+ z = x198
+ z = x199
+ z = x200
+ z = x201
+ z = x202
+ z = x203
+ z = x204
+ z = x205
+ z = x206
+ z = x207
}