cmd/compile: fix OpArg decomposer for registers in expandCalls

Includes test taken from
https://github.com/golang/go/issues/44816#issuecomment-791618179
and improved debugging output.

Updates #44816

Change-Id: I94aeb9c5255f175fe80727be29d218bad54bf7ea
Reviewed-on: https://go-review.googlesource.com/c/go/+/299389
Trust: David Chase <drchase@google.com>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
author: David Chase <drchase@google.com> 2021-03-05 19:56:13 -0500
committer: David Chase <drchase@google.com> 2021-03-09 18:43:58 +0000
commit: 5eb99120844c0494d655678262e1fb41949a2b99 (patch)
tree: 490056b7db3ec8a587281577b3e6e84c60916bad /src/cmd/compile/internal/ssa/expand_calls.go
parent: 48895d021bf631f15d68ecc10cab89ebd9cb28f6 (diff)
download: go-5eb99120844c0494d655678262e1fb41949a2b99.tar.gz
go-5eb99120844c0494d655678262e1fb41949a2b99.zip
1 files changed, 127 insertions, 35 deletions
diff --git a/src/cmd/compile/internal/ssa/expand_calls.go b/src/cmd/compile/internal/ssa/expand_calls.go
index df135853fe..516ea42db9 100644
--- a/src/cmd/compile/internal/ssa/expand_calls.go
+++ b/src/cmd/compile/internal/ssa/expand_calls.go
@@ -78,7 +78,8 @@ func (rc *registerCursor) String() string {
 			regs = regs + x.LongString()
 		}
 	}
-	return fmt.Sprintf("RCSR{storeDest=%v, regsLen=%d, nextSlice=%d, regValues=[%s], config=%v", dest, rc.regsLen, rc.nextSlice, regs, rc.config)
+	// not printing the config because that has not been useful
+	return fmt.Sprintf("RCSR{storeDest=%v, regsLen=%d, nextSlice=%d, regValues=[%s]}", dest, rc.regsLen, rc.nextSlice, regs)
 }
 
 // next effectively post-increments the register cursor; the receiver is advanced,
@@ -189,6 +190,7 @@ type expandState struct {
 	commonSelectors map[selKey]*Value // used to de-dupe selectors
 	commonArgs      map[selKey]*Value // used to de-dupe OpArg/OpArgIntReg/OpArgFloatReg
 	memForCall      map[ID]*Value     // For a call, need to know the unique selector that gets the mem.
+	indentLevel     int               // Indentation for debugging recursion
 }
 
 // intPairTypes returns the pair of 32-bit int types needed to encode a 64-bit integer type on a target
@@ -267,6 +269,19 @@ func ParamAssignmentForArgName(f *Func, name *ir.Name) *abi.ABIParamAssignment {
 	panic(fmt.Errorf("Did not match param %v in prInfo %+v", name, abiInfo.InParams()))
 }
 
+// indent increments (or decrements) the indentation.
+func (x *expandState) indent(n int) {
+	x.indentLevel += n
+}
+
+// Printf does an indented fmt.Printf on the format and args.
+func (x *expandState) Printf(format string, a ...interface{}) (n int, err error) {
+	if x.indentLevel > 0 {
+		fmt.Printf("%[1]*s", x.indentLevel, "")
+	}
+	return fmt.Printf(format, a...)
+}
+
 // Calls that need lowering have some number of inputs, including a memory input,
 // and produce a tuple of (value1, value2, ..., mem) where valueK may or may not be SSA-able.
 
@@ -286,7 +301,9 @@ func ParamAssignmentForArgName(f *Func, name *ir.Name) *abi.ABIParamAssignment {
 // TODO when registers really arrive, must also decompose anything split across two registers or registers and memory.
 func (x *expandState) rewriteSelect(leaf *Value, selector *Value, offset int64, regOffset Abi1RO) []LocalSlot {
 	if x.debug {
-		fmt.Printf("rewriteSelect(%s, %s, %d)\n", leaf.LongString(), selector.LongString(), offset)
+		x.indent(3)
+		defer x.indent(-3)
+		x.Printf("rewriteSelect(%s, %s, %d)\n", leaf.LongString(), selector.LongString(), offset)
 	}
 	var locs []LocalSlot
 	leafType := leaf.Type
@@ -308,7 +325,7 @@ func (x *expandState) rewriteSelect(leaf *Value, selector *Value, offset int64,
 				x.f.Fatalf("Unexpected OpArg type, selector=%s, leaf=%s\n", selector.LongString(), leaf.LongString())
 			}
 			if x.debug {
-				fmt.Printf("\tOpArg, break\n")
+				x.Printf("---OpArg, break\n")
 			}
 			break
 		}
@@ -427,7 +444,7 @@ func (x *expandState) rewriteSelect(leaf *Value, selector *Value, offset int64,
 						w := call.Block.NewValue2(leaf.Pos, OpLoad, leafType, off, call)
 						leaf.copyOf(w)
 						if x.debug {
-							fmt.Printf("\tnew %s\n", w.LongString())
+							x.Printf("---new %s\n", w.LongString())
 						}
 					}
 				}
@@ -539,9 +556,86 @@ func (x *expandState) rewriteDereference(b *Block, base, a, mem *Value, offset,
 	return mem
 }
 
-// decomposeArgOrLoad is a helper for storeArgOrLoad.
-// It decomposes a Load or an Arg into smaller parts, parameterized by the decomposeOne and decomposeTwo functions
-// passed to it, and returns the new mem.
+// decomposeArg is a helper for storeArgOrLoad.
+// It decomposes an Arg into smaller parts and returns the new mem.
+// If the type does not match one of the expected aggregate types, it returns nil instead.
+// Parameters:
+//  pos           -- the location of any generated code.
+//  b             -- the block into which any generated code should normally be placed
+//  source        -- the value, possibly an aggregate, to be stored.
+//  mem           -- the mem flowing into this decomposition (loads depend on it, stores update it)
+//  t             -- the type of the value to be stored
+//  storeOffset   -- if the value is stored in memory, it is stored at base (see storeRc) + storeOffset
+//  loadRegOffset -- regarding source as a value in registers, the register offset in ABI1.  Meaningful only if source is OpArg.
+//  storeRc       -- if the value is stored in registers, this specifies the registers.
+//                   storeRc also identifies whether the target is registers or memory, and has the base for the store operation.
+func (x *expandState) decomposeArg(pos src.XPos, b *Block, source, mem *Value, t *types.Type, storeOffset int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value {
+
+	pa := x.prAssignForArg(source)
+	if len(pa.Registers) > 0 {
+		// Handle the in-registers case directly
+		rts, offs := pa.RegisterTypesAndOffsets()
+		last := loadRegOffset + x.regWidth(t)
+		if offs[loadRegOffset] != 0 {
+			panic(fmt.Errorf("offset %d of requested register %d should be zero", offs[loadRegOffset], loadRegOffset))
+		}
+		for i := loadRegOffset; i < last; i++ {
+			rt := rts[i]
+			off := offs[i]
+			w := x.commonArgs[selKey{source, off, rt.Width, rt}]
+			if w == nil {
+				w = x.newArgToMemOrRegs(source, w, off, i, rt, pos)
+			}
+			mem = x.storeArgOrLoad(pos, b, w, mem, rt, storeOffset+off, i, storeRc.next(rt))
+		}
+		return mem
+	}
+
+	u := source.Type
+	switch u.Kind() {
+	case types.TARRAY:
+		elem := u.Elem()
+		elemRO := x.regWidth(elem)
+		for i := int64(0); i < u.NumElem(); i++ {
+			elemOff := i * elem.Size()
+			mem = storeOneArg(x, pos, b, source, mem, elem, elemOff, storeOffset+elemOff, loadRegOffset, storeRc.next(elem))
+			loadRegOffset += elemRO
+			pos = pos.WithNotStmt()
+		}
+		return mem
+	case types.TSTRUCT:
+		for i := 0; i < u.NumFields(); i++ {
+			fld := u.Field(i)
+			mem = storeOneArg(x, pos, b, source, mem, fld.Type, fld.Offset, storeOffset+fld.Offset, loadRegOffset, storeRc.next(fld.Type))
+			loadRegOffset += x.regWidth(fld.Type)
+			pos = pos.WithNotStmt()
+		}
+		return mem
+	case types.TINT64, types.TUINT64:
+		if t.Width == x.regSize {
+			break
+		}
+		tHi, tLo := x.intPairTypes(t.Kind())
+		mem = storeOneArg(x, pos, b, source, mem, tHi, x.hiOffset, storeOffset+x.hiOffset, loadRegOffset+x.hiRo, storeRc.plus(x.hiRo))
+		pos = pos.WithNotStmt()
+		return storeOneArg(x, pos, b, source, mem, tLo, x.lowOffset, storeOffset+x.lowOffset, loadRegOffset+x.loRo, storeRc.plus(x.loRo))
+	case types.TINTER:
+		return storeTwoArg(x, pos, b, source, mem, x.typs.Uintptr, x.typs.BytePtr, 0, storeOffset, loadRegOffset, storeRc)
+	case types.TSTRING:
+		return storeTwoArg(x, pos, b, source, mem, x.typs.BytePtr, x.typs.Int, 0, storeOffset, loadRegOffset, storeRc)
+	case types.TCOMPLEX64:
+		return storeTwoArg(x, pos, b, source, mem, x.typs.Float32, x.typs.Float32, 0, storeOffset, loadRegOffset, storeRc)
+	case types.TCOMPLEX128:
+		return storeTwoArg(x, pos, b, source, mem, x.typs.Float64, x.typs.Float64, 0, storeOffset, loadRegOffset, storeRc)
+	case types.TSLICE:
+		mem = storeOneArg(x, pos, b, source, mem, x.typs.BytePtr, 0, storeOffset, loadRegOffset, storeRc.next(x.typs.BytePtr))
+		return storeTwoArg(x, pos, b, source, mem, x.typs.Int, x.typs.Int, x.ptrSize, storeOffset+x.ptrSize, loadRegOffset+RO_slice_len, storeRc)
+	}
+	return nil
+}
+
+// decomposeLoad is a helper for storeArgOrLoad.
+// It decomposes a Load into smaller parts and returns the new mem.
 // If the type does not match one of the expected aggregate types, it returns nil instead.
 // Parameters:
 //  pos           -- the location of any generated code.
@@ -555,11 +649,7 @@ func (x *expandState) rewriteDereference(b *Block, base, a, mem *Value, offset,
 //                   StoreRc also identifies whether the target is registers or memory, and has the base for the store operation.
 //
 // TODO -- this needs cleanup; it just works for SSA-able aggregates, and won't fully generalize to register-args aggregates.
-func (x *expandState) decomposeArgOrLoad(pos src.XPos, b *Block, source, mem *Value, t *types.Type, offset int64, loadRegOffset Abi1RO, storeRc registerCursor,
-	// For decompose One and Two, the additional offArg provides the offset from the beginning of "source", if it is in memory.
-	// offStore is combined to base to obtain a store destionation, like "offset" of decomposeArgOrLoad
-	decomposeOne func(x *expandState, pos src.XPos, b *Block, source, mem *Value, t1 *types.Type, offArg, offStore int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value,
-	decomposeTwo func(x *expandState, pos src.XPos, b *Block, source, mem *Value, t1, t2 *types.Type, offArg, offStore int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value) *Value {
+func (x *expandState) decomposeLoad(pos src.XPos, b *Block, source, mem *Value, t *types.Type, offset int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value {
 	u := source.Type
 	switch u.Kind() {
 	case types.TARRAY:
@@ -567,7 +657,7 @@ func (x *expandState) decomposeArgOrLoad(pos src.XPos, b *Block, source, mem *Va
 		elemRO := x.regWidth(elem)
 		for i := int64(0); i < u.NumElem(); i++ {
 			elemOff := i * elem.Size()
-			mem = decomposeOne(x, pos, b, source, mem, elem, elemOff, offset+elemOff, loadRegOffset, storeRc.next(elem))
+			mem = storeOneLoad(x, pos, b, source, mem, elem, elemOff, offset+elemOff, loadRegOffset, storeRc.next(elem))
 			loadRegOffset += elemRO
 			pos = pos.WithNotStmt()
 		}
@@ -575,7 +665,7 @@ func (x *expandState) decomposeArgOrLoad(pos src.XPos, b *Block, source, mem *Va
 	case types.TSTRUCT:
 		for i := 0; i < u.NumFields(); i++ {
 			fld := u.Field(i)
-			mem = decomposeOne(x, pos, b, source, mem, fld.Type, fld.Offset, offset+fld.Offset, loadRegOffset, storeRc.next(fld.Type))
+			mem = storeOneLoad(x, pos, b, source, mem, fld.Type, fld.Offset, offset+fld.Offset, loadRegOffset, storeRc.next(fld.Type))
 			loadRegOffset += x.regWidth(fld.Type)
 			pos = pos.WithNotStmt()
 		}
@@ -585,20 +675,20 @@ func (x *expandState) decomposeArgOrLoad(pos src.XPos, b *Block, source, mem *Va
 			break
 		}
 		tHi, tLo := x.intPairTypes(t.Kind())
-		mem = decomposeOne(x, pos, b, source, mem, tHi, x.hiOffset, offset+x.hiOffset, loadRegOffset+x.hiRo, storeRc.plus(x.hiRo))
+		mem = storeOneLoad(x, pos, b, source, mem, tHi, x.hiOffset, offset+x.hiOffset, loadRegOffset+x.hiRo, storeRc.plus(x.hiRo))
 		pos = pos.WithNotStmt()
-		return decomposeOne(x, pos, b, source, mem, tLo, x.lowOffset, offset+x.lowOffset, loadRegOffset+x.loRo, storeRc.plus(x.loRo))
+		return storeOneLoad(x, pos, b, source, mem, tLo, x.lowOffset, offset+x.lowOffset, loadRegOffset+x.loRo, storeRc.plus(x.loRo))
 	case types.TINTER:
-		return decomposeTwo(x, pos, b, source, mem, x.typs.Uintptr, x.typs.BytePtr, 0, offset, loadRegOffset, storeRc)
+		return storeTwoLoad(x, pos, b, source, mem, x.typs.Uintptr, x.typs.BytePtr, 0, offset, loadRegOffset, storeRc)
 	case types.TSTRING:
-		return decomposeTwo(x, pos, b, source, mem, x.typs.BytePtr, x.typs.Int, 0, offset, loadRegOffset, storeRc)
+		return storeTwoLoad(x, pos, b, source, mem, x.typs.BytePtr, x.typs.Int, 0, offset, loadRegOffset, storeRc)
 	case types.TCOMPLEX64:
-		return decomposeTwo(x, pos, b, source, mem, x.typs.Float32, x.typs.Float32, 0, offset, loadRegOffset, storeRc)
+		return storeTwoLoad(x, pos, b, source, mem, x.typs.Float32, x.typs.Float32, 0, offset, loadRegOffset, storeRc)
 	case types.TCOMPLEX128:
-		return decomposeTwo(x, pos, b, source, mem, x.typs.Float64, x.typs.Float64, 0, offset, loadRegOffset, storeRc)
+		return storeTwoLoad(x, pos, b, source, mem, x.typs.Float64, x.typs.Float64, 0, offset, loadRegOffset, storeRc)
 	case types.TSLICE:
-		mem = decomposeOne(x, pos, b, source, mem, x.typs.BytePtr, 0, offset, loadRegOffset, storeRc.next(x.typs.BytePtr))
-		return decomposeTwo(x, pos, b, source, mem, x.typs.Int, x.typs.Int, x.ptrSize, offset+x.ptrSize, loadRegOffset+RO_slice_len, storeRc)
+		mem = storeOneLoad(x, pos, b, source, mem, x.typs.BytePtr, 0, offset, loadRegOffset, storeRc.next(x.typs.BytePtr))
+		return storeTwoLoad(x, pos, b, source, mem, x.typs.Int, x.typs.Int, x.ptrSize, offset+x.ptrSize, loadRegOffset+RO_slice_len, storeRc)
 	}
 	return nil
 }
@@ -642,7 +732,9 @@ func storeTwoLoad(x *expandState, pos src.XPos, b *Block, source, mem *Value, t1
 // If it does not reach a Load or an Arg, nothing happens; this allows a little freedom in phase ordering.
 func (x *expandState) storeArgOrLoad(pos src.XPos, b *Block, source, mem *Value, t *types.Type, offset int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value {
 	if x.debug {
-		fmt.Printf("\tstoreArgOrLoad(%s;  %s;  %s; %d; %s)\n", source.LongString(), mem.String(), t.String(), offset, storeRc.String())
+		x.indent(3)
+		defer x.indent(-3)
+		x.Printf("storeArgOrLoad(%s;  %s;  %s; %d; %s)\n", source.LongString(), mem.String(), t.String(), offset, storeRc.String())
 	}
 
 	// Start with Opcodes that can be disassembled
@@ -651,13 +743,13 @@ func (x *expandState) storeArgOrLoad(pos src.XPos, b *Block, source, mem *Value,
 		return x.storeArgOrLoad(pos, b, source.Args[0], mem, t, offset, loadRegOffset, storeRc)
 
 	case OpLoad, OpDereference:
-		ret := x.decomposeArgOrLoad(pos, b, source, mem, t, offset, loadRegOffset, storeRc, storeOneLoad, storeTwoLoad)
+		ret := x.decomposeLoad(pos, b, source, mem, t, offset, loadRegOffset, storeRc)
 		if ret != nil {
 			return ret
 		}
 
 	case OpArg:
-		ret := x.decomposeArgOrLoad(pos, b, source, mem, t, offset, loadRegOffset, storeRc, storeOneArg, storeTwoArg)
+		ret := x.decomposeArg(pos, b, source, mem, t, offset, loadRegOffset, storeRc)
 		if ret != nil {
 			return ret
 		}
@@ -823,7 +915,7 @@ func (x *expandState) storeArgOrLoad(pos src.XPos, b *Block, source, mem *Value,
 		s = b.NewValue3A(pos, OpStore, types.TypeMem, t, dst, source, mem)
 	}
 	if x.debug {
-		fmt.Printf("\t\tstoreArg returns %s, storeRc=%s\n", s.LongString(), storeRc.String())
+		x.Printf("-->storeArg returns %s, storeRc=%s\n", s.LongString(), storeRc.String())
 	}
 	return s
 }
@@ -860,7 +952,7 @@ func (x *expandState) rewriteArgs(v *Value, firstArg int) (*Value, []*Value) {
 				aOffset = aux.OffsetOfArg(auxI)
 			}
 			if x.debug {
-				fmt.Printf("storeArg %s, %v, %d\n", a.LongString(), aType, aOffset)
+				x.Printf("storeArg %s, %v, %d\n", a.LongString(), aType, aOffset)
 			}
 			rc.init(aRegs, aux.abiInfo, result, x.sp)
 			mem = x.storeArgOrLoad(pos, v.Block, a, mem, aType, aOffset, 0, rc)
@@ -910,7 +1002,7 @@ func expandCalls(f *Func) {
 	}
 
 	if x.debug {
-		fmt.Printf("\nexpandsCalls(%s)\n", f.Name)
+		x.Printf("\nexpandsCalls(%s)\n", f.Name)
 	}
 
 	// TODO if too slow, whole program iteration can be replaced w/ slices of appropriate values, accumulated in first loop here.
@@ -1055,7 +1147,7 @@ func expandCalls(f *Func) {
 				case OpStructSelect, OpArraySelect, OpSelectN, OpArg:
 					val2Preds[w] += 1
 					if x.debug {
-						fmt.Printf("v2p[%s] = %d\n", w.LongString(), val2Preds[w])
+						x.Printf("v2p[%s] = %d\n", w.LongString(), val2Preds[w])
 					}
 				}
 				fallthrough
@@ -1064,7 +1156,7 @@ func expandCalls(f *Func) {
 				if _, ok := val2Preds[v]; !ok {
 					val2Preds[v] = 0
 					if x.debug {
-						fmt.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v])
+						x.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v])
 					}
 				}
 
@@ -1075,7 +1167,7 @@ func expandCalls(f *Func) {
 				if _, ok := val2Preds[v]; !ok {
 					val2Preds[v] = 0
 					if x.debug {
-						fmt.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v])
+						x.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v])
 					}
 				}
 
@@ -1203,7 +1295,7 @@ func expandCalls(f *Func) {
 	for i, v := range allOrdered {
 		if x.debug {
 			b := v.Block
-			fmt.Printf("allOrdered[%d] = b%d, %s, uses=%d\n", i, b.ID, v.LongString(), v.Uses)
+			x.Printf("allOrdered[%d] = b%d, %s, uses=%d\n", i, b.ID, v.LongString(), v.Uses)
 		}
 		if v.Uses == 0 {
 			v.reset(OpInvalid)
@@ -1305,7 +1397,7 @@ func (x *expandState) rewriteArgToMemOrRegs(v *Value) *Value {
 
 // newArgToMemOrRegs either rewrites toReplace into an OpArg referencing memory or into an OpArgXXXReg to a register,
 // or rewrites it into a copy of the appropriate OpArgXXX.  The actual OpArgXXX is determined by combining baseArg (an OpArg)
-// with offset, regOffset, and t to determine which portion of it reference (either all or a part, in memory or in registers).
+// with offset, regOffset, and t to determine which portion of it to reference (either all or a part, in memory or in registers).
 func (x *expandState) newArgToMemOrRegs(baseArg, toReplace *Value, offset int64, regOffset Abi1RO, t *types.Type, pos src.XPos) *Value {
 	key := selKey{baseArg, offset, t.Width, t}
 	w := x.commonArgs[key]
@@ -1336,7 +1428,7 @@ func (x *expandState) newArgToMemOrRegs(baseArg, toReplace *Value, offset int64,
 			w := baseArg.Block.NewValue0IA(pos, OpArg, t, auxInt, aux)
 			x.commonArgs[key] = w
 			if x.debug {
-				fmt.Printf("\tnew %s\n", w.LongString())
+				x.Printf("---new %s\n", w.LongString())
 			}
 			if toReplace != nil {
 				toReplace.copyOf(w)
@@ -1364,7 +1456,7 @@ func (x *expandState) newArgToMemOrRegs(baseArg, toReplace *Value, offset int64,
 	} else {
 		w := baseArg.Block.NewValue0IA(pos, op, t, auxInt, aux)
 		if x.debug {
-			fmt.Printf("\tnew %s\n", w.LongString())
+			x.Printf("---new %s\n", w.LongString())
 		}
 		x.commonArgs[key] = w
 		if toReplace != nil {
author	David Chase <drchase@google.com>	2021-03-05 19:56:13 -0500
committer	David Chase <drchase@google.com>	2021-03-09 18:43:58 +0000
commit	5eb99120844c0494d655678262e1fb41949a2b99 (patch)
tree	490056b7db3ec8a587281577b3e6e84c60916bad /src/cmd/compile/internal/ssa/expand_calls.go
parent	48895d021bf631f15d68ecc10cab89ebd9cb28f6 (diff)
download	go-5eb99120844c0494d655678262e1fb41949a2b99.tar.gz go-5eb99120844c0494d655678262e1fb41949a2b99.zip