diff options
author | David Chase <drchase@google.com> | 2021-03-05 19:56:13 -0500 |
---|---|---|
committer | David Chase <drchase@google.com> | 2021-03-09 18:43:58 +0000 |
commit | 5eb99120844c0494d655678262e1fb41949a2b99 (patch) | |
tree | 490056b7db3ec8a587281577b3e6e84c60916bad /src/cmd/compile/internal/ssa/expand_calls.go | |
parent | 48895d021bf631f15d68ecc10cab89ebd9cb28f6 (diff) | |
download | go-5eb99120844c0494d655678262e1fb41949a2b99.tar.gz go-5eb99120844c0494d655678262e1fb41949a2b99.zip |
cmd/compile: fix OpArg decomposer for registers in expandCalls
Includes test taken from
https://github.com/golang/go/issues/44816#issuecomment-791618179
and improved debugging output.
Updates #44816
Change-Id: I94aeb9c5255f175fe80727be29d218bad54bf7ea
Reviewed-on: https://go-review.googlesource.com/c/go/+/299389
Trust: David Chase <drchase@google.com>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Diffstat (limited to 'src/cmd/compile/internal/ssa/expand_calls.go')
-rw-r--r-- | src/cmd/compile/internal/ssa/expand_calls.go | 162 |
1 files changed, 127 insertions, 35 deletions
diff --git a/src/cmd/compile/internal/ssa/expand_calls.go b/src/cmd/compile/internal/ssa/expand_calls.go index df135853fe..516ea42db9 100644 --- a/src/cmd/compile/internal/ssa/expand_calls.go +++ b/src/cmd/compile/internal/ssa/expand_calls.go @@ -78,7 +78,8 @@ func (rc *registerCursor) String() string { regs = regs + x.LongString() } } - return fmt.Sprintf("RCSR{storeDest=%v, regsLen=%d, nextSlice=%d, regValues=[%s], config=%v", dest, rc.regsLen, rc.nextSlice, regs, rc.config) + // not printing the config because that has not been useful + return fmt.Sprintf("RCSR{storeDest=%v, regsLen=%d, nextSlice=%d, regValues=[%s]}", dest, rc.regsLen, rc.nextSlice, regs) } // next effectively post-increments the register cursor; the receiver is advanced, @@ -189,6 +190,7 @@ type expandState struct { commonSelectors map[selKey]*Value // used to de-dupe selectors commonArgs map[selKey]*Value // used to de-dupe OpArg/OpArgIntReg/OpArgFloatReg memForCall map[ID]*Value // For a call, need to know the unique selector that gets the mem. + indentLevel int // Indentation for debugging recursion } // intPairTypes returns the pair of 32-bit int types needed to encode a 64-bit integer type on a target @@ -267,6 +269,19 @@ func ParamAssignmentForArgName(f *Func, name *ir.Name) *abi.ABIParamAssignment { panic(fmt.Errorf("Did not match param %v in prInfo %+v", name, abiInfo.InParams())) } +// indent increments (or decrements) the indentation. +func (x *expandState) indent(n int) { + x.indentLevel += n +} + +// Printf does an indented fmt.Printf on te format and args. +func (x *expandState) Printf(format string, a ...interface{}) (n int, err error) { + if x.indentLevel > 0 { + fmt.Printf("%[1]*s", x.indentLevel, "") + } + return fmt.Printf(format, a...) +} + // Calls that need lowering have some number of inputs, including a memory input, // and produce a tuple of (value1, value2, ..., mem) where valueK may or may not be SSA-able. @@ -286,7 +301,9 @@ func ParamAssignmentForArgName(f *Func, name *ir.Name) *abi.ABIParamAssignment { // TODO when registers really arrive, must also decompose anything split across two registers or registers and memory. func (x *expandState) rewriteSelect(leaf *Value, selector *Value, offset int64, regOffset Abi1RO) []LocalSlot { if x.debug { - fmt.Printf("rewriteSelect(%s, %s, %d)\n", leaf.LongString(), selector.LongString(), offset) + x.indent(3) + defer x.indent(-3) + x.Printf("rewriteSelect(%s, %s, %d)\n", leaf.LongString(), selector.LongString(), offset) } var locs []LocalSlot leafType := leaf.Type @@ -308,7 +325,7 @@ func (x *expandState) rewriteSelect(leaf *Value, selector *Value, offset int64, x.f.Fatalf("Unexpected OpArg type, selector=%s, leaf=%s\n", selector.LongString(), leaf.LongString()) } if x.debug { - fmt.Printf("\tOpArg, break\n") + x.Printf("---OpArg, break\n") } break } @@ -427,7 +444,7 @@ func (x *expandState) rewriteSelect(leaf *Value, selector *Value, offset int64, w := call.Block.NewValue2(leaf.Pos, OpLoad, leafType, off, call) leaf.copyOf(w) if x.debug { - fmt.Printf("\tnew %s\n", w.LongString()) + x.Printf("---new %s\n", w.LongString()) } } } @@ -539,9 +556,86 @@ func (x *expandState) rewriteDereference(b *Block, base, a, mem *Value, offset, return mem } -// decomposeArgOrLoad is a helper for storeArgOrLoad. -// It decomposes a Load or an Arg into smaller parts, parameterized by the decomposeOne and decomposeTwo functions -// passed to it, and returns the new mem. +// decomposeArg is a helper for storeArgOrLoad. +// It decomposes a Load or an Arg into smaller parts and returns the new mem. +// If the type does not match one of the expected aggregate types, it returns nil instead. +// Parameters: +// pos -- the location of any generated code. +// b -- the block into which any generated code should normally be placed +// source -- the value, possibly an aggregate, to be stored. +// mem -- the mem flowing into this decomposition (loads depend on it, stores updated it) +// t -- the type of the value to be stored +// storeOffset -- if the value is stored in memory, it is stored at base (see storeRc) + storeOffset +// loadRegOffset -- regarding source as a value in registers, the register offset in ABI1. Meaningful only if source is OpArg. +// storeRc -- storeRC; if the value is stored in registers, this specifies the registers. +// StoreRc also identifies whether the target is registers or memory, and has the base for the store operation. +func (x *expandState) decomposeArg(pos src.XPos, b *Block, source, mem *Value, t *types.Type, storeOffset int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value { + + pa := x.prAssignForArg(source) + if len(pa.Registers) > 0 { + // Handle the in-registers case directly + rts, offs := pa.RegisterTypesAndOffsets() + last := loadRegOffset + x.regWidth(t) + if offs[loadRegOffset] != 0 { + panic(fmt.Errorf("offset %d of requested register %d should be zero", offs[loadRegOffset], loadRegOffset)) + } + for i := loadRegOffset; i < last; i++ { + rt := rts[i] + off := offs[i] + w := x.commonArgs[selKey{source, off, rt.Width, rt}] + if w == nil { + w = x.newArgToMemOrRegs(source, w, off, i, rt, pos) + } + mem = x.storeArgOrLoad(pos, b, w, mem, rt, storeOffset+off, i, storeRc.next(rt)) + } + return mem + } + + u := source.Type + switch u.Kind() { + case types.TARRAY: + elem := u.Elem() + elemRO := x.regWidth(elem) + for i := int64(0); i < u.NumElem(); i++ { + elemOff := i * elem.Size() + mem = storeOneArg(x, pos, b, source, mem, elem, elemOff, storeOffset+elemOff, loadRegOffset, storeRc.next(elem)) + loadRegOffset += elemRO + pos = pos.WithNotStmt() + } + return mem + case types.TSTRUCT: + for i := 0; i < u.NumFields(); i++ { + fld := u.Field(i) + mem = storeOneArg(x, pos, b, source, mem, fld.Type, fld.Offset, storeOffset+fld.Offset, loadRegOffset, storeRc.next(fld.Type)) + loadRegOffset += x.regWidth(fld.Type) + pos = pos.WithNotStmt() + } + return mem + case types.TINT64, types.TUINT64: + if t.Width == x.regSize { + break + } + tHi, tLo := x.intPairTypes(t.Kind()) + mem = storeOneArg(x, pos, b, source, mem, tHi, x.hiOffset, storeOffset+x.hiOffset, loadRegOffset+x.hiRo, storeRc.plus(x.hiRo)) + pos = pos.WithNotStmt() + return storeOneArg(x, pos, b, source, mem, tLo, x.lowOffset, storeOffset+x.lowOffset, loadRegOffset+x.loRo, storeRc.plus(x.loRo)) + case types.TINTER: + return storeTwoArg(x, pos, b, source, mem, x.typs.Uintptr, x.typs.BytePtr, 0, storeOffset, loadRegOffset, storeRc) + case types.TSTRING: + return storeTwoArg(x, pos, b, source, mem, x.typs.BytePtr, x.typs.Int, 0, storeOffset, loadRegOffset, storeRc) + case types.TCOMPLEX64: + return storeTwoArg(x, pos, b, source, mem, x.typs.Float32, x.typs.Float32, 0, storeOffset, loadRegOffset, storeRc) + case types.TCOMPLEX128: + return storeTwoArg(x, pos, b, source, mem, x.typs.Float64, x.typs.Float64, 0, storeOffset, loadRegOffset, storeRc) + case types.TSLICE: + mem = storeOneArg(x, pos, b, source, mem, x.typs.BytePtr, 0, storeOffset, loadRegOffset, storeRc.next(x.typs.BytePtr)) + return storeTwoArg(x, pos, b, source, mem, x.typs.Int, x.typs.Int, x.ptrSize, storeOffset+x.ptrSize, loadRegOffset+RO_slice_len, storeRc) + } + return nil +} + +// decomposeLoad is a helper for storeArgOrLoad. +// It decomposes a Load into smaller parts and returns the new mem. // If the type does not match one of the expected aggregate types, it returns nil instead. // Parameters: // pos -- the location of any generated code. @@ -555,11 +649,7 @@ func (x *expandState) rewriteDereference(b *Block, base, a, mem *Value, offset, // StoreRc also identifies whether the target is registers or memory, and has the base for the store operation. // // TODO -- this needs cleanup; it just works for SSA-able aggregates, and won't fully generalize to register-args aggregates. -func (x *expandState) decomposeArgOrLoad(pos src.XPos, b *Block, source, mem *Value, t *types.Type, offset int64, loadRegOffset Abi1RO, storeRc registerCursor, - // For decompose One and Two, the additional offArg provides the offset from the beginning of "source", if it is in memory. - // offStore is combined to base to obtain a store destionation, like "offset" of decomposeArgOrLoad - decomposeOne func(x *expandState, pos src.XPos, b *Block, source, mem *Value, t1 *types.Type, offArg, offStore int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value, - decomposeTwo func(x *expandState, pos src.XPos, b *Block, source, mem *Value, t1, t2 *types.Type, offArg, offStore int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value) *Value { +func (x *expandState) decomposeLoad(pos src.XPos, b *Block, source, mem *Value, t *types.Type, offset int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value { u := source.Type switch u.Kind() { case types.TARRAY: @@ -567,7 +657,7 @@ func (x *expandState) decomposeArgOrLoad(pos src.XPos, b *Block, source, mem *Va elemRO := x.regWidth(elem) for i := int64(0); i < u.NumElem(); i++ { elemOff := i * elem.Size() - mem = decomposeOne(x, pos, b, source, mem, elem, elemOff, offset+elemOff, loadRegOffset, storeRc.next(elem)) + mem = storeOneLoad(x, pos, b, source, mem, elem, elemOff, offset+elemOff, loadRegOffset, storeRc.next(elem)) loadRegOffset += elemRO pos = pos.WithNotStmt() } @@ -575,7 +665,7 @@ func (x *expandState) decomposeArgOrLoad(pos src.XPos, b *Block, source, mem *Va case types.TSTRUCT: for i := 0; i < u.NumFields(); i++ { fld := u.Field(i) - mem = decomposeOne(x, pos, b, source, mem, fld.Type, fld.Offset, offset+fld.Offset, loadRegOffset, storeRc.next(fld.Type)) + mem = storeOneLoad(x, pos, b, source, mem, fld.Type, fld.Offset, offset+fld.Offset, loadRegOffset, storeRc.next(fld.Type)) loadRegOffset += x.regWidth(fld.Type) pos = pos.WithNotStmt() } @@ -585,20 +675,20 @@ func (x *expandState) decomposeArgOrLoad(pos src.XPos, b *Block, source, mem *Va break } tHi, tLo := x.intPairTypes(t.Kind()) - mem = decomposeOne(x, pos, b, source, mem, tHi, x.hiOffset, offset+x.hiOffset, loadRegOffset+x.hiRo, storeRc.plus(x.hiRo)) + mem = storeOneLoad(x, pos, b, source, mem, tHi, x.hiOffset, offset+x.hiOffset, loadRegOffset+x.hiRo, storeRc.plus(x.hiRo)) pos = pos.WithNotStmt() - return decomposeOne(x, pos, b, source, mem, tLo, x.lowOffset, offset+x.lowOffset, loadRegOffset+x.loRo, storeRc.plus(x.loRo)) + return storeOneLoad(x, pos, b, source, mem, tLo, x.lowOffset, offset+x.lowOffset, loadRegOffset+x.loRo, storeRc.plus(x.loRo)) case types.TINTER: - return decomposeTwo(x, pos, b, source, mem, x.typs.Uintptr, x.typs.BytePtr, 0, offset, loadRegOffset, storeRc) + return storeTwoLoad(x, pos, b, source, mem, x.typs.Uintptr, x.typs.BytePtr, 0, offset, loadRegOffset, storeRc) case types.TSTRING: - return decomposeTwo(x, pos, b, source, mem, x.typs.BytePtr, x.typs.Int, 0, offset, loadRegOffset, storeRc) + return storeTwoLoad(x, pos, b, source, mem, x.typs.BytePtr, x.typs.Int, 0, offset, loadRegOffset, storeRc) case types.TCOMPLEX64: - return decomposeTwo(x, pos, b, source, mem, x.typs.Float32, x.typs.Float32, 0, offset, loadRegOffset, storeRc) + return storeTwoLoad(x, pos, b, source, mem, x.typs.Float32, x.typs.Float32, 0, offset, loadRegOffset, storeRc) case types.TCOMPLEX128: - return decomposeTwo(x, pos, b, source, mem, x.typs.Float64, x.typs.Float64, 0, offset, loadRegOffset, storeRc) + return storeTwoLoad(x, pos, b, source, mem, x.typs.Float64, x.typs.Float64, 0, offset, loadRegOffset, storeRc) case types.TSLICE: - mem = decomposeOne(x, pos, b, source, mem, x.typs.BytePtr, 0, offset, loadRegOffset, storeRc.next(x.typs.BytePtr)) - return decomposeTwo(x, pos, b, source, mem, x.typs.Int, x.typs.Int, x.ptrSize, offset+x.ptrSize, loadRegOffset+RO_slice_len, storeRc) + mem = storeOneLoad(x, pos, b, source, mem, x.typs.BytePtr, 0, offset, loadRegOffset, storeRc.next(x.typs.BytePtr)) + return storeTwoLoad(x, pos, b, source, mem, x.typs.Int, x.typs.Int, x.ptrSize, offset+x.ptrSize, loadRegOffset+RO_slice_len, storeRc) } return nil } @@ -642,7 +732,9 @@ func storeTwoLoad(x *expandState, pos src.XPos, b *Block, source, mem *Value, t1 // If it does not reach a Load or an Arg, nothing happens; this allows a little freedom in phase ordering. func (x *expandState) storeArgOrLoad(pos src.XPos, b *Block, source, mem *Value, t *types.Type, offset int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value { if x.debug { - fmt.Printf("\tstoreArgOrLoad(%s; %s; %s; %d; %s)\n", source.LongString(), mem.String(), t.String(), offset, storeRc.String()) + x.indent(3) + defer x.indent(-3) + x.Printf("storeArgOrLoad(%s; %s; %s; %d; %s)\n", source.LongString(), mem.String(), t.String(), offset, storeRc.String()) } // Start with Opcodes that can be disassembled @@ -651,13 +743,13 @@ func (x *expandState) storeArgOrLoad(pos src.XPos, b *Block, source, mem *Value, return x.storeArgOrLoad(pos, b, source.Args[0], mem, t, offset, loadRegOffset, storeRc) case OpLoad, OpDereference: - ret := x.decomposeArgOrLoad(pos, b, source, mem, t, offset, loadRegOffset, storeRc, storeOneLoad, storeTwoLoad) + ret := x.decomposeLoad(pos, b, source, mem, t, offset, loadRegOffset, storeRc) if ret != nil { return ret } case OpArg: - ret := x.decomposeArgOrLoad(pos, b, source, mem, t, offset, loadRegOffset, storeRc, storeOneArg, storeTwoArg) + ret := x.decomposeArg(pos, b, source, mem, t, offset, loadRegOffset, storeRc) if ret != nil { return ret } @@ -823,7 +915,7 @@ func (x *expandState) storeArgOrLoad(pos src.XPos, b *Block, source, mem *Value, s = b.NewValue3A(pos, OpStore, types.TypeMem, t, dst, source, mem) } if x.debug { - fmt.Printf("\t\tstoreArg returns %s, storeRc=%s\n", s.LongString(), storeRc.String()) + x.Printf("-->storeArg returns %s, storeRc=%s\n", s.LongString(), storeRc.String()) } return s } @@ -860,7 +952,7 @@ func (x *expandState) rewriteArgs(v *Value, firstArg int) (*Value, []*Value) { aOffset = aux.OffsetOfArg(auxI) } if x.debug { - fmt.Printf("storeArg %s, %v, %d\n", a.LongString(), aType, aOffset) + x.Printf("storeArg %s, %v, %d\n", a.LongString(), aType, aOffset) } rc.init(aRegs, aux.abiInfo, result, x.sp) mem = x.storeArgOrLoad(pos, v.Block, a, mem, aType, aOffset, 0, rc) @@ -910,7 +1002,7 @@ func expandCalls(f *Func) { } if x.debug { - fmt.Printf("\nexpandsCalls(%s)\n", f.Name) + x.Printf("\nexpandsCalls(%s)\n", f.Name) } // TODO if too slow, whole program iteration can be replaced w/ slices of appropriate values, accumulated in first loop here. @@ -1055,7 +1147,7 @@ func expandCalls(f *Func) { case OpStructSelect, OpArraySelect, OpSelectN, OpArg: val2Preds[w] += 1 if x.debug { - fmt.Printf("v2p[%s] = %d\n", w.LongString(), val2Preds[w]) + x.Printf("v2p[%s] = %d\n", w.LongString(), val2Preds[w]) } } fallthrough @@ -1064,7 +1156,7 @@ func expandCalls(f *Func) { if _, ok := val2Preds[v]; !ok { val2Preds[v] = 0 if x.debug { - fmt.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v]) + x.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v]) } } @@ -1075,7 +1167,7 @@ func expandCalls(f *Func) { if _, ok := val2Preds[v]; !ok { val2Preds[v] = 0 if x.debug { - fmt.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v]) + x.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v]) } } @@ -1203,7 +1295,7 @@ func expandCalls(f *Func) { for i, v := range allOrdered { if x.debug { b := v.Block - fmt.Printf("allOrdered[%d] = b%d, %s, uses=%d\n", i, b.ID, v.LongString(), v.Uses) + x.Printf("allOrdered[%d] = b%d, %s, uses=%d\n", i, b.ID, v.LongString(), v.Uses) } if v.Uses == 0 { v.reset(OpInvalid) @@ -1305,7 +1397,7 @@ func (x *expandState) rewriteArgToMemOrRegs(v *Value) *Value { // newArgToMemOrRegs either rewrites toReplace into an OpArg referencing memory or into an OpArgXXXReg to a register, // or rewrites it into a copy of the appropriate OpArgXXX. The actual OpArgXXX is determined by combining baseArg (an OpArg) -// with offset, regOffset, and t to determine which portion of it reference (either all or a part, in memory or in registers). +// with offset, regOffset, and t to determine which portion of it to reference (either all or a part, in memory or in registers). func (x *expandState) newArgToMemOrRegs(baseArg, toReplace *Value, offset int64, regOffset Abi1RO, t *types.Type, pos src.XPos) *Value { key := selKey{baseArg, offset, t.Width, t} w := x.commonArgs[key] @@ -1336,7 +1428,7 @@ func (x *expandState) newArgToMemOrRegs(baseArg, toReplace *Value, offset int64, w := baseArg.Block.NewValue0IA(pos, OpArg, t, auxInt, aux) x.commonArgs[key] = w if x.debug { - fmt.Printf("\tnew %s\n", w.LongString()) + x.Printf("---new %s\n", w.LongString()) } if toReplace != nil { toReplace.copyOf(w) @@ -1364,7 +1456,7 @@ func (x *expandState) newArgToMemOrRegs(baseArg, toReplace *Value, offset int64, } else { w := baseArg.Block.NewValue0IA(pos, op, t, auxInt, aux) if x.debug { - fmt.Printf("\tnew %s\n", w.LongString()) + x.Printf("---new %s\n", w.LongString()) } x.commonArgs[key] = w if toReplace != nil { |