From a5d1a9df812706708da1940898cf8cad8d5f9de2 Mon Sep 17 00:00:00 2001 From: "Bryan C. Mills" Date: Fri, 27 Mar 2020 12:11:21 -0400 Subject: net/http: remove arbitrary timeouts from TestIdentityResponse and TestTLSHandshakeTimeout These hard-coded timeouts make the tests flaky on slow builders (such as solaris-amd64-oraclerel), and make test failures harder to diagnose anyway (by replacing dumps of the stuck goroutine stacks with failure messages that do not describe the stuck goroutines). Eliminate them and simplify the tests. Fixes #37327 Fixes #38112 Change-Id: Id40febe349d134ef53c702e36199bfbf2b6468ff Reviewed-on: https://go-review.googlesource.com/c/go/+/225977 Run-TryBot: Bryan C. Mills TryBot-Result: Gobot Gobot Reviewed-by: Brad Fitzpatrick --- src/net/http/serve_test.go | 93 ++++++++++++++++++---------------------------- 1 file changed, 37 insertions(+), 56 deletions(-) diff --git a/src/net/http/serve_test.go b/src/net/http/serve_test.go index 21ee7f33c8..9488821466 100644 --- a/src/net/http/serve_test.go +++ b/src/net/http/serve_test.go @@ -1057,16 +1057,13 @@ func TestIdentityResponse(t *testing.T) { t.Fatalf("error writing: %v", err) } - // The ReadAll will hang for a failing test, so use a Timer to - // fail explicitly. - goTimeout(t, 2*time.Second, func() { - got, _ := ioutil.ReadAll(conn) - expectedSuffix := "\r\n\r\ntoo short" - if !strings.HasSuffix(string(got), expectedSuffix) { - t.Errorf("Expected output to end with %q; got response body %q", - expectedSuffix, string(got)) - } - }) + // The ReadAll will hang for a failing test. + got, _ := ioutil.ReadAll(conn) + expectedSuffix := "\r\n\r\ntoo short" + if !strings.HasSuffix(string(got), expectedSuffix) { + t.Errorf("Expected output to end with %q; got response body %q", + expectedSuffix, string(got)) + } } func testTCPConnectionCloses(t *testing.T, req string, h Handler) { @@ -1438,13 +1435,13 @@ func TestTLSHandshakeTimeout(t *testing.T) { t.Fatalf("Dial: %v", err) } defer conn.Close() - goTimeout(t, 10*time.Second, func() { - var buf [1]byte - n, err := conn.Read(buf[:]) - if err == nil || n != 0 { - t.Errorf("Read = %d, %v; want an error and no bytes", n, err) - } - }) + + var buf [1]byte + n, err := conn.Read(buf[:]) + if err == nil || n != 0 { + t.Errorf("Read = %d, %v; want an error and no bytes", n, err) + } + select { case v := <-errc: if !strings.Contains(v, "timeout") && !strings.Contains(v, "TLS handshake") { @@ -1479,30 +1476,29 @@ func TestTLSServer(t *testing.T) { t.Fatalf("Dial: %v", err) } defer idleConn.Close() - goTimeout(t, 10*time.Second, func() { - if !strings.HasPrefix(ts.URL, "https://") { - t.Errorf("expected test TLS server to start with https://, got %q", ts.URL) - return - } - client := ts.Client() - res, err := client.Get(ts.URL) - if err != nil { - t.Error(err) - return - } - if res == nil { - t.Errorf("got nil Response") - return - } - defer res.Body.Close() - if res.Header.Get("X-TLS-Set") != "true" { - t.Errorf("expected X-TLS-Set response header") - return - } - if res.Header.Get("X-TLS-HandshakeComplete") != "true" { - t.Errorf("expected X-TLS-HandshakeComplete header") - } - }) + + if !strings.HasPrefix(ts.URL, "https://") { + t.Errorf("expected test TLS server to start with https://, got %q", ts.URL) + return + } + client := ts.Client() + res, err := client.Get(ts.URL) + if err != nil { + t.Error(err) + return + } + if res == nil { + t.Errorf("got nil Response") + return + } + defer res.Body.Close() + if res.Header.Get("X-TLS-Set") != "true" { + t.Errorf("expected X-TLS-Set response 
header") + return + } + if res.Header.Get("X-TLS-HandshakeComplete") != "true" { + t.Errorf("expected X-TLS-HandshakeComplete header") + } } func TestServeTLS(t *testing.T) { @@ -3629,21 +3625,6 @@ func TestHeaderToWire(t *testing.T) { } } -// goTimeout runs f, failing t if f takes more than ns to complete. -func goTimeout(t *testing.T, d time.Duration, f func()) { - ch := make(chan bool, 2) - timer := time.AfterFunc(d, func() { - t.Errorf("Timeout expired after %v", d) - ch <- true - }) - defer timer.Stop() - go func() { - defer func() { ch <- true }() - f() - }() - <-ch -} - type errorListener struct { errs []error } -- cgit v1.2.3-54-g00ecf From 4a8b9bd2646a5b297197ffd1c412ef6afebe5c0d Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Thu, 26 Mar 2020 15:10:21 -0400 Subject: runtime/pprof: increment fake overflow record PC gentraceback generates PCs which are usually following the CALL instruction. For those that aren't, it fixes up the PCs so that functions processing the output can unconditionally decrement the PC. runtime_expandInlineFrames does this unconditional decrement when looking up the function. However, the fake stack frame generated for overflow records fails to meet the contract, and decrementing the PC results in a PC in the previous function. If that function contains inlined call, runtime_expandInlineFrames will not short-circuit and will panic trying to look up a PC that doesn't exist. Note that the added test does not fail at HEAD. It will only fail (with a panic) if the function preceeding lostProfileEvent contains inlined function calls. At the moment (on linux/amd64), that is runtime/pprof.addMaxRSS, which does not. Fixes #38096 Change-Id: Iad0819f23c566011c920fd9a5b1254719228da0b Reviewed-on: https://go-review.googlesource.com/c/go/+/225661 Reviewed-by: Hyang-Ah Hana Kim Reviewed-by: Heschi Kreinick Reviewed-by: Keith Randall Run-TryBot: Michael Pratt TryBot-Result: Gobot Gobot --- src/runtime/pprof/pprof_test.go | 12 ++++++++++++ src/runtime/pprof/proto.go | 5 ++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/runtime/pprof/pprof_test.go b/src/runtime/pprof/pprof_test.go index 5bfc3b6134..83b3152d68 100644 --- a/src/runtime/pprof/pprof_test.go +++ b/src/runtime/pprof/pprof_test.go @@ -1171,6 +1171,18 @@ func TestTryAdd(t *testing.T) { {Value: []int64{10, 10 * period}, Location: []*profile.Location{{ID: 1}, {ID: 1}}}, {Value: []int64{20, 20 * period}, Location: []*profile.Location{{ID: 1}}}, }, + }, { + name: "bug38096", + input: []uint64{ + 3, 0, 500, // hz = 500. Must match the period. + // count (data[2]) == 0 && len(stk) == 1 is an overflow + // entry. The "stk" entry is actually the count. + 4, 0, 0, 4242, + }, + wantLocs: [][]string{{"runtime/pprof.lostProfileEvent"}}, + wantSamples: []*profile.Sample{ + {Value: []int64{4242, 4242 * period}, Location: []*profile.Location{{ID: 1}}}, + }, }, { // If a function is called recursively then it must not be // inlined in the caller. diff --git a/src/runtime/pprof/proto.go b/src/runtime/pprof/proto.go index 416ace7ab2..bb63153a70 100644 --- a/src/runtime/pprof/proto.go +++ b/src/runtime/pprof/proto.go @@ -322,7 +322,10 @@ func (b *profileBuilder) addCPUData(data []uint64, tags []unsafe.Pointer) error // overflow record count = uint64(stk[0]) stk = []uint64{ - uint64(funcPC(lostProfileEvent)), + // gentraceback guarantees that PCs in the + // stack can be unconditionally decremented and + // still be valid, so we must do the same. 
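+				// runtime_expandInlineFrames, for example, performs
+				// that decrement before its lookup, so the +1 below
+				// keeps the decremented PC inside lostProfileEvent.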
+ uint64(funcPC(lostProfileEvent)+1), } } b.m.lookup(stk, tag).count += int64(count) -- cgit v1.2.3-54-g00ecf From af7eafd1505f9e150aa9fc21cd3f19da42a30333 Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Tue, 24 Mar 2020 13:39:44 -0700 Subject: cmd/compile: convert 386 port to use addressing modes pass (take 2) Retrying CL 222782, with a fix that will hopefully stop the random crashing. The issue with the previous CL is that it does pointer arithmetic in a way that may briefly generate an out-of-bounds pointer. If an interrupt happens to occur in that state, the referenced object may be collected incorrectly. Suppose there was code that did s[x+c]. The previous CL had a rule to the effect of ptr + (x + c) -> c + (ptr + x). But ptr+x is not guaranteed to point to the same object as ptr. In contrast, ptr+(x+c) is guaranteed to point to the same object as ptr, because we would have already checked that x+c is in bounds. For example, strconv.trim used to have this code: MOVZX -0x1(BX)(DX*1), BP CMPL $0x30, AL After CL 222782, it had this code: LEAL 0(BX)(DX*1), BP CMPB $0x30, -0x1(BP) An interrupt between those last two instructions could see BP pointing outside the backing store of the slice involved. It's really hard to actually demonstrate a bug. First, you need to have an interrupt occur at exactly the right time. Then, there must be no other pointers to the object in question. Since the interrupted frame will be scanned conservatively, there can't even be a dead pointer in another register or on the stack. (In the example above, a bug can't happen because BX still holds the original pointer.) Then, the object in question needs to be collected (or at least scanned?) before the interrupted code continues. This CL needs to handle load combining somewhat differently than CL 222782 because of the new restriction on arithmetic. That's the only real difference (other than removing the bad rules) from that old CL. This bug is also present in the amd64 rewrite rules, and we haven't seen any crashing as a result. I will fix up that code similarly to this one in a separate CL. Update #37881 Change-Id: I5f0d584d9bef4696bfe89a61ef0a27c8d507329f Reviewed-on: https://go-review.googlesource.com/c/go/+/225798 Run-TryBot: Keith Randall TryBot-Result: Gobot Gobot Reviewed-by: Cherry Zhang --- src/cmd/compile/internal/ssa/addressingmodes.go | 83 +- src/cmd/compile/internal/ssa/gen/386.rules | 242 +- src/cmd/compile/internal/ssa/gen/AMD64.rules | 1 + src/cmd/compile/internal/ssa/gen/generic.rules | 6 +- src/cmd/compile/internal/ssa/gen/rulegen.go | 5 + src/cmd/compile/internal/ssa/rewrite.go | 16 + src/cmd/compile/internal/ssa/rewrite386.go | 6156 ++++------------------- src/cmd/compile/internal/ssa/rewritegeneric.go | 8 +- test/codegen/memops.go | 78 +- 9 files changed, 1270 insertions(+), 5325 deletions(-) diff --git a/src/cmd/compile/internal/ssa/addressingmodes.go b/src/cmd/compile/internal/ssa/addressingmodes.go index 8874b56a9b..2af8a4d1fc 100644 --- a/src/cmd/compile/internal/ssa/addressingmodes.go +++ b/src/cmd/compile/internal/ssa/addressingmodes.go @@ -11,8 +11,8 @@ func addressingModes(f *Func) { default: // Most architectures can't do this. return - case "amd64": - // TODO: 386, s390x? + case "amd64", "386": + // TODO: s390x? } var tmp []*Value @@ -21,7 +21,17 @@ func addressingModes(f *Func) { if !combineFirst[v.Op] { continue } - p := v.Args[0] + // All matched operations have the pointer in arg[0]. + // All results have the pointer in arg[0] and the index in arg[1]. 
+ // *Except* for operations which update a register, + // which are marked with resultInArg0. Those have + // the pointer in arg[1], and the corresponding result op + // has the pointer in arg[1] and the index in arg[2]. + ptrIndex := 0 + if opcodeTable[v.Op].resultInArg0 { + ptrIndex = 1 + } + p := v.Args[ptrIndex] c, ok := combine[[2]Op{v.Op, p.Op}] if !ok { continue @@ -71,10 +81,11 @@ func addressingModes(f *Func) { f.Fatalf("unknown aux combining for %s and %s\n", v.Op, p.Op) } // Combine the operations. - tmp = append(tmp[:0], v.Args[1:]...) + tmp = append(tmp[:0], v.Args[:ptrIndex]...) + tmp = append(tmp, p.Args...) + tmp = append(tmp, v.Args[ptrIndex+1:]...) v.resetArgs() v.Op = c - v.AddArgs(p.Args...) v.AddArgs(tmp...) } } @@ -97,6 +108,7 @@ func init() { // x.Args[0].Args + x.Args[1:] // Additionally, the Aux/AuxInt from x.Args[0] is merged into x. var combine = map[[2]Op]Op{ + // amd64 [2]Op{OpAMD64MOVBload, OpAMD64ADDQ}: OpAMD64MOVBloadidx1, [2]Op{OpAMD64MOVWload, OpAMD64ADDQ}: OpAMD64MOVWloadidx1, [2]Op{OpAMD64MOVLload, OpAMD64ADDQ}: OpAMD64MOVLloadidx1, @@ -150,5 +162,64 @@ var combine = map[[2]Op]Op{ [2]Op{OpAMD64MOVQstoreconst, OpAMD64LEAQ1}: OpAMD64MOVQstoreconstidx1, [2]Op{OpAMD64MOVQstoreconst, OpAMD64LEAQ8}: OpAMD64MOVQstoreconstidx8, - // TODO: 386 + // 386 + [2]Op{Op386MOVBload, Op386ADDL}: Op386MOVBloadidx1, + [2]Op{Op386MOVWload, Op386ADDL}: Op386MOVWloadidx1, + [2]Op{Op386MOVLload, Op386ADDL}: Op386MOVLloadidx1, + [2]Op{Op386MOVSSload, Op386ADDL}: Op386MOVSSloadidx1, + [2]Op{Op386MOVSDload, Op386ADDL}: Op386MOVSDloadidx1, + + [2]Op{Op386MOVBstore, Op386ADDL}: Op386MOVBstoreidx1, + [2]Op{Op386MOVWstore, Op386ADDL}: Op386MOVWstoreidx1, + [2]Op{Op386MOVLstore, Op386ADDL}: Op386MOVLstoreidx1, + [2]Op{Op386MOVSSstore, Op386ADDL}: Op386MOVSSstoreidx1, + [2]Op{Op386MOVSDstore, Op386ADDL}: Op386MOVSDstoreidx1, + + [2]Op{Op386MOVBstoreconst, Op386ADDL}: Op386MOVBstoreconstidx1, + [2]Op{Op386MOVWstoreconst, Op386ADDL}: Op386MOVWstoreconstidx1, + [2]Op{Op386MOVLstoreconst, Op386ADDL}: Op386MOVLstoreconstidx1, + + [2]Op{Op386MOVBload, Op386LEAL1}: Op386MOVBloadidx1, + [2]Op{Op386MOVWload, Op386LEAL1}: Op386MOVWloadidx1, + [2]Op{Op386MOVWload, Op386LEAL2}: Op386MOVWloadidx2, + [2]Op{Op386MOVLload, Op386LEAL1}: Op386MOVLloadidx1, + [2]Op{Op386MOVLload, Op386LEAL4}: Op386MOVLloadidx4, + [2]Op{Op386MOVSSload, Op386LEAL1}: Op386MOVSSloadidx1, + [2]Op{Op386MOVSSload, Op386LEAL4}: Op386MOVSSloadidx4, + [2]Op{Op386MOVSDload, Op386LEAL1}: Op386MOVSDloadidx1, + [2]Op{Op386MOVSDload, Op386LEAL8}: Op386MOVSDloadidx8, + + [2]Op{Op386MOVBstore, Op386LEAL1}: Op386MOVBstoreidx1, + [2]Op{Op386MOVWstore, Op386LEAL1}: Op386MOVWstoreidx1, + [2]Op{Op386MOVWstore, Op386LEAL2}: Op386MOVWstoreidx2, + [2]Op{Op386MOVLstore, Op386LEAL1}: Op386MOVLstoreidx1, + [2]Op{Op386MOVLstore, Op386LEAL4}: Op386MOVLstoreidx4, + [2]Op{Op386MOVSSstore, Op386LEAL1}: Op386MOVSSstoreidx1, + [2]Op{Op386MOVSSstore, Op386LEAL4}: Op386MOVSSstoreidx4, + [2]Op{Op386MOVSDstore, Op386LEAL1}: Op386MOVSDstoreidx1, + [2]Op{Op386MOVSDstore, Op386LEAL8}: Op386MOVSDstoreidx8, + + [2]Op{Op386MOVBstoreconst, Op386LEAL1}: Op386MOVBstoreconstidx1, + [2]Op{Op386MOVWstoreconst, Op386LEAL1}: Op386MOVWstoreconstidx1, + [2]Op{Op386MOVWstoreconst, Op386LEAL2}: Op386MOVWstoreconstidx2, + [2]Op{Op386MOVLstoreconst, Op386LEAL1}: Op386MOVLstoreconstidx1, + [2]Op{Op386MOVLstoreconst, Op386LEAL4}: Op386MOVLstoreconstidx4, + + [2]Op{Op386ADDLload, Op386LEAL4}: Op386ADDLloadidx4, + [2]Op{Op386SUBLload, Op386LEAL4}: Op386SUBLloadidx4, + 
[2]Op{Op386MULLload, Op386LEAL4}: Op386MULLloadidx4, + [2]Op{Op386ANDLload, Op386LEAL4}: Op386ANDLloadidx4, + [2]Op{Op386ORLload, Op386LEAL4}: Op386ORLloadidx4, + [2]Op{Op386XORLload, Op386LEAL4}: Op386XORLloadidx4, + + [2]Op{Op386ADDLmodify, Op386LEAL4}: Op386ADDLmodifyidx4, + [2]Op{Op386SUBLmodify, Op386LEAL4}: Op386SUBLmodifyidx4, + [2]Op{Op386ANDLmodify, Op386LEAL4}: Op386ANDLmodifyidx4, + [2]Op{Op386ORLmodify, Op386LEAL4}: Op386ORLmodifyidx4, + [2]Op{Op386XORLmodify, Op386LEAL4}: Op386XORLmodifyidx4, + + [2]Op{Op386ADDLconstmodify, Op386LEAL4}: Op386ADDLconstmodifyidx4, + [2]Op{Op386ANDLconstmodify, Op386LEAL4}: Op386ANDLconstmodifyidx4, + [2]Op{Op386ORLconstmodify, Op386LEAL4}: Op386ORLconstmodifyidx4, + [2]Op{Op386XORLconstmodify, Op386LEAL4}: Op386XORLconstmodifyidx4, } diff --git a/src/cmd/compile/internal/ssa/gen/386.rules b/src/cmd/compile/internal/ssa/gen/386.rules index 64a6cbaf84..2c48994a5f 100644 --- a/src/cmd/compile/internal/ssa/gen/386.rules +++ b/src/cmd/compile/internal/ssa/gen/386.rules @@ -588,10 +588,6 @@ (MOVWLSX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWLSXload [off] {sym} ptr mem) (MOVWLZX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload [off] {sym} ptr mem) -(MOVBLZX x:(MOVBloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBloadidx1 [off] {sym} ptr idx mem) -(MOVWLZX x:(MOVWloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx1 [off] {sym} ptr idx mem) -(MOVWLZX x:(MOVWloadidx2 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx2 [off] {sym} ptr idx mem) - // replace load from same location as preceding store with zero/sign extension (or copy in case of full width) (MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBLZX x) (MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWLZX x) @@ -611,34 +607,22 @@ // fold constants into memory operations // Note that this is not always a good idea because if not all the uses of -// the ADDQconst get eliminated, we still have to compute the ADDQconst and we now -// have potentially two live values (ptr and (ADDQconst [off] ptr)) instead of one. +// the ADDLconst get eliminated, we still have to compute the ADDLconst and we now +// have potentially two live values (ptr and (ADDLconst [off] ptr)) instead of one. // Nevertheless, let's do it! 
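// For example, (MOVLload [8] {sym} (ADDLconst [4] ptr) mem) folds to
// (MOVLload [12] {sym} ptr mem), since 8+4 still fits in a 32-bit offset.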
(MOV(L|W|B|SS|SD)load [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOV(L|W|B|SS|SD)load [off1+off2] {sym} ptr mem) (MOV(L|W|B|SS|SD)store [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOV(L|W|B|SS|SD)store [off1+off2] {sym} ptr val mem) ((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) -> ((ADD|SUB|MUL|AND|OR|XOR)Lload [off1+off2] {sym} val base mem) -((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1] {sym} val (ADDLconst [off2] base) idx mem) && is32Bit(off1+off2) -> - ((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2] {sym} val base idx mem) -((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1] {sym} val base (ADDLconst [off2] idx) mem) && is32Bit(off1+off2*4) -> - ((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2*4] {sym} val base idx mem) ((ADD|SUB|MUL|DIV)SSload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) -> ((ADD|SUB|MUL|DIV)SSload [off1+off2] {sym} val base mem) ((ADD|SUB|MUL|DIV)SDload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) -> ((ADD|SUB|MUL|DIV)SDload [off1+off2] {sym} val base mem) ((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym} (ADDLconst [off2] base) val mem) && is32Bit(off1+off2) -> ((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {sym} base val mem) -((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1] {sym} (ADDLconst [off2] base) idx val mem) && is32Bit(off1+off2) -> - ((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1+off2] {sym} base idx val mem) -((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1] {sym} base (ADDLconst [off2] idx) val mem) && is32Bit(off1+off2*4) -> - ((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1+off2*4] {sym} base idx val mem) ((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym} (ADDLconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) -> ((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem) -((ADD|AND|OR|XOR)Lconstmodifyidx4 [valoff1] {sym} (ADDLconst [off2] base) idx mem) && ValAndOff(valoff1).canAdd(off2) -> - ((ADD|AND|OR|XOR)Lconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {sym} base idx mem) -((ADD|AND|OR|XOR)Lconstmodifyidx4 [valoff1] {sym} base (ADDLconst [off2] idx) mem) && ValAndOff(valoff1).canAdd(off2*4) -> - ((ADD|AND|OR|XOR)Lconstmodifyidx4 [ValAndOff(valoff1).add(off2*4)] {sym} base idx mem) // Fold constants into stores. (MOVLstore [off] {sym} ptr (MOVLconst [c]) mem) && validOff(off) -> @@ -652,7 +636,7 @@ (MOV(L|W|B)storeconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) -> (MOV(L|W|B)storeconst [ValAndOff(sc).add(off)] {s} ptr mem) -// We need to fold LEAQ into the MOVx ops so that the live variable analysis knows +// We need to fold LEAL into the MOVx ops so that the live variable analysis knows // what variables are being read/written by the ops. // Note: we turn off this merging for operations on globals when building // position-independent code (when Flag_shared is set). 
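// For example, (MOVLload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
// becomes (MOVLload [off1+off2] {mergeSym(sym1,sym2)} base mem) when the
// offsets and symbols can be merged.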
@@ -672,31 +656,9 @@ && (ptr.Op != OpSB || !config.ctxt.Flag_shared) -> (MOV(L|W|B)storeconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) -// generating indexed loads and stores -(MOV(B|W|L|SS|SD)load [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> - (MOV(B|W|L|SS|SD)loadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) -(MOVWload [off1] {sym1} (LEAL2 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> - (MOVWloadidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) -(MOV(L|SS)load [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> - (MOV(L|SS)loadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) -(MOVSDload [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> - (MOVSDloadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) - -(MOV(B|W|L|SS|SD)store [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> - (MOV(B|W|L|SS|SD)storeidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) -(MOVWstore [off1] {sym1} (LEAL2 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> - (MOVWstoreidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) -(MOV(L|SS)store [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> - (MOV(L|SS)storeidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) -(MOVSDstore [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> - (MOVSDstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) - ((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym1} val (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) -> ((ADD|SUB|MUL|AND|OR|XOR)Lload [off1+off2] {mergeSym(sym1,sym2)} val base mem) -((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1] {sym1} val (LEAL [off2] {sym2} base) idx mem) - && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) -> - ((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val base idx mem) ((ADD|SUB|MUL|DIV)SSload [off1] {sym1} val (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) -> ((ADD|SUB|MUL|DIV)SSload [off1+off2] {mergeSym(sym1,sym2)} val base mem) @@ -706,97 +668,20 @@ ((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) -> ((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) -((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1] {sym1} (LEAL [off2] {sym2} base) idx val mem) - && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) -> - ((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1+off2] {mergeSym(sym1,sym2)} base idx val mem) ((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym1} (LEAL [off2] {sym2} base) mem) && ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) -> ((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem) -((ADD|AND|OR|XOR)Lconstmodifyidx4 [valoff1] {sym1} (LEAL [off2] {sym2} base) idx mem) - && ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) -> - 
((ADD|AND|OR|XOR)Lconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base idx mem) - -(MOV(B|W|L|SS|SD)load [off] {sym} (ADDL ptr idx) mem) && ptr.Op != OpSB -> (MOV(B|W|L|SS|SD)loadidx1 [off] {sym} ptr idx mem) -(MOV(B|W|L|SS|SD)store [off] {sym} (ADDL ptr idx) val mem) && ptr.Op != OpSB -> (MOV(B|W|L|SS|SD)storeidx1 [off] {sym} ptr idx val mem) - -(MOV(B|W|L)storeconst [x] {sym1} (LEAL1 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) -> - (MOV(B|W|L)storeconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem) -(MOVWstoreconst [x] {sym1} (LEAL2 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) -> - (MOVWstoreconstidx2 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem) -(MOVLstoreconst [x] {sym1} (LEAL4 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) -> - (MOVLstoreconstidx4 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem) - -(MOV(B|W|L)storeconst [x] {sym} (ADDL ptr idx) mem) -> (MOV(B|W|L)storeconstidx1 [x] {sym} ptr idx mem) - -// combine SHLL into indexed loads and stores -(MOVWloadidx1 [c] {sym} ptr (SHLLconst [1] idx) mem) -> (MOVWloadidx2 [c] {sym} ptr idx mem) -(MOVLloadidx1 [c] {sym} ptr (SHLLconst [2] idx) mem) -> (MOVLloadidx4 [c] {sym} ptr idx mem) -(MOVWstoreidx1 [c] {sym} ptr (SHLLconst [1] idx) val mem) -> (MOVWstoreidx2 [c] {sym} ptr idx val mem) -(MOVLstoreidx1 [c] {sym} ptr (SHLLconst [2] idx) val mem) -> (MOVLstoreidx4 [c] {sym} ptr idx val mem) -(MOVWstoreconstidx1 [c] {sym} ptr (SHLLconst [1] idx) mem) -> (MOVWstoreconstidx2 [c] {sym} ptr idx mem) -(MOVLstoreconstidx1 [c] {sym} ptr (SHLLconst [2] idx) mem) -> (MOVLstoreconstidx4 [c] {sym} ptr idx mem) - -// combine ADDL into indexed loads and stores -(MOV(B|W|L|SS|SD)loadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem) -> (MOV(B|W|L|SS|SD)loadidx1 [int64(int32(c+d))] {sym} ptr idx mem) -(MOVWloadidx2 [c] {sym} (ADDLconst [d] ptr) idx mem) -> (MOVWloadidx2 [int64(int32(c+d))] {sym} ptr idx mem) -(MOV(L|SS)loadidx4 [c] {sym} (ADDLconst [d] ptr) idx mem) -> (MOV(L|SS)loadidx4 [int64(int32(c+d))] {sym} ptr idx mem) -(MOVSDloadidx8 [c] {sym} (ADDLconst [d] ptr) idx mem) -> (MOVSDloadidx8 [int64(int32(c+d))] {sym} ptr idx mem) - -(MOV(B|W|L|SS|SD)storeidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem) -> (MOV(B|W|L|SS|SD)storeidx1 [int64(int32(c+d))] {sym} ptr idx val mem) -(MOVWstoreidx2 [c] {sym} (ADDLconst [d] ptr) idx val mem) -> (MOVWstoreidx2 [int64(int32(c+d))] {sym} ptr idx val mem) -(MOV(L|SS)storeidx4 [c] {sym} (ADDLconst [d] ptr) idx val mem) -> (MOV(L|SS)storeidx4 [int64(int32(c+d))] {sym} ptr idx val mem) -(MOVSDstoreidx8 [c] {sym} (ADDLconst [d] ptr) idx val mem) -> (MOVSDstoreidx8 [int64(int32(c+d))] {sym} ptr idx val mem) - -(MOV(B|W|L|SS|SD)loadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem) -> (MOV(B|W|L|SS|SD)loadidx1 [int64(int32(c+d))] {sym} ptr idx mem) -(MOVWloadidx2 [c] {sym} ptr (ADDLconst [d] idx) mem) -> (MOVWloadidx2 [int64(int32(c+2*d))] {sym} ptr idx mem) -(MOV(L|SS)loadidx4 [c] {sym} ptr (ADDLconst [d] idx) mem) -> (MOV(L|SS)loadidx4 [int64(int32(c+4*d))] {sym} ptr idx mem) -(MOVSDloadidx8 [c] {sym} ptr (ADDLconst [d] idx) mem) -> (MOVSDloadidx8 [int64(int32(c+8*d))] {sym} ptr idx mem) - -(MOV(B|W|L|SS|SD)storeidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOV(B|W|L|SS|SD)storeidx1 [int64(int32(c+d))] {sym} ptr idx val mem) -(MOVWstoreidx2 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOVWstoreidx2 [int64(int32(c+2*d))] {sym} ptr idx val mem) -(MOV(L|SS)storeidx4 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOV(L|SS)storeidx4 
[int64(int32(c+4*d))] {sym} ptr idx val mem) -(MOVSDstoreidx8 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOVSDstoreidx8 [int64(int32(c+8*d))] {sym} ptr idx val mem) // Merge load/store to op ((ADD|AND|OR|XOR|SUB|MUL)L x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) -> ((ADD|AND|OR|XOR|SUB|MUL)Lload x [off] {sym} ptr mem) -((ADD|AND|OR|XOR|SUB|MUL)L x l:(MOVLloadidx4 [off] {sym} ptr idx mem)) && canMergeLoadClobber(v, l, x) && clobber(l) -> - ((ADD|AND|OR|XOR|SUB|MUL)Lloadidx4 x [off] {sym} ptr idx mem) -((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym1} val (LEAL4 [off2] {sym2} ptr idx) mem) - && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> - ((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val ptr idx mem) ((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) -> ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem) ((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) -> ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem) (MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) -> ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem) (MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) -> ((ADD|SUB|AND|OR|XOR)Lmodify [off] {sym} ptr x mem) -(MOVLstoreidx4 {sym} [off] ptr idx y:((ADD|AND|OR|XOR)Lloadidx4 x [off] {sym} ptr idx mem) mem) && y.Uses==1 && clobber(y) -> - ((ADD|AND|OR|XOR)Lmodifyidx4 [off] {sym} ptr idx x mem) -(MOVLstoreidx4 {sym} [off] ptr idx y:((ADD|SUB|AND|OR|XOR)L l:(MOVLloadidx4 [off] {sym} ptr idx mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) -> - ((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off] {sym} ptr idx x mem) (MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lconst [c] l:(MOVLload [off] {sym} ptr mem)) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) && validValAndOff(c,off) -> ((ADD|AND|OR|XOR)Lconstmodify [makeValAndOff(c,off)] {sym} ptr mem) -(MOVLstoreidx4 {sym} [off] ptr idx y:((ADD|AND|OR|XOR)Lconst [c] l:(MOVLloadidx4 [off] {sym} ptr idx mem)) mem) - && y.Uses==1 && l.Uses==1 && clobber(y, l) && validValAndOff(c,off) -> - ((ADD|AND|OR|XOR)Lconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem) -((ADD|AND|OR|XOR)Lmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem) && validValAndOff(c,off) -> - ((ADD|AND|OR|XOR)Lconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem) -(SUBLmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem) && validValAndOff(-c,off) -> - (ADDLconstmodifyidx4 [makeValAndOff(-c,off)] {sym} ptr idx mem) - -(MOV(B|W|L)storeconstidx1 [x] {sym} (ADDLconst [c] ptr) idx mem) -> - (MOV(B|W|L)storeconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem) -(MOVWstoreconstidx2 [x] {sym} (ADDLconst [c] ptr) idx mem) -> - (MOVWstoreconstidx2 [ValAndOff(x).add(c)] {sym} ptr idx mem) -(MOVLstoreconstidx4 [x] {sym} (ADDLconst [c] ptr) idx mem) -> - (MOVLstoreconstidx4 [ValAndOff(x).add(c)] {sym} ptr idx mem) - -(MOV(B|W|L)storeconstidx1 [x] {sym} ptr (ADDLconst [c] idx) mem) -> - (MOV(B|W|L)storeconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem) -(MOVWstoreconstidx2 [x] {sym} ptr (ADDLconst [c] idx) mem) -> - (MOVWstoreconstidx2 [ValAndOff(x).add(2*c)] {sym} ptr idx mem) -(MOVLstoreconstidx4 [x] {sym} ptr (ADDLconst [c] idx) mem) -> - (MOVLstoreconstidx4 [ValAndOff(x).add(4*c)] {sym} ptr idx mem) // fold LEALs together (LEAL [off1] {sym1} (LEAL [off2] {sym2} x)) 
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> @@ -826,6 +711,16 @@ (LEAL [off1] {sym1} (LEAL8 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> (LEAL8 [off1+off2] {mergeSym(sym1,sym2)} x y) +// LEAL[1248] into LEAL[1248]. Only some such merges are possible. +(LEAL1 [off1] {sym1} x (LEAL1 [off2] {sym2} y y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> + (LEAL2 [off1+off2] {mergeSym(sym1, sym2)} x y) +(LEAL1 [off1] {sym1} x (LEAL1 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> + (LEAL2 [off1+off2] {mergeSym(sym1, sym2)} y x) +(LEAL2 [off1] {sym} x (LEAL1 [off2] {nil} y y)) && is32Bit(off1+2*off2) -> + (LEAL4 [off1+2*off2] {sym} x y) +(LEAL4 [off1] {sym} x (LEAL1 [off2] {nil} y y)) && is32Bit(off1+4*off2) -> + (LEAL8 [off1+4*off2] {sym} x y) + // Absorb InvertFlags into branches. (LT (InvertFlags cmp) yes no) -> (GT cmp yes no) (GT (InvertFlags cmp) yes no) -> (LT cmp yes no) @@ -1039,6 +934,9 @@ // TEST %reg,%reg is shorter than CMP (CMP(L|W|B)const x [0]) -> (TEST(L|W|B) x x) +// Convert LEAL1 back to ADDL if we can +(LEAL1 [0] {nil} x y) -> (ADDL x y) + // Combining byte loads into larger (unaligned) loads. // There are many ways these combinations could occur. This is // designed to match the way encoding/binary.LittleEndian does it. @@ -1052,6 +950,16 @@ && clobber(x0, x1, s0) -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem) +(ORL x0:(MOVBload [i] {s} p0 mem) + s0:(SHLLconst [8] x1:(MOVBload [i] {s} p1 mem))) + && x0.Uses == 1 + && x1.Uses == 1 + && s0.Uses == 1 + && sequentialAddresses(p0, p1, 1) + && mergePoint(b,x0,x1) != nil + && clobber(x0, x1, s0) + -> @mergePoint(b,x0,x1) (MOVWload [i] {s} p0 mem) + (ORL o0:(ORL x0:(MOVWload [i0] {s} p mem) s0:(SHLLconst [16] x1:(MOVBload [i2] {s} p mem))) @@ -1068,31 +976,21 @@ && clobber(x0, x1, x2, s0, s1, o0) -> @mergePoint(b,x0,x1,x2) (MOVLload [i0] {s} p mem) -(ORL x0:(MOVBloadidx1 [i0] {s} p idx mem) - s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem))) - && i1==i0+1 - && x0.Uses == 1 - && x1.Uses == 1 - && s0.Uses == 1 - && mergePoint(b,x0,x1) != nil - && clobber(x0, x1, s0) - -> @mergePoint(b,x0,x1) (MOVWloadidx1 [i0] {s} p idx mem) - (ORL o0:(ORL - x0:(MOVWloadidx1 [i0] {s} p idx mem) - s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem))) - s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem))) - && i2 == i0+2 - && i3 == i0+3 + x0:(MOVWload [i] {s} p0 mem) + s0:(SHLLconst [16] x1:(MOVBload [i] {s} p1 mem))) + s1:(SHLLconst [24] x2:(MOVBload [i] {s} p2 mem))) && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 + && sequentialAddresses(p0, p1, 2) + && sequentialAddresses(p1, p2, 1) && mergePoint(b,x0,x1,x2) != nil && clobber(x0, x1, x2, s0, s1, o0) - -> @mergePoint(b,x0,x1,x2) (MOVLloadidx1 [i0] {s} p idx mem) + -> @mergePoint(b,x0,x1,x2) (MOVLload [i] {s} p0 mem) // Combine constant stores into larger (unaligned) stores. 
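// For example, adjacent byte stores of constants a and c merge into one
// MOVWstoreconst whose value is a&0xff | c<<8 at the lower offset.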
(MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem)) @@ -1105,6 +1003,20 @@ && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x) -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem) + +(MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem)) + && x.Uses == 1 + && ValAndOff(a).Off() == ValAndOff(c).Off() + && sequentialAddresses(p0, p1, 1) + && clobber(x) + -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem) +(MOVBstoreconst [a] {s} p0 x:(MOVBstoreconst [c] {s} p1 mem)) + && x.Uses == 1 + && ValAndOff(a).Off() == ValAndOff(c).Off() + && sequentialAddresses(p0, p1, 1) + && clobber(x) + -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem) + (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem)) && x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() @@ -1116,22 +1028,18 @@ && clobber(x) -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem) -(MOVBstoreconstidx1 [c] {s} p i x:(MOVBstoreconstidx1 [a] {s} p i mem)) - && x.Uses == 1 - && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() - && clobber(x) - -> (MOVWstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p i mem) -(MOVWstoreconstidx1 [c] {s} p i x:(MOVWstoreconstidx1 [a] {s} p i mem)) +(MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem)) && x.Uses == 1 - && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() + && ValAndOff(a).Off() == ValAndOff(c).Off() + && sequentialAddresses(p0, p1, 2) && clobber(x) - -> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p i mem) - -(MOVWstoreconstidx2 [c] {s} p i x:(MOVWstoreconstidx2 [a] {s} p i mem)) + -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem) +(MOVWstoreconst [a] {s} p0 x:(MOVWstoreconst [c] {s} p1 mem)) && x.Uses == 1 - && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() + && ValAndOff(a).Off() == ValAndOff(c).Off() + && sequentialAddresses(p0, p1, 2) && clobber(x) - -> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLLconst [1] i) mem) + -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem) // Combine stores into larger (unaligned) stores. 
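// For example, a byte store of (SHRWconst [8] w) at [i] next to a byte
// store of w at [i-1] becomes a single (MOVWstore [i-1] {s} p w mem).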
(MOVBstore [i] {s} p (SHR(W|L)const [8] w) x:(MOVBstore [i-1] {s} p w mem)) @@ -1146,44 +1054,42 @@ && x.Uses == 1 && clobber(x) -> (MOVWstore [i-1] {s} p w0 mem) -(MOVWstore [i] {s} p (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p w mem)) - && x.Uses == 1 - && clobber(x) - -> (MOVLstore [i-2] {s} p w mem) -(MOVWstore [i] {s} p (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRLconst [j-16] w) mem)) - && x.Uses == 1 - && clobber(x) - -> (MOVLstore [i-2] {s} p w0 mem) -(MOVBstoreidx1 [i] {s} p idx (SHR(L|W)const [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem)) +(MOVBstore [i] {s} p1 (SHR(W|L)const [8] w) x:(MOVBstore [i] {s} p0 w mem)) && x.Uses == 1 + && sequentialAddresses(p0, p1, 1) && clobber(x) - -> (MOVWstoreidx1 [i-1] {s} p idx w mem) -(MOVBstoreidx1 [i] {s} p idx w x:(MOVBstoreidx1 [i+1] {s} p idx (SHR(L|W)const [8] w) mem)) + -> (MOVWstore [i] {s} p0 w mem) +(MOVBstore [i] {s} p0 w x:(MOVBstore {s} [i] p1 (SHR(W|L)const [8] w) mem)) && x.Uses == 1 + && sequentialAddresses(p0, p1, 1) && clobber(x) - -> (MOVWstoreidx1 [i] {s} p idx w mem) -(MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRLconst [j-8] w) mem)) + -> (MOVWstore [i] {s} p0 w mem) +(MOVBstore [i] {s} p1 (SHRLconst [j] w) x:(MOVBstore [i] {s} p0 w0:(SHRLconst [j-8] w) mem)) && x.Uses == 1 + && sequentialAddresses(p0, p1, 1) && clobber(x) - -> (MOVWstoreidx1 [i-1] {s} p idx w0 mem) -(MOVWstoreidx1 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem)) + -> (MOVWstore [i] {s} p0 w0 mem) + +(MOVWstore [i] {s} p (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p w mem)) && x.Uses == 1 && clobber(x) - -> (MOVLstoreidx1 [i-2] {s} p idx w mem) -(MOVWstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem)) + -> (MOVLstore [i-2] {s} p w mem) +(MOVWstore [i] {s} p (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRLconst [j-16] w) mem)) && x.Uses == 1 && clobber(x) - -> (MOVLstoreidx1 [i-2] {s} p idx w0 mem) + -> (MOVLstore [i-2] {s} p w0 mem) -(MOVWstoreidx2 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem)) +(MOVWstore [i] {s} p1 (SHRLconst [16] w) x:(MOVWstore [i] {s} p0 w mem)) && x.Uses == 1 + && sequentialAddresses(p0, p1, 2) && clobber(x) - -> (MOVLstoreidx1 [i-2] {s} p (SHLLconst [1] idx) w mem) -(MOVWstoreidx2 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem)) + -> (MOVLstore [i] {s} p0 w mem) +(MOVWstore [i] {s} p1 (SHRLconst [j] w) x:(MOVWstore [i] {s} p0 w0:(SHRLconst [j-16] w) mem)) && x.Uses == 1 + && sequentialAddresses(p0, p1, 2) && clobber(x) - -> (MOVLstoreidx1 [i-2] {s} p (SHLLconst [1] idx) w0 mem) + -> (MOVLstore [i] {s} p0 w0 mem) // For PIC, break floating-point constant loading into two instructions so we have // a register to use for holding the address of the constant pool entry. diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules index 306847d28c..ca5962f249 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64.rules +++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules @@ -1604,6 +1604,7 @@ // Move constants offsets from LEAQx up into load. This lets the above combining // rules discover indexed load-combining instances. +//TODO:remove! These rules are bad. 
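+// (These are the amd64 counterpart of the pattern this CL removes from 386:
+// the (LEAQ1 [0] {s1} x y) address they introduce can briefly point outside
+// the original object; see issue 37881 and the commit message above.)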
(MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ1 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1) -> (MOV(B|W|L|Q)load [i0+i1] {s0} (LEAQ1 [0] {s1} x y) mem) (MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ2 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1) diff --git a/src/cmd/compile/internal/ssa/gen/generic.rules b/src/cmd/compile/internal/ssa/gen/generic.rules index 8ec22d86e7..8a3c8eeaab 100644 --- a/src/cmd/compile/internal/ssa/gen/generic.rules +++ b/src/cmd/compile/internal/ssa/gen/generic.rules @@ -917,7 +917,7 @@ (If (ConstBool [c]) yes no) && c == 0 -> (First no yes) // Get rid of Convert ops for pointer arithmetic on unsafe.Pointer. -(Convert (Add(64|32) (Convert ptr mem) off) mem) -> (Add(64|32) ptr off) +(Convert (Add(64|32) (Convert ptr mem) off) mem) -> (AddPtr ptr off) (Convert (Convert ptr mem) mem) -> ptr // strength reduction of divide by a constant. @@ -1780,6 +1780,10 @@ // is constant, which pushes constants to the outside // of the expression. At that point, any constant-folding // opportunities should be obvious. +// Note: don't include AddPtr here! In order to maintain the +// invariant that pointers must stay within the pointed-to object, +// we can't pull part of a pointer computation above the AddPtr. +// See issue 37881. // x + (C + z) -> C + (x + z) (Add64 (Add64 i:(Const64 ) z) x) && (z.Op != OpConst64 && x.Op != OpConst64) -> (Add64 i (Add64 z x)) diff --git a/src/cmd/compile/internal/ssa/gen/rulegen.go b/src/cmd/compile/internal/ssa/gen/rulegen.go index 8e88d0b6a3..3caa06038a 100644 --- a/src/cmd/compile/internal/ssa/gen/rulegen.go +++ b/src/cmd/compile/internal/ssa/gen/rulegen.go @@ -715,6 +715,11 @@ func (w *bodyBase) add(node Statement) { // declared reports if the body contains a Declare with the given name. func (w *bodyBase) declared(name string) bool { + if name == "nil" { + // Treat "nil" as having already been declared. + // This lets us use nil to match an aux field. + return true + } for _, s := range w.list { if decl, ok := s.(*Declare); ok && decl.name == name { return true diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go index b3e7d34779..fc03f0d72c 100644 --- a/src/cmd/compile/internal/ssa/rewrite.go +++ b/src/cmd/compile/internal/ssa/rewrite.go @@ -1248,9 +1248,25 @@ func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 { return byteorder.Uint64(buf) } +// sequentialAddresses reports true if it can prove that x + n == y +func sequentialAddresses(x, y *Value, n int64) bool { + if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil && + (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] || + x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) { + return true + } + if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux && + (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] || + x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) { + return true + } + return false +} + // same reports whether x and y are the same value. // It checks to a maximum depth of d, so it may report // a false negative. 
+// TODO: remove when amd64 port is switched to using sequentialAddresses func same(x, y *Value, depth int) bool { if x == y { return true diff --git a/src/cmd/compile/internal/ssa/rewrite386.go b/src/cmd/compile/internal/ssa/rewrite386.go index 8b2da94c13..2a0a92bb83 100644 --- a/src/cmd/compile/internal/ssa/rewrite386.go +++ b/src/cmd/compile/internal/ssa/rewrite386.go @@ -18,16 +18,10 @@ func rewriteValue386(v *Value) bool { return rewriteValue386_Op386ADDLconst(v) case Op386ADDLconstmodify: return rewriteValue386_Op386ADDLconstmodify(v) - case Op386ADDLconstmodifyidx4: - return rewriteValue386_Op386ADDLconstmodifyidx4(v) case Op386ADDLload: return rewriteValue386_Op386ADDLload(v) - case Op386ADDLloadidx4: - return rewriteValue386_Op386ADDLloadidx4(v) case Op386ADDLmodify: return rewriteValue386_Op386ADDLmodify(v) - case Op386ADDLmodifyidx4: - return rewriteValue386_Op386ADDLmodifyidx4(v) case Op386ADDSD: return rewriteValue386_Op386ADDSD(v) case Op386ADDSDload: @@ -42,16 +36,10 @@ func rewriteValue386(v *Value) bool { return rewriteValue386_Op386ANDLconst(v) case Op386ANDLconstmodify: return rewriteValue386_Op386ANDLconstmodify(v) - case Op386ANDLconstmodifyidx4: - return rewriteValue386_Op386ANDLconstmodifyidx4(v) case Op386ANDLload: return rewriteValue386_Op386ANDLload(v) - case Op386ANDLloadidx4: - return rewriteValue386_Op386ANDLloadidx4(v) case Op386ANDLmodify: return rewriteValue386_Op386ANDLmodify(v) - case Op386ANDLmodifyidx4: - return rewriteValue386_Op386ANDLmodifyidx4(v) case Op386CMPB: return rewriteValue386_Op386CMPB(v) case Op386CMPBconst: @@ -96,62 +84,28 @@ func rewriteValue386(v *Value) bool { return rewriteValue386_Op386MOVBLZX(v) case Op386MOVBload: return rewriteValue386_Op386MOVBload(v) - case Op386MOVBloadidx1: - return rewriteValue386_Op386MOVBloadidx1(v) case Op386MOVBstore: return rewriteValue386_Op386MOVBstore(v) case Op386MOVBstoreconst: return rewriteValue386_Op386MOVBstoreconst(v) - case Op386MOVBstoreconstidx1: - return rewriteValue386_Op386MOVBstoreconstidx1(v) - case Op386MOVBstoreidx1: - return rewriteValue386_Op386MOVBstoreidx1(v) case Op386MOVLload: return rewriteValue386_Op386MOVLload(v) - case Op386MOVLloadidx1: - return rewriteValue386_Op386MOVLloadidx1(v) - case Op386MOVLloadidx4: - return rewriteValue386_Op386MOVLloadidx4(v) case Op386MOVLstore: return rewriteValue386_Op386MOVLstore(v) case Op386MOVLstoreconst: return rewriteValue386_Op386MOVLstoreconst(v) - case Op386MOVLstoreconstidx1: - return rewriteValue386_Op386MOVLstoreconstidx1(v) - case Op386MOVLstoreconstidx4: - return rewriteValue386_Op386MOVLstoreconstidx4(v) - case Op386MOVLstoreidx1: - return rewriteValue386_Op386MOVLstoreidx1(v) - case Op386MOVLstoreidx4: - return rewriteValue386_Op386MOVLstoreidx4(v) case Op386MOVSDconst: return rewriteValue386_Op386MOVSDconst(v) case Op386MOVSDload: return rewriteValue386_Op386MOVSDload(v) - case Op386MOVSDloadidx1: - return rewriteValue386_Op386MOVSDloadidx1(v) - case Op386MOVSDloadidx8: - return rewriteValue386_Op386MOVSDloadidx8(v) case Op386MOVSDstore: return rewriteValue386_Op386MOVSDstore(v) - case Op386MOVSDstoreidx1: - return rewriteValue386_Op386MOVSDstoreidx1(v) - case Op386MOVSDstoreidx8: - return rewriteValue386_Op386MOVSDstoreidx8(v) case Op386MOVSSconst: return rewriteValue386_Op386MOVSSconst(v) case Op386MOVSSload: return rewriteValue386_Op386MOVSSload(v) - case Op386MOVSSloadidx1: - return rewriteValue386_Op386MOVSSloadidx1(v) - case Op386MOVSSloadidx4: - return rewriteValue386_Op386MOVSSloadidx4(v) case Op386MOVSSstore: return 
rewriteValue386_Op386MOVSSstore(v) - case Op386MOVSSstoreidx1: - return rewriteValue386_Op386MOVSSstoreidx1(v) - case Op386MOVSSstoreidx4: - return rewriteValue386_Op386MOVSSstoreidx4(v) case Op386MOVWLSX: return rewriteValue386_Op386MOVWLSX(v) case Op386MOVWLSXload: @@ -160,30 +114,16 @@ func rewriteValue386(v *Value) bool { return rewriteValue386_Op386MOVWLZX(v) case Op386MOVWload: return rewriteValue386_Op386MOVWload(v) - case Op386MOVWloadidx1: - return rewriteValue386_Op386MOVWloadidx1(v) - case Op386MOVWloadidx2: - return rewriteValue386_Op386MOVWloadidx2(v) case Op386MOVWstore: return rewriteValue386_Op386MOVWstore(v) case Op386MOVWstoreconst: return rewriteValue386_Op386MOVWstoreconst(v) - case Op386MOVWstoreconstidx1: - return rewriteValue386_Op386MOVWstoreconstidx1(v) - case Op386MOVWstoreconstidx2: - return rewriteValue386_Op386MOVWstoreconstidx2(v) - case Op386MOVWstoreidx1: - return rewriteValue386_Op386MOVWstoreidx1(v) - case Op386MOVWstoreidx2: - return rewriteValue386_Op386MOVWstoreidx2(v) case Op386MULL: return rewriteValue386_Op386MULL(v) case Op386MULLconst: return rewriteValue386_Op386MULLconst(v) case Op386MULLload: return rewriteValue386_Op386MULLload(v) - case Op386MULLloadidx4: - return rewriteValue386_Op386MULLloadidx4(v) case Op386MULSD: return rewriteValue386_Op386MULSD(v) case Op386MULSDload: @@ -202,16 +142,10 @@ func rewriteValue386(v *Value) bool { return rewriteValue386_Op386ORLconst(v) case Op386ORLconstmodify: return rewriteValue386_Op386ORLconstmodify(v) - case Op386ORLconstmodifyidx4: - return rewriteValue386_Op386ORLconstmodifyidx4(v) case Op386ORLload: return rewriteValue386_Op386ORLload(v) - case Op386ORLloadidx4: - return rewriteValue386_Op386ORLloadidx4(v) case Op386ORLmodify: return rewriteValue386_Op386ORLmodify(v) - case Op386ORLmodifyidx4: - return rewriteValue386_Op386ORLmodifyidx4(v) case Op386ROLBconst: return rewriteValue386_Op386ROLBconst(v) case Op386ROLLconst: @@ -278,12 +212,8 @@ func rewriteValue386(v *Value) bool { return rewriteValue386_Op386SUBLconst(v) case Op386SUBLload: return rewriteValue386_Op386SUBLload(v) - case Op386SUBLloadidx4: - return rewriteValue386_Op386SUBLloadidx4(v) case Op386SUBLmodify: return rewriteValue386_Op386SUBLmodify(v) - case Op386SUBLmodifyidx4: - return rewriteValue386_Op386SUBLmodifyidx4(v) case Op386SUBSD: return rewriteValue386_Op386SUBSD(v) case Op386SUBSDload: @@ -298,16 +228,10 @@ func rewriteValue386(v *Value) bool { return rewriteValue386_Op386XORLconst(v) case Op386XORLconstmodify: return rewriteValue386_Op386XORLconstmodify(v) - case Op386XORLconstmodifyidx4: - return rewriteValue386_Op386XORLconstmodifyidx4(v) case Op386XORLload: return rewriteValue386_Op386XORLload(v) - case Op386XORLloadidx4: - return rewriteValue386_Op386XORLloadidx4(v) case Op386XORLmodify: return rewriteValue386_Op386XORLmodify(v) - case Op386XORLmodifyidx4: - return rewriteValue386_Op386XORLmodifyidx4(v) case OpAdd16: v.Op = Op386ADDL return true @@ -1042,32 +966,6 @@ func rewriteValue386_Op386ADDL(v *Value) bool { } break } - // match: (ADDL x l:(MOVLloadidx4 [off] {sym} ptr idx mem)) - // cond: canMergeLoadClobber(v, l, x) && clobber(l) - // result: (ADDLloadidx4 x [off] {sym} ptr idx mem) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != Op386MOVLloadidx4 { - continue - } - off := l.AuxInt - sym := l.Aux - mem := l.Args[2] - ptr := l.Args[0] - idx := l.Args[1] - if !(canMergeLoadClobber(v, l, x) && clobber(l)) { - continue - } - v.reset(Op386ADDLloadidx4) - v.AuxInt 
= off - v.Aux = sym - v.AddArg4(x, ptr, idx, mem) - return true - } - break - } // match: (ADDL x (NEGL y)) // result: (SUBL x y) for { @@ -1316,81 +1214,6 @@ func rewriteValue386_Op386ADDLconstmodify(v *Value) bool { } return false } -func rewriteValue386_Op386ADDLconstmodifyidx4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (ADDLconstmodifyidx4 [valoff1] {sym} (ADDLconst [off2] base) idx mem) - // cond: ValAndOff(valoff1).canAdd(off2) - // result: (ADDLconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {sym} base idx mem) - for { - valoff1 := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDLconst { - break - } - off2 := v_0.AuxInt - base := v_0.Args[0] - idx := v_1 - mem := v_2 - if !(ValAndOff(valoff1).canAdd(off2)) { - break - } - v.reset(Op386ADDLconstmodifyidx4) - v.AuxInt = ValAndOff(valoff1).add(off2) - v.Aux = sym - v.AddArg3(base, idx, mem) - return true - } - // match: (ADDLconstmodifyidx4 [valoff1] {sym} base (ADDLconst [off2] idx) mem) - // cond: ValAndOff(valoff1).canAdd(off2*4) - // result: (ADDLconstmodifyidx4 [ValAndOff(valoff1).add(off2*4)] {sym} base idx mem) - for { - valoff1 := v.AuxInt - sym := v.Aux - base := v_0 - if v_1.Op != Op386ADDLconst { - break - } - off2 := v_1.AuxInt - idx := v_1.Args[0] - mem := v_2 - if !(ValAndOff(valoff1).canAdd(off2 * 4)) { - break - } - v.reset(Op386ADDLconstmodifyidx4) - v.AuxInt = ValAndOff(valoff1).add(off2 * 4) - v.Aux = sym - v.AddArg3(base, idx, mem) - return true - } - // match: (ADDLconstmodifyidx4 [valoff1] {sym1} (LEAL [off2] {sym2} base) idx mem) - // cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (ADDLconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base idx mem) - for { - valoff1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - base := v_0.Args[0] - idx := v_1 - mem := v_2 - if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { - break - } - v.reset(Op386ADDLconstmodifyidx4) - v.AuxInt = ValAndOff(valoff1).add(off2) - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(base, idx, mem) - return true - } - return false -} func rewriteValue386_Op386ADDLload(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] @@ -1442,109 +1265,6 @@ func rewriteValue386_Op386ADDLload(v *Value) bool { v.AddArg3(val, base, mem) return true } - // match: (ADDLload [off1] {sym1} val (LEAL4 [off2] {sym2} ptr idx) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (ADDLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val ptr idx mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - val := v_0 - if v_1.Op != Op386LEAL4 { - break - } - off2 := v_1.AuxInt - sym2 := v_1.Aux - idx := v_1.Args[1] - ptr := v_1.Args[0] - mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(Op386ADDLloadidx4) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg4(val, ptr, idx, mem) - return true - } - return false -} -func rewriteValue386_Op386ADDLloadidx4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (ADDLloadidx4 [off1] {sym} val (ADDLconst [off2] base) idx mem) - // cond: is32Bit(off1+off2) - // result: (ADDLloadidx4 [off1+off2] {sym} val base idx mem) - for { - off1 := v.AuxInt - sym := v.Aux - val := v_0 - if v_1.Op != 
Op386ADDLconst { - break - } - off2 := v_1.AuxInt - base := v_1.Args[0] - idx := v_2 - mem := v_3 - if !(is32Bit(off1 + off2)) { - break - } - v.reset(Op386ADDLloadidx4) - v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg4(val, base, idx, mem) - return true - } - // match: (ADDLloadidx4 [off1] {sym} val base (ADDLconst [off2] idx) mem) - // cond: is32Bit(off1+off2*4) - // result: (ADDLloadidx4 [off1+off2*4] {sym} val base idx mem) - for { - off1 := v.AuxInt - sym := v.Aux - val := v_0 - base := v_1 - if v_2.Op != Op386ADDLconst { - break - } - off2 := v_2.AuxInt - idx := v_2.Args[0] - mem := v_3 - if !(is32Bit(off1 + off2*4)) { - break - } - v.reset(Op386ADDLloadidx4) - v.AuxInt = off1 + off2*4 - v.Aux = sym - v.AddArg4(val, base, idx, mem) - return true - } - // match: (ADDLloadidx4 [off1] {sym1} val (LEAL [off2] {sym2} base) idx mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (ADDLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val base idx mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - val := v_0 - if v_1.Op != Op386LEAL { - break - } - off2 := v_1.AuxInt - sym2 := v_1.Aux - base := v_1.Args[0] - idx := v_2 - mem := v_3 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { - break - } - v.reset(Op386ADDLloadidx4) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg4(val, base, idx, mem) - return true - } return false } func rewriteValue386_Op386ADDLmodify(v *Value) bool { @@ -1600,107 +1320,6 @@ func rewriteValue386_Op386ADDLmodify(v *Value) bool { } return false } -func rewriteValue386_Op386ADDLmodifyidx4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (ADDLmodifyidx4 [off1] {sym} (ADDLconst [off2] base) idx val mem) - // cond: is32Bit(off1+off2) - // result: (ADDLmodifyidx4 [off1+off2] {sym} base idx val mem) - for { - off1 := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDLconst { - break - } - off2 := v_0.AuxInt - base := v_0.Args[0] - idx := v_1 - val := v_2 - mem := v_3 - if !(is32Bit(off1 + off2)) { - break - } - v.reset(Op386ADDLmodifyidx4) - v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg4(base, idx, val, mem) - return true - } - // match: (ADDLmodifyidx4 [off1] {sym} base (ADDLconst [off2] idx) val mem) - // cond: is32Bit(off1+off2*4) - // result: (ADDLmodifyidx4 [off1+off2*4] {sym} base idx val mem) - for { - off1 := v.AuxInt - sym := v.Aux - base := v_0 - if v_1.Op != Op386ADDLconst { - break - } - off2 := v_1.AuxInt - idx := v_1.Args[0] - val := v_2 - mem := v_3 - if !(is32Bit(off1 + off2*4)) { - break - } - v.reset(Op386ADDLmodifyidx4) - v.AuxInt = off1 + off2*4 - v.Aux = sym - v.AddArg4(base, idx, val, mem) - return true - } - // match: (ADDLmodifyidx4 [off1] {sym1} (LEAL [off2] {sym2} base) idx val mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (ADDLmodifyidx4 [off1+off2] {mergeSym(sym1,sym2)} base idx val mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - base := v_0.Args[0] - idx := v_1 - val := v_2 - mem := v_3 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { - break - } - v.reset(Op386ADDLmodifyidx4) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg4(base, idx, val, mem) - return true - } - // match: 
(ADDLmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem) - // cond: validValAndOff(c,off) - // result: (ADDLconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - idx := v_1 - if v_2.Op != Op386MOVLconst { - break - } - c := v_2.AuxInt - mem := v_3 - if !(validValAndOff(c, off)) { - break - } - v.reset(Op386ADDLconstmodifyidx4) - v.AuxInt = makeValAndOff(c, off) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - return false -} func rewriteValue386_Op386ADDSD(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -1915,32 +1534,6 @@ func rewriteValue386_Op386ANDL(v *Value) bool { } break } - // match: (ANDL x l:(MOVLloadidx4 [off] {sym} ptr idx mem)) - // cond: canMergeLoadClobber(v, l, x) && clobber(l) - // result: (ANDLloadidx4 x [off] {sym} ptr idx mem) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != Op386MOVLloadidx4 { - continue - } - off := l.AuxInt - sym := l.Aux - mem := l.Args[2] - ptr := l.Args[0] - idx := l.Args[1] - if !(canMergeLoadClobber(v, l, x) && clobber(l)) { - continue - } - v.reset(Op386ANDLloadidx4) - v.AuxInt = off - v.Aux = sym - v.AddArg4(x, ptr, idx, mem) - return true - } - break - } // match: (ANDL x x) // result: x for { @@ -2057,101 +1650,26 @@ func rewriteValue386_Op386ANDLconstmodify(v *Value) bool { } return false } -func rewriteValue386_Op386ANDLconstmodifyidx4(v *Value) bool { +func rewriteValue386_Op386ANDLload(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block config := b.Func.Config - // match: (ANDLconstmodifyidx4 [valoff1] {sym} (ADDLconst [off2] base) idx mem) - // cond: ValAndOff(valoff1).canAdd(off2) - // result: (ANDLconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {sym} base idx mem) + // match: (ANDLload [off1] {sym} val (ADDLconst [off2] base) mem) + // cond: is32Bit(off1+off2) + // result: (ANDLload [off1+off2] {sym} val base mem) for { - valoff1 := v.AuxInt + off1 := v.AuxInt sym := v.Aux - if v_0.Op != Op386ADDLconst { + val := v_0 + if v_1.Op != Op386ADDLconst { break } - off2 := v_0.AuxInt - base := v_0.Args[0] - idx := v_1 + off2 := v_1.AuxInt + base := v_1.Args[0] mem := v_2 - if !(ValAndOff(valoff1).canAdd(off2)) { - break - } - v.reset(Op386ANDLconstmodifyidx4) - v.AuxInt = ValAndOff(valoff1).add(off2) - v.Aux = sym - v.AddArg3(base, idx, mem) - return true - } - // match: (ANDLconstmodifyidx4 [valoff1] {sym} base (ADDLconst [off2] idx) mem) - // cond: ValAndOff(valoff1).canAdd(off2*4) - // result: (ANDLconstmodifyidx4 [ValAndOff(valoff1).add(off2*4)] {sym} base idx mem) - for { - valoff1 := v.AuxInt - sym := v.Aux - base := v_0 - if v_1.Op != Op386ADDLconst { - break - } - off2 := v_1.AuxInt - idx := v_1.Args[0] - mem := v_2 - if !(ValAndOff(valoff1).canAdd(off2 * 4)) { - break - } - v.reset(Op386ANDLconstmodifyidx4) - v.AuxInt = ValAndOff(valoff1).add(off2 * 4) - v.Aux = sym - v.AddArg3(base, idx, mem) - return true - } - // match: (ANDLconstmodifyidx4 [valoff1] {sym1} (LEAL [off2] {sym2} base) idx mem) - // cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (ANDLconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base idx mem) - for { - valoff1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - base := v_0.Args[0] - idx := v_1 - mem := v_2 - if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || 
!config.ctxt.Flag_shared)) { - break - } - v.reset(Op386ANDLconstmodifyidx4) - v.AuxInt = ValAndOff(valoff1).add(off2) - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(base, idx, mem) - return true - } - return false -} -func rewriteValue386_Op386ANDLload(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (ANDLload [off1] {sym} val (ADDLconst [off2] base) mem) - // cond: is32Bit(off1+off2) - // result: (ANDLload [off1+off2] {sym} val base mem) - for { - off1 := v.AuxInt - sym := v.Aux - val := v_0 - if v_1.Op != Op386ADDLconst { - break - } - off2 := v_1.AuxInt - base := v_1.Args[0] - mem := v_2 - if !(is32Bit(off1 + off2)) { + if !(is32Bit(off1 + off2)) { break } v.reset(Op386ANDLload) @@ -2183,109 +1701,6 @@ func rewriteValue386_Op386ANDLload(v *Value) bool { v.AddArg3(val, base, mem) return true } - // match: (ANDLload [off1] {sym1} val (LEAL4 [off2] {sym2} ptr idx) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (ANDLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val ptr idx mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - val := v_0 - if v_1.Op != Op386LEAL4 { - break - } - off2 := v_1.AuxInt - sym2 := v_1.Aux - idx := v_1.Args[1] - ptr := v_1.Args[0] - mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(Op386ANDLloadidx4) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg4(val, ptr, idx, mem) - return true - } - return false -} -func rewriteValue386_Op386ANDLloadidx4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (ANDLloadidx4 [off1] {sym} val (ADDLconst [off2] base) idx mem) - // cond: is32Bit(off1+off2) - // result: (ANDLloadidx4 [off1+off2] {sym} val base idx mem) - for { - off1 := v.AuxInt - sym := v.Aux - val := v_0 - if v_1.Op != Op386ADDLconst { - break - } - off2 := v_1.AuxInt - base := v_1.Args[0] - idx := v_2 - mem := v_3 - if !(is32Bit(off1 + off2)) { - break - } - v.reset(Op386ANDLloadidx4) - v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg4(val, base, idx, mem) - return true - } - // match: (ANDLloadidx4 [off1] {sym} val base (ADDLconst [off2] idx) mem) - // cond: is32Bit(off1+off2*4) - // result: (ANDLloadidx4 [off1+off2*4] {sym} val base idx mem) - for { - off1 := v.AuxInt - sym := v.Aux - val := v_0 - base := v_1 - if v_2.Op != Op386ADDLconst { - break - } - off2 := v_2.AuxInt - idx := v_2.Args[0] - mem := v_3 - if !(is32Bit(off1 + off2*4)) { - break - } - v.reset(Op386ANDLloadidx4) - v.AuxInt = off1 + off2*4 - v.Aux = sym - v.AddArg4(val, base, idx, mem) - return true - } - // match: (ANDLloadidx4 [off1] {sym1} val (LEAL [off2] {sym2} base) idx mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (ANDLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val base idx mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - val := v_0 - if v_1.Op != Op386LEAL { - break - } - off2 := v_1.AuxInt - sym2 := v_1.Aux - base := v_1.Args[0] - idx := v_2 - mem := v_3 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { - break - } - v.reset(Op386ANDLloadidx4) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg4(val, base, idx, mem) - return true - } return false } func rewriteValue386_Op386ANDLmodify(v *Value) bool { @@ -2341,107 +1756,6 @@ func rewriteValue386_Op386ANDLmodify(v *Value) bool { } return false } -func 
rewriteValue386_Op386ANDLmodifyidx4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (ANDLmodifyidx4 [off1] {sym} (ADDLconst [off2] base) idx val mem) - // cond: is32Bit(off1+off2) - // result: (ANDLmodifyidx4 [off1+off2] {sym} base idx val mem) - for { - off1 := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDLconst { - break - } - off2 := v_0.AuxInt - base := v_0.Args[0] - idx := v_1 - val := v_2 - mem := v_3 - if !(is32Bit(off1 + off2)) { - break - } - v.reset(Op386ANDLmodifyidx4) - v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg4(base, idx, val, mem) - return true - } - // match: (ANDLmodifyidx4 [off1] {sym} base (ADDLconst [off2] idx) val mem) - // cond: is32Bit(off1+off2*4) - // result: (ANDLmodifyidx4 [off1+off2*4] {sym} base idx val mem) - for { - off1 := v.AuxInt - sym := v.Aux - base := v_0 - if v_1.Op != Op386ADDLconst { - break - } - off2 := v_1.AuxInt - idx := v_1.Args[0] - val := v_2 - mem := v_3 - if !(is32Bit(off1 + off2*4)) { - break - } - v.reset(Op386ANDLmodifyidx4) - v.AuxInt = off1 + off2*4 - v.Aux = sym - v.AddArg4(base, idx, val, mem) - return true - } - // match: (ANDLmodifyidx4 [off1] {sym1} (LEAL [off2] {sym2} base) idx val mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (ANDLmodifyidx4 [off1+off2] {mergeSym(sym1,sym2)} base idx val mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - base := v_0.Args[0] - idx := v_1 - val := v_2 - mem := v_3 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { - break - } - v.reset(Op386ANDLmodifyidx4) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg4(base, idx, val, mem) - return true - } - // match: (ANDLmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem) - // cond: validValAndOff(c,off) - // result: (ANDLconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - idx := v_1 - if v_2.Op != Op386MOVLconst { - break - } - c := v_2.AuxInt - mem := v_3 - if !(validValAndOff(c, off)) { - break - } - v.reset(Op386ANDLconstmodifyidx4) - v.AuxInt = makeValAndOff(c, off) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - return false -} func rewriteValue386_Op386CMPB(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -3768,6 +3082,76 @@ func rewriteValue386_Op386LEAL1(v *Value) bool { } break } + // match: (LEAL1 [off1] {sym1} x (LEAL1 [off2] {sym2} y y)) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (LEAL2 [off1+off2] {mergeSym(sym1, sym2)} x y) + for { + off1 := v.AuxInt + sym1 := v.Aux + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != Op386LEAL1 { + continue + } + off2 := v_1.AuxInt + sym2 := v_1.Aux + y := v_1.Args[1] + if y != v_1.Args[0] || !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + continue + } + v.reset(Op386LEAL2) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg2(x, y) + return true + } + break + } + // match: (LEAL1 [off1] {sym1} x (LEAL1 [off2] {sym2} x y)) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (LEAL2 [off1+off2] {mergeSym(sym1, sym2)} y x) + for { + off1 := v.AuxInt + sym1 := v.Aux + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != Op386LEAL1 { + continue + } + off2 := v_1.AuxInt + sym2 := 
v_1.Aux + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 { + if x != v_1_0 { + continue + } + y := v_1_1 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + continue + } + v.reset(Op386LEAL2) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg2(y, x) + return true + } + } + break + } + // match: (LEAL1 [0] {nil} x y) + // result: (ADDL x y) + for { + if v.AuxInt != 0 || v.Aux != nil { + break + } + x := v_0 + y := v_1 + v.reset(Op386ADDL) + v.AddArg2(x, y) + return true + } return false } func rewriteValue386_Op386LEAL2(v *Value) bool { @@ -3869,8 +3253,32 @@ func rewriteValue386_Op386LEAL2(v *Value) bool { v.AddArg2(x, y) return true } - return false -} + // match: (LEAL2 [off1] {sym} x (LEAL1 [off2] {nil} y y)) + // cond: is32Bit(off1+2*off2) + // result: (LEAL4 [off1+2*off2] {sym} x y) + for { + off1 := v.AuxInt + sym := v.Aux + x := v_0 + if v_1.Op != Op386LEAL1 { + break + } + off2 := v_1.AuxInt + if v_1.Aux != nil { + break + } + y := v_1.Args[1] + if y != v_1.Args[0] || !(is32Bit(off1 + 2*off2)) { + break + } + v.reset(Op386LEAL4) + v.AuxInt = off1 + 2*off2 + v.Aux = sym + v.AddArg2(x, y) + return true + } + return false +} func rewriteValue386_Op386LEAL4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -3954,6 +3362,30 @@ func rewriteValue386_Op386LEAL4(v *Value) bool { v.AddArg2(x, y) return true } + // match: (LEAL4 [off1] {sym} x (LEAL1 [off2] {nil} y y)) + // cond: is32Bit(off1+4*off2) + // result: (LEAL8 [off1+4*off2] {sym} x y) + for { + off1 := v.AuxInt + sym := v.Aux + x := v_0 + if v_1.Op != Op386LEAL1 { + break + } + off2 := v_1.AuxInt + if v_1.Aux != nil { + break + } + y := v_1.Args[1] + if y != v_1.Args[0] || !(is32Bit(off1 + 4*off2)) { + break + } + v.reset(Op386LEAL8) + v.AuxInt = off1 + 4*off2 + v.Aux = sym + v.AddArg2(x, y) + return true + } return false } func rewriteValue386_Op386LEAL8(v *Value) bool { @@ -4146,30 +3578,6 @@ func rewriteValue386_Op386MOVBLZX(v *Value) bool { v0.AddArg2(ptr, mem) return true } - // match: (MOVBLZX x:(MOVBloadidx1 [off] {sym} ptr idx mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVBloadidx1 [off] {sym} ptr idx mem) - for { - x := v_0 - if x.Op != Op386MOVBloadidx1 { - break - } - off := x.AuxInt - sym := x.Aux - mem := x.Args[2] - ptr := x.Args[0] - idx := x.Args[1] - if !(x.Uses == 1 && clobber(x)) { - break - } - b = x.Block - v0 := b.NewValue0(v.Pos, Op386MOVBloadidx1, v.Type) - v.copyOf(v0) - v0.AuxInt = off - v0.Aux = sym - v0.AddArg3(ptr, idx, mem) - return true - } // match: (MOVBLZX (ANDLconst [c] x)) // result: (ANDLconst [c & 0xff] x) for { @@ -4254,56 +3662,6 @@ func rewriteValue386_Op386MOVBload(v *Value) bool { v.AddArg2(base, mem) return true } - // match: (MOVBload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVBloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL1 { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(Op386MOVBloadidx1) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVBload [off] {sym} (ADDL ptr idx) mem) - // cond: ptr.Op != OpSB - // result: (MOVBloadidx1 [off] {sym} ptr idx mem) - for { - off := v.AuxInt - sym := v.Aux - if v_0.Op 
!= Op386ADDL { - break - } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - ptr := v_0_0 - idx := v_0_1 - mem := v_1 - if !(ptr.Op != OpSB) { - continue - } - v.reset(Op386MOVBloadidx1) - v.AuxInt = off - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - break - } // match: (MOVBload [off] {sym} (SB) _) // cond: symIsRO(sym) // result: (MOVLconst [int64(read8(sym, off))]) @@ -4319,54 +3677,6 @@ func rewriteValue386_Op386MOVBload(v *Value) bool { } return false } -func rewriteValue386_Op386MOVBloadidx1(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVBloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem) - // result: (MOVBloadidx1 [int64(int32(c+d))] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != Op386ADDLconst { - continue - } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - mem := v_2 - v.reset(Op386MOVBloadidx1) - v.AuxInt = int64(int32(c + d)) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - break - } - // match: (MOVBloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem) - // result: (MOVBloadidx1 [int64(int32(c+d))] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - ptr := v_0 - if v_1.Op != Op386ADDLconst { - continue - } - d := v_1.AuxInt - idx := v_1.Args[0] - mem := v_2 - v.reset(Op386MOVBloadidx1) - v.AuxInt = int64(int32(c + d)) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - break - } - return false -} func rewriteValue386_Op386MOVBstore(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] @@ -4473,58 +3783,6 @@ func rewriteValue386_Op386MOVBstore(v *Value) bool { v.AddArg3(base, val, mem) return true } - // match: (MOVBstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVBstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL1 { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(Op386MOVBstoreidx1) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg4(ptr, idx, val, mem) - return true - } - // match: (MOVBstore [off] {sym} (ADDL ptr idx) val mem) - // cond: ptr.Op != OpSB - // result: (MOVBstoreidx1 [off] {sym} ptr idx val mem) - for { - off := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDL { - break - } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - ptr := v_0_0 - idx := v_0_1 - val := v_1 - mem := v_2 - if !(ptr.Op != OpSB) { - continue - } - v.reset(Op386MOVBstoreidx1) - v.AuxInt = off - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - break - } // match: (MOVBstore [i] {s} p (SHRWconst [8] w) x:(MOVBstore [i-1] {s} p w mem)) // cond: x.Uses == 1 && clobber(x) // result: (MOVWstore [i-1] {s} p w mem) @@ -4657,6 +3915,134 @@ func rewriteValue386_Op386MOVBstore(v *Value) bool { v.AddArg3(p, w0, mem) return true } + // match: (MOVBstore [i] {s} p1 (SHRWconst [8] w) x:(MOVBstore [i] {s} p0 w mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x) + // result: (MOVWstore [i] {s} p0 w mem) + for { + i := v.AuxInt + s := v.Aux + p1 := v_0 + if v_1.Op 
!= Op386SHRWconst || v_1.AuxInt != 8 { + break + } + w := v_1.Args[0] + x := v_2 + if x.Op != Op386MOVBstore || x.AuxInt != i || x.Aux != s { + break + } + mem := x.Args[2] + p0 := x.Args[0] + if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) { + break + } + v.reset(Op386MOVWstore) + v.AuxInt = i + v.Aux = s + v.AddArg3(p0, w, mem) + return true + } + // match: (MOVBstore [i] {s} p1 (SHRLconst [8] w) x:(MOVBstore [i] {s} p0 w mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x) + // result: (MOVWstore [i] {s} p0 w mem) + for { + i := v.AuxInt + s := v.Aux + p1 := v_0 + if v_1.Op != Op386SHRLconst || v_1.AuxInt != 8 { + break + } + w := v_1.Args[0] + x := v_2 + if x.Op != Op386MOVBstore || x.AuxInt != i || x.Aux != s { + break + } + mem := x.Args[2] + p0 := x.Args[0] + if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) { + break + } + v.reset(Op386MOVWstore) + v.AuxInt = i + v.Aux = s + v.AddArg3(p0, w, mem) + return true + } + // match: (MOVBstore [i] {s} p0 w x:(MOVBstore {s} [i] p1 (SHRWconst [8] w) mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x) + // result: (MOVWstore [i] {s} p0 w mem) + for { + i := v.AuxInt + s := v.Aux + p0 := v_0 + w := v_1 + x := v_2 + if x.Op != Op386MOVBstore || x.AuxInt != i || x.Aux != s { + break + } + mem := x.Args[2] + p1 := x.Args[0] + x_1 := x.Args[1] + if x_1.Op != Op386SHRWconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) { + break + } + v.reset(Op386MOVWstore) + v.AuxInt = i + v.Aux = s + v.AddArg3(p0, w, mem) + return true + } + // match: (MOVBstore [i] {s} p0 w x:(MOVBstore {s} [i] p1 (SHRLconst [8] w) mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x) + // result: (MOVWstore [i] {s} p0 w mem) + for { + i := v.AuxInt + s := v.Aux + p0 := v_0 + w := v_1 + x := v_2 + if x.Op != Op386MOVBstore || x.AuxInt != i || x.Aux != s { + break + } + mem := x.Args[2] + p1 := x.Args[0] + x_1 := x.Args[1] + if x_1.Op != Op386SHRLconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) { + break + } + v.reset(Op386MOVWstore) + v.AuxInt = i + v.Aux = s + v.AddArg3(p0, w, mem) + return true + } + // match: (MOVBstore [i] {s} p1 (SHRLconst [j] w) x:(MOVBstore [i] {s} p0 w0:(SHRLconst [j-8] w) mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x) + // result: (MOVWstore [i] {s} p0 w0 mem) + for { + i := v.AuxInt + s := v.Aux + p1 := v_0 + if v_1.Op != Op386SHRLconst { + break + } + j := v_1.AuxInt + w := v_1.Args[0] + x := v_2 + if x.Op != Op386MOVBstore || x.AuxInt != i || x.Aux != s { + break + } + mem := x.Args[2] + p0 := x.Args[0] + w0 := x.Args[1] + if w0.Op != Op386SHRLconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) { + break + } + v.reset(Op386MOVWstore) + v.AuxInt = i + v.Aux = s + v.AddArg3(p0, w0, mem) + return true + } return false } func rewriteValue386_Op386MOVBstoreconst(v *Value) bool { @@ -4707,55 +4093,15 @@ func rewriteValue386_Op386MOVBstoreconst(v *Value) bool { v.AddArg2(ptr, mem) return true } - // match: (MOVBstoreconst [x] {sym1} (LEAL1 [off] {sym2} ptr idx) mem) - // cond: canMergeSym(sym1, sym2) - // result: (MOVBstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem) + // match: (MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() + 1 
== ValAndOff(c).Off() && clobber(x) + // result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem) for { - x := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL1 { - break - } - off := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - mem := v_1 - if !(canMergeSym(sym1, sym2)) { - break - } - v.reset(Op386MOVBstoreconstidx1) - v.AuxInt = ValAndOff(x).add(off) - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVBstoreconst [x] {sym} (ADDL ptr idx) mem) - // result: (MOVBstoreconstidx1 [x] {sym} ptr idx mem) - for { - x := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDL { - break - } - idx := v_0.Args[1] - ptr := v_0.Args[0] - mem := v_1 - v.reset(Op386MOVBstoreconstidx1) - v.AuxInt = x - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem)) - // cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x) - // result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem) - for { - c := v.AuxInt - s := v.Aux - p := v_0 - x := v_1 - if x.Op != Op386MOVBstoreconst { + c := v.AuxInt + s := v.Aux + p := v_0 + x := v_1 + if x.Op != Op386MOVBstoreconst { break } a := x.AuxInt @@ -4797,296 +4143,57 @@ func rewriteValue386_Op386MOVBstoreconst(v *Value) bool { v.AddArg2(p, mem) return true } - return false -} -func rewriteValue386_Op386MOVBstoreconstidx1(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVBstoreconstidx1 [x] {sym} (ADDLconst [c] ptr) idx mem) - // result: (MOVBstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem) - for { - x := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDLconst { - break - } - c := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - mem := v_2 - v.reset(Op386MOVBstoreconstidx1) - v.AuxInt = ValAndOff(x).add(c) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVBstoreconstidx1 [x] {sym} ptr (ADDLconst [c] idx) mem) - // result: (MOVBstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem) - for { - x := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != Op386ADDLconst { - break - } - c := v_1.AuxInt - idx := v_1.Args[0] - mem := v_2 - v.reset(Op386MOVBstoreconstidx1) - v.AuxInt = ValAndOff(x).add(c) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVBstoreconstidx1 [c] {s} p i x:(MOVBstoreconstidx1 [a] {s} p i mem)) - // cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x) - // result: (MOVWstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p i mem) + // match: (MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() == ValAndOff(c).Off() && sequentialAddresses(p0, p1, 1) && clobber(x) + // result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem) for { c := v.AuxInt s := v.Aux - p := v_0 - i := v_1 - x := v_2 - if x.Op != Op386MOVBstoreconstidx1 { + p1 := v_0 + x := v_1 + if x.Op != Op386MOVBstoreconst { break } a := x.AuxInt if x.Aux != s { break } - mem := x.Args[2] - if p != x.Args[0] || i != x.Args[1] || !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) { + mem := x.Args[1] + p0 := x.Args[0] + if !(x.Uses == 1 && ValAndOff(a).Off() == ValAndOff(c).Off() && sequentialAddresses(p0, p1, 1) && 
clobber(x)) { break } - v.reset(Op386MOVWstoreconstidx1) + v.reset(Op386MOVWstoreconst) v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xff|ValAndOff(c).Val()<<8, ValAndOff(a).Off()) v.Aux = s - v.AddArg3(p, i, mem) + v.AddArg2(p0, mem) return true } - return false -} -func rewriteValue386_Op386MOVBstoreidx1(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVBstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem) - // result: (MOVBstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem) - for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != Op386ADDLconst { - continue - } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - val := v_2 - mem := v_3 - v.reset(Op386MOVBstoreidx1) - v.AuxInt = int64(int32(c + d)) - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - break - } - // match: (MOVBstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem) - // result: (MOVBstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem) - for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - ptr := v_0 - if v_1.Op != Op386ADDLconst { - continue - } - d := v_1.AuxInt - idx := v_1.Args[0] - val := v_2 - mem := v_3 - v.reset(Op386MOVBstoreidx1) - v.AuxInt = int64(int32(c + d)) - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - break - } - // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVWstoreidx1 [i-1] {s} p idx w mem) - for { - i := v.AuxInt - s := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - p := v_0 - idx := v_1 - if v_2.Op != Op386SHRLconst || v_2.AuxInt != 8 { - continue - } - w := v_2.Args[0] - x := v_3 - if x.Op != Op386MOVBstoreidx1 || x.AuxInt != i-1 || x.Aux != s { - continue - } - mem := x.Args[3] - x_0 := x.Args[0] - x_1 := x.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 { - if p != x_0 || idx != x_1 || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) { - continue - } - v.reset(Op386MOVWstoreidx1) - v.AuxInt = i - 1 - v.Aux = s - v.AddArg4(p, idx, w, mem) - return true - } - } - break - } - // match: (MOVBstoreidx1 [i] {s} p idx (SHRWconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVWstoreidx1 [i-1] {s} p idx w mem) - for { - i := v.AuxInt - s := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - p := v_0 - idx := v_1 - if v_2.Op != Op386SHRWconst || v_2.AuxInt != 8 { - continue - } - w := v_2.Args[0] - x := v_3 - if x.Op != Op386MOVBstoreidx1 || x.AuxInt != i-1 || x.Aux != s { - continue - } - mem := x.Args[3] - x_0 := x.Args[0] - x_1 := x.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 { - if p != x_0 || idx != x_1 || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) { - continue - } - v.reset(Op386MOVWstoreidx1) - v.AuxInt = i - 1 - v.Aux = s - v.AddArg4(p, idx, w, mem) - return true - } - } - break - } - // match: (MOVBstoreidx1 [i] {s} p idx w x:(MOVBstoreidx1 [i+1] {s} p idx (SHRLconst [8] w) mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVWstoreidx1 [i] {s} p idx w mem) + // match: (MOVBstoreconst [a] {s} p0 x:(MOVBstoreconst [c] {s} p1 mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() == ValAndOff(c).Off() && sequentialAddresses(p0, p1, 1) && clobber(x) + // result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem) 
for { - i := v.AuxInt + a := v.AuxInt s := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - p := v_0 - idx := v_1 - w := v_2 - x := v_3 - if x.Op != Op386MOVBstoreidx1 || x.AuxInt != i+1 || x.Aux != s { - continue - } - mem := x.Args[3] - x_0 := x.Args[0] - x_1 := x.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 { - if p != x_0 || idx != x_1 { - continue - } - x_2 := x.Args[2] - if x_2.Op != Op386SHRLconst || x_2.AuxInt != 8 || w != x_2.Args[0] || !(x.Uses == 1 && clobber(x)) { - continue - } - v.reset(Op386MOVWstoreidx1) - v.AuxInt = i - v.Aux = s - v.AddArg4(p, idx, w, mem) - return true - } + p0 := v_0 + x := v_1 + if x.Op != Op386MOVBstoreconst { + break } - break - } - // match: (MOVBstoreidx1 [i] {s} p idx w x:(MOVBstoreidx1 [i+1] {s} p idx (SHRWconst [8] w) mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVWstoreidx1 [i] {s} p idx w mem) - for { - i := v.AuxInt - s := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - p := v_0 - idx := v_1 - w := v_2 - x := v_3 - if x.Op != Op386MOVBstoreidx1 || x.AuxInt != i+1 || x.Aux != s { - continue - } - mem := x.Args[3] - x_0 := x.Args[0] - x_1 := x.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 { - if p != x_0 || idx != x_1 { - continue - } - x_2 := x.Args[2] - if x_2.Op != Op386SHRWconst || x_2.AuxInt != 8 || w != x_2.Args[0] || !(x.Uses == 1 && clobber(x)) { - continue - } - v.reset(Op386MOVWstoreidx1) - v.AuxInt = i - v.Aux = s - v.AddArg4(p, idx, w, mem) - return true - } + c := x.AuxInt + if x.Aux != s { + break } - break - } - // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRLconst [j-8] w) mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem) - for { - i := v.AuxInt - s := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - p := v_0 - idx := v_1 - if v_2.Op != Op386SHRLconst { - continue - } - j := v_2.AuxInt - w := v_2.Args[0] - x := v_3 - if x.Op != Op386MOVBstoreidx1 || x.AuxInt != i-1 || x.Aux != s { - continue - } - mem := x.Args[3] - x_0 := x.Args[0] - x_1 := x.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 { - if p != x_0 || idx != x_1 { - continue - } - w0 := x.Args[2] - if w0.Op != Op386SHRLconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) { - continue - } - v.reset(Op386MOVWstoreidx1) - v.AuxInt = i - 1 - v.Aux = s - v.AddArg4(p, idx, w0, mem) - return true - } + mem := x.Args[1] + p1 := x.Args[0] + if !(x.Uses == 1 && ValAndOff(a).Off() == ValAndOff(c).Off() && sequentialAddresses(p0, p1, 1) && clobber(x)) { + break } - break + v.reset(Op386MOVWstoreconst) + v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xff|ValAndOff(c).Val()<<8, ValAndOff(a).Off()) + v.Aux = s + v.AddArg2(p0, mem) + return true } return false } @@ -5158,252 +4265,69 @@ func rewriteValue386_Op386MOVLload(v *Value) bool { v.AddArg2(base, mem) return true } - // match: (MOVLload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVLloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) + // match: (MOVLload [off] {sym} (SB) _) + // cond: symIsRO(sym) + // result: (MOVLconst [int64(int32(read32(sym, off, config.ctxt.Arch.ByteOrder)))]) for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL1 { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) 
{ + off := v.AuxInt + sym := v.Aux + if v_0.Op != OpSB || !(symIsRO(sym)) { break } - v.reset(Op386MOVLloadidx1) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(ptr, idx, mem) + v.reset(Op386MOVLconst) + v.AuxInt = int64(int32(read32(sym, off, config.ctxt.Arch.ByteOrder))) return true } - // match: (MOVLload [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) + return false +} +func rewriteValue386_Op386MOVLstore(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (MOVLstore [off1] {sym} (ADDLconst [off2] ptr) val mem) + // cond: is32Bit(off1+off2) + // result: (MOVLstore [off1+off2] {sym} ptr val mem) for { off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL4 { + sym := v.Aux + if v_0.Op != Op386ADDLconst { break } off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] ptr := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + val := v_1 + mem := v_2 + if !(is32Bit(off1 + off2)) { break } - v.reset(Op386MOVLloadidx4) + v.reset(Op386MOVLstore) v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(ptr, idx, mem) + v.Aux = sym + v.AddArg3(ptr, val, mem) return true } - // match: (MOVLload [off] {sym} (ADDL ptr idx) mem) - // cond: ptr.Op != OpSB - // result: (MOVLloadidx1 [off] {sym} ptr idx mem) + // match: (MOVLstore [off] {sym} ptr (MOVLconst [c]) mem) + // cond: validOff(off) + // result: (MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem) for { off := v.AuxInt sym := v.Aux - if v_0.Op != Op386ADDL { + ptr := v_0 + if v_1.Op != Op386MOVLconst { break } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - ptr := v_0_0 - idx := v_0_1 - mem := v_1 - if !(ptr.Op != OpSB) { - continue - } - v.reset(Op386MOVLloadidx1) - v.AuxInt = off - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true + c := v_1.AuxInt + mem := v_2 + if !(validOff(off)) { + break } - break - } - // match: (MOVLload [off] {sym} (SB) _) - // cond: symIsRO(sym) - // result: (MOVLconst [int64(int32(read32(sym, off, config.ctxt.Arch.ByteOrder)))]) - for { - off := v.AuxInt - sym := v.Aux - if v_0.Op != OpSB || !(symIsRO(sym)) { - break - } - v.reset(Op386MOVLconst) - v.AuxInt = int64(int32(read32(sym, off, config.ctxt.Arch.ByteOrder))) - return true - } - return false -} -func rewriteValue386_Op386MOVLloadidx1(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVLloadidx1 [c] {sym} ptr (SHLLconst [2] idx) mem) - // result: (MOVLloadidx4 [c] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - ptr := v_0 - if v_1.Op != Op386SHLLconst || v_1.AuxInt != 2 { - continue - } - idx := v_1.Args[0] - mem := v_2 - v.reset(Op386MOVLloadidx4) - v.AuxInt = c - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - break - } - // match: (MOVLloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem) - // result: (MOVLloadidx1 [int64(int32(c+d))] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != Op386ADDLconst { - continue - } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - mem := v_2 - v.reset(Op386MOVLloadidx1) - v.AuxInt = int64(int32(c + d)) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return 
true - } - break - } - // match: (MOVLloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem) - // result: (MOVLloadidx1 [int64(int32(c+d))] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - ptr := v_0 - if v_1.Op != Op386ADDLconst { - continue - } - d := v_1.AuxInt - idx := v_1.Args[0] - mem := v_2 - v.reset(Op386MOVLloadidx1) - v.AuxInt = int64(int32(c + d)) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - break - } - return false -} -func rewriteValue386_Op386MOVLloadidx4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVLloadidx4 [c] {sym} (ADDLconst [d] ptr) idx mem) - // result: (MOVLloadidx4 [int64(int32(c+d))] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDLconst { - break - } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - mem := v_2 - v.reset(Op386MOVLloadidx4) - v.AuxInt = int64(int32(c + d)) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVLloadidx4 [c] {sym} ptr (ADDLconst [d] idx) mem) - // result: (MOVLloadidx4 [int64(int32(c+4*d))] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != Op386ADDLconst { - break - } - d := v_1.AuxInt - idx := v_1.Args[0] - mem := v_2 - v.reset(Op386MOVLloadidx4) - v.AuxInt = int64(int32(c + 4*d)) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - return false -} -func rewriteValue386_Op386MOVLstore(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (MOVLstore [off1] {sym} (ADDLconst [off2] ptr) val mem) - // cond: is32Bit(off1+off2) - // result: (MOVLstore [off1+off2] {sym} ptr val mem) - for { - off1 := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDLconst { - break - } - off2 := v_0.AuxInt - ptr := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1 + off2)) { - break - } - v.reset(Op386MOVLstore) - v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg3(ptr, val, mem) - return true - } - // match: (MOVLstore [off] {sym} ptr (MOVLconst [c]) mem) - // cond: validOff(off) - // result: (MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != Op386MOVLconst { - break - } - c := v_1.AuxInt - mem := v_2 - if !(validOff(off)) { - break - } - v.reset(Op386MOVLstoreconst) - v.AuxInt = makeValAndOff(int64(int32(c)), off) - v.Aux = sym - v.AddArg2(ptr, mem) - return true + v.reset(Op386MOVLstoreconst) + v.AuxInt = makeValAndOff(int64(int32(c)), off) + v.Aux = sym + v.AddArg2(ptr, mem) + return true } // match: (MOVLstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) @@ -5428,82 +4352,6 @@ func rewriteValue386_Op386MOVLstore(v *Value) bool { v.AddArg3(base, val, mem) return true } - // match: (MOVLstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVLstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL1 { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(Op386MOVLstoreidx1) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg4(ptr, idx, val, mem) - return true - } - 
// match: (MOVLstore [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) val mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVLstoreidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL4 { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(Op386MOVLstoreidx4) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg4(ptr, idx, val, mem) - return true - } - // match: (MOVLstore [off] {sym} (ADDL ptr idx) val mem) - // cond: ptr.Op != OpSB - // result: (MOVLstoreidx1 [off] {sym} ptr idx val mem) - for { - off := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDL { - break - } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - ptr := v_0_0 - idx := v_0_1 - val := v_1 - mem := v_2 - if !(ptr.Op != OpSB) { - continue - } - v.reset(Op386MOVLstoreidx1) - v.AuxInt = off - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - break - } // match: (MOVLstore {sym} [off] ptr y:(ADDLload x [off] {sym} ptr mem) mem) // cond: y.Uses==1 && clobber(y) // result: (ADDLmodify [off] {sym} ptr x mem) @@ -5912,2089 +4760,328 @@ func rewriteValue386_Op386MOVLstoreconst(v *Value) bool { v.AddArg2(ptr, mem) return true } - // match: (MOVLstoreconst [x] {sym1} (LEAL1 [off] {sym2} ptr idx) mem) - // cond: canMergeSym(sym1, sym2) - // result: (MOVLstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem) + return false +} +func rewriteValue386_Op386MOVSDconst(v *Value) bool { + b := v.Block + config := b.Func.Config + typ := &b.Func.Config.Types + // match: (MOVSDconst [c]) + // cond: config.ctxt.Flag_shared + // result: (MOVSDconst2 (MOVSDconst1 [c])) + for { + c := v.AuxInt + if !(config.ctxt.Flag_shared) { + break + } + v.reset(Op386MOVSDconst2) + v0 := b.NewValue0(v.Pos, Op386MOVSDconst1, typ.UInt32) + v0.AuxInt = c + v.AddArg(v0) + return true + } + return false +} +func rewriteValue386_Op386MOVSDload(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (MOVSDload [off1] {sym} (ADDLconst [off2] ptr) mem) + // cond: is32Bit(off1+off2) + // result: (MOVSDload [off1+off2] {sym} ptr mem) for { - x := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL1 { + off1 := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDLconst { break } - off := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] + off2 := v_0.AuxInt ptr := v_0.Args[0] mem := v_1 - if !(canMergeSym(sym1, sym2)) { + if !(is32Bit(off1 + off2)) { break } - v.reset(Op386MOVLstoreconstidx1) - v.AuxInt = ValAndOff(x).add(off) - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(ptr, idx, mem) + v.reset(Op386MOVSDload) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg2(ptr, mem) return true } - // match: (MOVLstoreconst [x] {sym1} (LEAL4 [off] {sym2} ptr idx) mem) - // cond: canMergeSym(sym1, sym2) - // result: (MOVLstoreconstidx4 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem) + // match: (MOVSDload [off1] {sym1} (LEAL [off2] {sym2} base) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (MOVSDload [off1+off2] {mergeSym(sym1,sym2)} base mem) for { - x := v.AuxInt + off1 := v.AuxInt sym1 := v.Aux - if v_0.Op != Op386LEAL4 { + if v_0.Op != Op386LEAL { break } - off := v_0.AuxInt + off2 := 
v_0.AuxInt sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] + base := v_0.Args[0] mem := v_1 - if !(canMergeSym(sym1, sym2)) { + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { break } - v.reset(Op386MOVLstoreconstidx4) - v.AuxInt = ValAndOff(x).add(off) + v.reset(Op386MOVSDload) + v.AuxInt = off1 + off2 v.Aux = mergeSym(sym1, sym2) - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVLstoreconst [x] {sym} (ADDL ptr idx) mem) - // result: (MOVLstoreconstidx1 [x] {sym} ptr idx mem) - for { - x := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDL { - break - } - idx := v_0.Args[1] - ptr := v_0.Args[0] - mem := v_1 - v.reset(Op386MOVLstoreconstidx1) - v.AuxInt = x - v.Aux = sym - v.AddArg3(ptr, idx, mem) + v.AddArg2(base, mem) return true } return false } -func rewriteValue386_Op386MOVLstoreconstidx1(v *Value) bool { +func rewriteValue386_Op386MOVSDstore(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (MOVLstoreconstidx1 [c] {sym} ptr (SHLLconst [2] idx) mem) - // result: (MOVLstoreconstidx4 [c] {sym} ptr idx mem) + b := v.Block + config := b.Func.Config + // match: (MOVSDstore [off1] {sym} (ADDLconst [off2] ptr) val mem) + // cond: is32Bit(off1+off2) + // result: (MOVSDstore [off1+off2] {sym} ptr val mem) for { - c := v.AuxInt + off1 := v.AuxInt sym := v.Aux - ptr := v_0 - if v_1.Op != Op386SHLLconst || v_1.AuxInt != 2 { + if v_0.Op != Op386ADDLconst { break } - idx := v_1.Args[0] + off2 := v_0.AuxInt + ptr := v_0.Args[0] + val := v_1 mem := v_2 - v.reset(Op386MOVLstoreconstidx4) - v.AuxInt = c + if !(is32Bit(off1 + off2)) { + break + } + v.reset(Op386MOVSDstore) + v.AuxInt = off1 + off2 v.Aux = sym - v.AddArg3(ptr, idx, mem) + v.AddArg3(ptr, val, mem) return true } - // match: (MOVLstoreconstidx1 [x] {sym} (ADDLconst [c] ptr) idx mem) - // result: (MOVLstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem) + // match: (MOVSDstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (MOVSDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { - x := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDLconst { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL { break } - c := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + val := v_1 mem := v_2 - v.reset(Op386MOVLstoreconstidx1) - v.AuxInt = ValAndOff(x).add(c) - v.Aux = sym - v.AddArg3(ptr, idx, mem) + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(Op386MOVSDstore) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg3(base, val, mem) return true } - // match: (MOVLstoreconstidx1 [x] {sym} ptr (ADDLconst [c] idx) mem) - // result: (MOVLstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem) + return false +} +func rewriteValue386_Op386MOVSSconst(v *Value) bool { + b := v.Block + config := b.Func.Config + typ := &b.Func.Config.Types + // match: (MOVSSconst [c]) + // cond: config.ctxt.Flag_shared + // result: (MOVSSconst2 (MOVSSconst1 [c])) for { - x := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != Op386ADDLconst { + c := v.AuxInt + if !(config.ctxt.Flag_shared) { break } - c := v_1.AuxInt - idx := v_1.Args[0] - mem := v_2 - v.reset(Op386MOVLstoreconstidx1) - v.AuxInt = ValAndOff(x).add(c) - v.Aux = sym - v.AddArg3(ptr, idx, mem) + v.reset(Op386MOVSSconst2) + v0 := 
b.NewValue0(v.Pos, Op386MOVSSconst1, typ.UInt32) + v0.AuxInt = c + v.AddArg(v0) return true } return false } -func rewriteValue386_Op386MOVLstoreconstidx4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValue386_Op386MOVSSload(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (MOVLstoreconstidx4 [x] {sym} (ADDLconst [c] ptr) idx mem) - // result: (MOVLstoreconstidx4 [ValAndOff(x).add(c)] {sym} ptr idx mem) + b := v.Block + config := b.Func.Config + // match: (MOVSSload [off1] {sym} (ADDLconst [off2] ptr) mem) + // cond: is32Bit(off1+off2) + // result: (MOVSSload [off1+off2] {sym} ptr mem) for { - x := v.AuxInt + off1 := v.AuxInt sym := v.Aux if v_0.Op != Op386ADDLconst { break } - c := v_0.AuxInt + off2 := v_0.AuxInt ptr := v_0.Args[0] - idx := v_1 - mem := v_2 - v.reset(Op386MOVLstoreconstidx4) - v.AuxInt = ValAndOff(x).add(c) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVLstoreconstidx4 [x] {sym} ptr (ADDLconst [c] idx) mem) - // result: (MOVLstoreconstidx4 [ValAndOff(x).add(4*c)] {sym} ptr idx mem) - for { - x := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != Op386ADDLconst { + mem := v_1 + if !(is32Bit(off1 + off2)) { break } - c := v_1.AuxInt - idx := v_1.Args[0] - mem := v_2 - v.reset(Op386MOVLstoreconstidx4) - v.AuxInt = ValAndOff(x).add(4 * c) + v.reset(Op386MOVSSload) + v.AuxInt = off1 + off2 v.Aux = sym - v.AddArg3(ptr, idx, mem) + v.AddArg2(ptr, mem) return true } - return false -} -func rewriteValue386_Op386MOVLstoreidx1(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVLstoreidx1 [c] {sym} ptr (SHLLconst [2] idx) val mem) - // result: (MOVLstoreidx4 [c] {sym} ptr idx val mem) - for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - ptr := v_0 - if v_1.Op != Op386SHLLconst || v_1.AuxInt != 2 { - continue - } - idx := v_1.Args[0] - val := v_2 - mem := v_3 - v.reset(Op386MOVLstoreidx4) - v.AuxInt = c - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - break - } - // match: (MOVLstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem) - // result: (MOVLstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem) + // match: (MOVSSload [off1] {sym1} (LEAL [off2] {sym2} base) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (MOVSSload [off1+off2] {mergeSym(sym1,sym2)} base mem) for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != Op386ADDLconst { - continue - } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - val := v_2 - mem := v_3 - v.reset(Op386MOVLstoreidx1) - v.AuxInt = int64(int32(c + d)) - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL { + break } - break - } - // match: (MOVLstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem) - // result: (MOVLstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem) - for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - ptr := v_0 - if v_1.Op != Op386ADDLconst { - continue - } - d := v_1.AuxInt - idx := v_1.Args[0] - val := v_2 - mem := v_3 - v.reset(Op386MOVLstoreidx1) - v.AuxInt = int64(int32(c + d)) - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + mem := v_1 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + 
break } - break + v.reset(Op386MOVSSload) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg2(base, mem) + return true } return false } -func rewriteValue386_Op386MOVLstoreidx4(v *Value) bool { - v_3 := v.Args[3] +func rewriteValue386_Op386MOVSSstore(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (MOVLstoreidx4 [c] {sym} (ADDLconst [d] ptr) idx val mem) - // result: (MOVLstoreidx4 [int64(int32(c+d))] {sym} ptr idx val mem) + b := v.Block + config := b.Func.Config + // match: (MOVSSstore [off1] {sym} (ADDLconst [off2] ptr) val mem) + // cond: is32Bit(off1+off2) + // result: (MOVSSstore [off1+off2] {sym} ptr val mem) for { - c := v.AuxInt + off1 := v.AuxInt sym := v.Aux if v_0.Op != Op386ADDLconst { break } - d := v_0.AuxInt + off2 := v_0.AuxInt ptr := v_0.Args[0] - idx := v_1 - val := v_2 - mem := v_3 - v.reset(Op386MOVLstoreidx4) - v.AuxInt = int64(int32(c + d)) + val := v_1 + mem := v_2 + if !(is32Bit(off1 + off2)) { + break + } + v.reset(Op386MOVSSstore) + v.AuxInt = off1 + off2 v.Aux = sym - v.AddArg4(ptr, idx, val, mem) + v.AddArg3(ptr, val, mem) return true } - // match: (MOVLstoreidx4 [c] {sym} ptr (ADDLconst [d] idx) val mem) - // result: (MOVLstoreidx4 [int64(int32(c+4*d))] {sym} ptr idx val mem) + // match: (MOVSSstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (MOVSSstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { - c := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != Op386ADDLconst { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL { break } - d := v_1.AuxInt - idx := v_1.Args[0] - val := v_2 - mem := v_3 - v.reset(Op386MOVLstoreidx4) - v.AuxInt = int64(int32(c + 4*d)) - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(Op386MOVSSstore) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg3(base, val, mem) return true } - // match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ADDLloadidx4 x [off] {sym} ptr idx mem) mem) - // cond: y.Uses==1 && clobber(y) - // result: (ADDLmodifyidx4 [off] {sym} ptr idx x mem) + return false +} +func rewriteValue386_Op386MOVWLSX(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (MOVWLSX x:(MOVWload [off] {sym} ptr mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (MOVWLSXload [off] {sym} ptr mem) for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - idx := v_1 - y := v_2 - if y.Op != Op386ADDLloadidx4 || y.AuxInt != off || y.Aux != sym { + x := v_0 + if x.Op != Op386MOVWload { break } - mem := y.Args[3] - x := y.Args[0] - if ptr != y.Args[1] || idx != y.Args[2] || mem != v_3 || !(y.Uses == 1 && clobber(y)) { + off := x.AuxInt + sym := x.Aux + mem := x.Args[1] + ptr := x.Args[0] + if !(x.Uses == 1 && clobber(x)) { break } - v.reset(Op386ADDLmodifyidx4) - v.AuxInt = off - v.Aux = sym - v.AddArg4(ptr, idx, x, mem) + b = x.Block + v0 := b.NewValue0(x.Pos, Op386MOVWLSXload, v.Type) + v.copyOf(v0) + v0.AuxInt = off + v0.Aux = sym + v0.AddArg2(ptr, mem) return true } - // match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ANDLloadidx4 x [off] {sym} ptr idx mem) mem) - // cond: y.Uses==1 && clobber(y) - // result: (ANDLmodifyidx4 [off] {sym} ptr idx x mem) + // match: (MOVWLSX (ANDLconst [c] x)) + // cond: c & 0x8000 == 0 + // 
result: (ANDLconst [c & 0x7fff] x) for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - idx := v_1 - y := v_2 - if y.Op != Op386ANDLloadidx4 || y.AuxInt != off || y.Aux != sym { + if v_0.Op != Op386ANDLconst { break } - mem := y.Args[3] - x := y.Args[0] - if ptr != y.Args[1] || idx != y.Args[2] || mem != v_3 || !(y.Uses == 1 && clobber(y)) { + c := v_0.AuxInt + x := v_0.Args[0] + if !(c&0x8000 == 0) { break } - v.reset(Op386ANDLmodifyidx4) - v.AuxInt = off - v.Aux = sym - v.AddArg4(ptr, idx, x, mem) + v.reset(Op386ANDLconst) + v.AuxInt = c & 0x7fff + v.AddArg(x) return true } - // match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ORLloadidx4 x [off] {sym} ptr idx mem) mem) - // cond: y.Uses==1 && clobber(y) - // result: (ORLmodifyidx4 [off] {sym} ptr idx x mem) + return false +} +func rewriteValue386_Op386MOVWLSXload(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (MOVWLSXload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) + // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) + // result: (MOVWLSX x) for { off := v.AuxInt sym := v.Aux ptr := v_0 - idx := v_1 - y := v_2 - if y.Op != Op386ORLloadidx4 || y.AuxInt != off || y.Aux != sym { + if v_1.Op != Op386MOVWstore { break } - mem := y.Args[3] - x := y.Args[0] - if ptr != y.Args[1] || idx != y.Args[2] || mem != v_3 || !(y.Uses == 1 && clobber(y)) { - break - } - v.reset(Op386ORLmodifyidx4) - v.AuxInt = off - v.Aux = sym - v.AddArg4(ptr, idx, x, mem) - return true - } - // match: (MOVLstoreidx4 {sym} [off] ptr idx y:(XORLloadidx4 x [off] {sym} ptr idx mem) mem) - // cond: y.Uses==1 && clobber(y) - // result: (XORLmodifyidx4 [off] {sym} ptr idx x mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - idx := v_1 - y := v_2 - if y.Op != Op386XORLloadidx4 || y.AuxInt != off || y.Aux != sym { - break - } - mem := y.Args[3] - x := y.Args[0] - if ptr != y.Args[1] || idx != y.Args[2] || mem != v_3 || !(y.Uses == 1 && clobber(y)) { - break - } - v.reset(Op386XORLmodifyidx4) - v.AuxInt = off - v.Aux = sym - v.AddArg4(ptr, idx, x, mem) - return true - } - // match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ADDL l:(MOVLloadidx4 [off] {sym} ptr idx mem) x) mem) - // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) - // result: (ADDLmodifyidx4 [off] {sym} ptr idx x mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - idx := v_1 - y := v_2 - if y.Op != Op386ADDL { - break - } - _ = y.Args[1] - y_0 := y.Args[0] - y_1 := y.Args[1] - for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 { - l := y_0 - if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym { - continue - } - mem := l.Args[2] - if ptr != l.Args[0] || idx != l.Args[1] { - continue - } - x := y_1 - if mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { - continue - } - v.reset(Op386ADDLmodifyidx4) - v.AuxInt = off - v.Aux = sym - v.AddArg4(ptr, idx, x, mem) - return true - } - break - } - // match: (MOVLstoreidx4 {sym} [off] ptr idx y:(SUBL l:(MOVLloadidx4 [off] {sym} ptr idx mem) x) mem) - // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) - // result: (SUBLmodifyidx4 [off] {sym} ptr idx x mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - idx := v_1 - y := v_2 - if y.Op != Op386SUBL { - break - } - x := y.Args[1] - l := y.Args[0] - if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym { - break - } - mem := l.Args[2] - if ptr != l.Args[0] || idx != l.Args[1] || mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { - break - } - v.reset(Op386SUBLmodifyidx4) - v.AuxInt = 
off - v.Aux = sym - v.AddArg4(ptr, idx, x, mem) - return true - } - // match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ANDL l:(MOVLloadidx4 [off] {sym} ptr idx mem) x) mem) - // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) - // result: (ANDLmodifyidx4 [off] {sym} ptr idx x mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - idx := v_1 - y := v_2 - if y.Op != Op386ANDL { - break - } - _ = y.Args[1] - y_0 := y.Args[0] - y_1 := y.Args[1] - for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 { - l := y_0 - if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym { - continue - } - mem := l.Args[2] - if ptr != l.Args[0] || idx != l.Args[1] { - continue - } - x := y_1 - if mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { - continue - } - v.reset(Op386ANDLmodifyidx4) - v.AuxInt = off - v.Aux = sym - v.AddArg4(ptr, idx, x, mem) - return true - } - break - } - // match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ORL l:(MOVLloadidx4 [off] {sym} ptr idx mem) x) mem) - // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) - // result: (ORLmodifyidx4 [off] {sym} ptr idx x mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - idx := v_1 - y := v_2 - if y.Op != Op386ORL { - break - } - _ = y.Args[1] - y_0 := y.Args[0] - y_1 := y.Args[1] - for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 { - l := y_0 - if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym { - continue - } - mem := l.Args[2] - if ptr != l.Args[0] || idx != l.Args[1] { - continue - } - x := y_1 - if mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { - continue - } - v.reset(Op386ORLmodifyidx4) - v.AuxInt = off - v.Aux = sym - v.AddArg4(ptr, idx, x, mem) - return true - } - break - } - // match: (MOVLstoreidx4 {sym} [off] ptr idx y:(XORL l:(MOVLloadidx4 [off] {sym} ptr idx mem) x) mem) - // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) - // result: (XORLmodifyidx4 [off] {sym} ptr idx x mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - idx := v_1 - y := v_2 - if y.Op != Op386XORL { - break - } - _ = y.Args[1] - y_0 := y.Args[0] - y_1 := y.Args[1] - for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 { - l := y_0 - if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym { - continue - } - mem := l.Args[2] - if ptr != l.Args[0] || idx != l.Args[1] { - continue - } - x := y_1 - if mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { - continue - } - v.reset(Op386XORLmodifyidx4) - v.AuxInt = off - v.Aux = sym - v.AddArg4(ptr, idx, x, mem) - return true - } - break - } - // match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ADDLconst [c] l:(MOVLloadidx4 [off] {sym} ptr idx mem)) mem) - // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) && validValAndOff(c,off) - // result: (ADDLconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - idx := v_1 - y := v_2 - if y.Op != Op386ADDLconst { - break - } - c := y.AuxInt - l := y.Args[0] - if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym { - break - } - mem := l.Args[2] - if ptr != l.Args[0] || idx != l.Args[1] || mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l) && validValAndOff(c, off)) { - break - } - v.reset(Op386ADDLconstmodifyidx4) - v.AuxInt = makeValAndOff(c, off) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ANDLconst [c] l:(MOVLloadidx4 [off] {sym} ptr idx mem)) mem) - // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) && validValAndOff(c,off) - // result: 
(ANDLconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - idx := v_1 - y := v_2 - if y.Op != Op386ANDLconst { - break - } - c := y.AuxInt - l := y.Args[0] - if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym { - break - } - mem := l.Args[2] - if ptr != l.Args[0] || idx != l.Args[1] || mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l) && validValAndOff(c, off)) { - break - } - v.reset(Op386ANDLconstmodifyidx4) - v.AuxInt = makeValAndOff(c, off) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ORLconst [c] l:(MOVLloadidx4 [off] {sym} ptr idx mem)) mem) - // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) && validValAndOff(c,off) - // result: (ORLconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - idx := v_1 - y := v_2 - if y.Op != Op386ORLconst { - break - } - c := y.AuxInt - l := y.Args[0] - if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym { - break - } - mem := l.Args[2] - if ptr != l.Args[0] || idx != l.Args[1] || mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l) && validValAndOff(c, off)) { - break - } - v.reset(Op386ORLconstmodifyidx4) - v.AuxInt = makeValAndOff(c, off) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVLstoreidx4 {sym} [off] ptr idx y:(XORLconst [c] l:(MOVLloadidx4 [off] {sym} ptr idx mem)) mem) - // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) && validValAndOff(c,off) - // result: (XORLconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - idx := v_1 - y := v_2 - if y.Op != Op386XORLconst { - break - } - c := y.AuxInt - l := y.Args[0] - if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym { - break - } - mem := l.Args[2] - if ptr != l.Args[0] || idx != l.Args[1] || mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l) && validValAndOff(c, off)) { - break - } - v.reset(Op386XORLconstmodifyidx4) - v.AuxInt = makeValAndOff(c, off) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - return false -} -func rewriteValue386_Op386MOVSDconst(v *Value) bool { - b := v.Block - config := b.Func.Config - typ := &b.Func.Config.Types - // match: (MOVSDconst [c]) - // cond: config.ctxt.Flag_shared - // result: (MOVSDconst2 (MOVSDconst1 [c])) - for { - c := v.AuxInt - if !(config.ctxt.Flag_shared) { - break - } - v.reset(Op386MOVSDconst2) - v0 := b.NewValue0(v.Pos, Op386MOVSDconst1, typ.UInt32) - v0.AuxInt = c - v.AddArg(v0) - return true - } - return false -} -func rewriteValue386_Op386MOVSDload(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (MOVSDload [off1] {sym} (ADDLconst [off2] ptr) mem) - // cond: is32Bit(off1+off2) - // result: (MOVSDload [off1+off2] {sym} ptr mem) - for { - off1 := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDLconst { - break - } - off2 := v_0.AuxInt - ptr := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1 + off2)) { - break - } - v.reset(Op386MOVSDload) - v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg2(ptr, mem) - return true - } - // match: (MOVSDload [off1] {sym1} (LEAL [off2] {sym2} base) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MOVSDload [off1+off2] {mergeSym(sym1,sym2)} base mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL { - break - } - off2 := 
v_0.AuxInt - sym2 := v_0.Aux - base := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { - break - } - v.reset(Op386MOVSDload) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg2(base, mem) - return true - } - // match: (MOVSDload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVSDloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL1 { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(Op386MOVSDloadidx1) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVSDload [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVSDloadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL8 { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(Op386MOVSDloadidx8) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVSDload [off] {sym} (ADDL ptr idx) mem) - // cond: ptr.Op != OpSB - // result: (MOVSDloadidx1 [off] {sym} ptr idx mem) - for { - off := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDL { - break - } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - ptr := v_0_0 - idx := v_0_1 - mem := v_1 - if !(ptr.Op != OpSB) { - continue - } - v.reset(Op386MOVSDloadidx1) - v.AuxInt = off - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - break - } - return false -} -func rewriteValue386_Op386MOVSDloadidx1(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVSDloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem) - // result: (MOVSDloadidx1 [int64(int32(c+d))] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDLconst { - break - } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - mem := v_2 - v.reset(Op386MOVSDloadidx1) - v.AuxInt = int64(int32(c + d)) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVSDloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem) - // result: (MOVSDloadidx1 [int64(int32(c+d))] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != Op386ADDLconst { - break - } - d := v_1.AuxInt - idx := v_1.Args[0] - mem := v_2 - v.reset(Op386MOVSDloadidx1) - v.AuxInt = int64(int32(c + d)) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - return false -} -func rewriteValue386_Op386MOVSDloadidx8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVSDloadidx8 [c] {sym} (ADDLconst [d] ptr) idx mem) - // result: (MOVSDloadidx8 [int64(int32(c+d))] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDLconst { - break - } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - mem := v_2 - v.reset(Op386MOVSDloadidx8) - v.AuxInt = int64(int32(c + d)) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVSDloadidx8 [c] {sym} ptr (ADDLconst [d] idx) mem) - 
// result: (MOVSDloadidx8 [int64(int32(c+8*d))] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != Op386ADDLconst { - break - } - d := v_1.AuxInt - idx := v_1.Args[0] - mem := v_2 - v.reset(Op386MOVSDloadidx8) - v.AuxInt = int64(int32(c + 8*d)) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - return false -} -func rewriteValue386_Op386MOVSDstore(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (MOVSDstore [off1] {sym} (ADDLconst [off2] ptr) val mem) - // cond: is32Bit(off1+off2) - // result: (MOVSDstore [off1+off2] {sym} ptr val mem) - for { - off1 := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDLconst { - break - } - off2 := v_0.AuxInt - ptr := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1 + off2)) { - break - } - v.reset(Op386MOVSDstore) - v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg3(ptr, val, mem) - return true - } - // match: (MOVSDstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MOVSDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - base := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { - break - } - v.reset(Op386MOVSDstore) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(base, val, mem) - return true - } - // match: (MOVSDstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVSDstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL1 { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(Op386MOVSDstoreidx1) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg4(ptr, idx, val, mem) - return true - } - // match: (MOVSDstore [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) val mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVSDstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL8 { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(Op386MOVSDstoreidx8) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg4(ptr, idx, val, mem) - return true - } - // match: (MOVSDstore [off] {sym} (ADDL ptr idx) val mem) - // cond: ptr.Op != OpSB - // result: (MOVSDstoreidx1 [off] {sym} ptr idx val mem) - for { - off := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDL { - break - } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - ptr := v_0_0 - idx := v_0_1 - val := v_1 - mem := v_2 - if !(ptr.Op != OpSB) { - continue - } - v.reset(Op386MOVSDstoreidx1) - v.AuxInt = off - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - break - } - return false -} -func rewriteValue386_Op386MOVSDstoreidx1(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 
:= v.Args[1] - v_0 := v.Args[0] - // match: (MOVSDstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem) - // result: (MOVSDstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem) - for { - c := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDLconst { - break - } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - val := v_2 - mem := v_3 - v.reset(Op386MOVSDstoreidx1) - v.AuxInt = int64(int32(c + d)) - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - // match: (MOVSDstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem) - // result: (MOVSDstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem) - for { - c := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != Op386ADDLconst { - break - } - d := v_1.AuxInt - idx := v_1.Args[0] - val := v_2 - mem := v_3 - v.reset(Op386MOVSDstoreidx1) - v.AuxInt = int64(int32(c + d)) - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - return false -} -func rewriteValue386_Op386MOVSDstoreidx8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVSDstoreidx8 [c] {sym} (ADDLconst [d] ptr) idx val mem) - // result: (MOVSDstoreidx8 [int64(int32(c+d))] {sym} ptr idx val mem) - for { - c := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDLconst { - break - } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - val := v_2 - mem := v_3 - v.reset(Op386MOVSDstoreidx8) - v.AuxInt = int64(int32(c + d)) - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - // match: (MOVSDstoreidx8 [c] {sym} ptr (ADDLconst [d] idx) val mem) - // result: (MOVSDstoreidx8 [int64(int32(c+8*d))] {sym} ptr idx val mem) - for { - c := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != Op386ADDLconst { - break - } - d := v_1.AuxInt - idx := v_1.Args[0] - val := v_2 - mem := v_3 - v.reset(Op386MOVSDstoreidx8) - v.AuxInt = int64(int32(c + 8*d)) - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - return false -} -func rewriteValue386_Op386MOVSSconst(v *Value) bool { - b := v.Block - config := b.Func.Config - typ := &b.Func.Config.Types - // match: (MOVSSconst [c]) - // cond: config.ctxt.Flag_shared - // result: (MOVSSconst2 (MOVSSconst1 [c])) - for { - c := v.AuxInt - if !(config.ctxt.Flag_shared) { - break - } - v.reset(Op386MOVSSconst2) - v0 := b.NewValue0(v.Pos, Op386MOVSSconst1, typ.UInt32) - v0.AuxInt = c - v.AddArg(v0) - return true - } - return false -} -func rewriteValue386_Op386MOVSSload(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (MOVSSload [off1] {sym} (ADDLconst [off2] ptr) mem) - // cond: is32Bit(off1+off2) - // result: (MOVSSload [off1+off2] {sym} ptr mem) - for { - off1 := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDLconst { - break - } - off2 := v_0.AuxInt - ptr := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1 + off2)) { - break - } - v.reset(Op386MOVSSload) - v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg2(ptr, mem) - return true - } - // match: (MOVSSload [off1] {sym1} (LEAL [off2] {sym2} base) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MOVSSload [off1+off2] {mergeSym(sym1,sym2)} base mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - base := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { - break - } - v.reset(Op386MOVSSload) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, 
sym2) - v.AddArg2(base, mem) - return true - } - // match: (MOVSSload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVSSloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL1 { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(Op386MOVSSloadidx1) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVSSload [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVSSloadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL4 { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(Op386MOVSSloadidx4) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVSSload [off] {sym} (ADDL ptr idx) mem) - // cond: ptr.Op != OpSB - // result: (MOVSSloadidx1 [off] {sym} ptr idx mem) - for { - off := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDL { - break - } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - ptr := v_0_0 - idx := v_0_1 - mem := v_1 - if !(ptr.Op != OpSB) { - continue - } - v.reset(Op386MOVSSloadidx1) - v.AuxInt = off - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - break - } - return false -} -func rewriteValue386_Op386MOVSSloadidx1(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVSSloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem) - // result: (MOVSSloadidx1 [int64(int32(c+d))] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDLconst { - break - } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - mem := v_2 - v.reset(Op386MOVSSloadidx1) - v.AuxInt = int64(int32(c + d)) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVSSloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem) - // result: (MOVSSloadidx1 [int64(int32(c+d))] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != Op386ADDLconst { - break - } - d := v_1.AuxInt - idx := v_1.Args[0] - mem := v_2 - v.reset(Op386MOVSSloadidx1) - v.AuxInt = int64(int32(c + d)) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - return false -} -func rewriteValue386_Op386MOVSSloadidx4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVSSloadidx4 [c] {sym} (ADDLconst [d] ptr) idx mem) - // result: (MOVSSloadidx4 [int64(int32(c+d))] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDLconst { - break - } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - mem := v_2 - v.reset(Op386MOVSSloadidx4) - v.AuxInt = int64(int32(c + d)) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVSSloadidx4 [c] {sym} ptr (ADDLconst [d] idx) mem) - // result: (MOVSSloadidx4 [int64(int32(c+4*d))] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != Op386ADDLconst { - break - } - d := v_1.AuxInt - idx := v_1.Args[0] - mem := v_2 - v.reset(Op386MOVSSloadidx4) - v.AuxInt = 
int64(int32(c + 4*d)) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - return false -} -func rewriteValue386_Op386MOVSSstore(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (MOVSSstore [off1] {sym} (ADDLconst [off2] ptr) val mem) - // cond: is32Bit(off1+off2) - // result: (MOVSSstore [off1+off2] {sym} ptr val mem) - for { - off1 := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDLconst { - break - } - off2 := v_0.AuxInt - ptr := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1 + off2)) { - break - } - v.reset(Op386MOVSSstore) - v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg3(ptr, val, mem) - return true - } - // match: (MOVSSstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MOVSSstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - base := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { - break - } - v.reset(Op386MOVSSstore) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(base, val, mem) - return true - } - // match: (MOVSSstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVSSstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL1 { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(Op386MOVSSstoreidx1) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg4(ptr, idx, val, mem) - return true - } - // match: (MOVSSstore [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) val mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVSSstoreidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL4 { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(Op386MOVSSstoreidx4) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg4(ptr, idx, val, mem) - return true - } - // match: (MOVSSstore [off] {sym} (ADDL ptr idx) val mem) - // cond: ptr.Op != OpSB - // result: (MOVSSstoreidx1 [off] {sym} ptr idx val mem) - for { - off := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDL { - break - } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - ptr := v_0_0 - idx := v_0_1 - val := v_1 - mem := v_2 - if !(ptr.Op != OpSB) { - continue - } - v.reset(Op386MOVSSstoreidx1) - v.AuxInt = off - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - break - } - return false -} -func rewriteValue386_Op386MOVSSstoreidx1(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVSSstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem) - // result: (MOVSSstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem) - for { - c := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDLconst { - break - } 
- d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - val := v_2 - mem := v_3 - v.reset(Op386MOVSSstoreidx1) - v.AuxInt = int64(int32(c + d)) - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - // match: (MOVSSstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem) - // result: (MOVSSstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem) - for { - c := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != Op386ADDLconst { - break - } - d := v_1.AuxInt - idx := v_1.Args[0] - val := v_2 - mem := v_3 - v.reset(Op386MOVSSstoreidx1) - v.AuxInt = int64(int32(c + d)) - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - return false -} -func rewriteValue386_Op386MOVSSstoreidx4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVSSstoreidx4 [c] {sym} (ADDLconst [d] ptr) idx val mem) - // result: (MOVSSstoreidx4 [int64(int32(c+d))] {sym} ptr idx val mem) - for { - c := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDLconst { - break - } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - val := v_2 - mem := v_3 - v.reset(Op386MOVSSstoreidx4) - v.AuxInt = int64(int32(c + d)) - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - // match: (MOVSSstoreidx4 [c] {sym} ptr (ADDLconst [d] idx) val mem) - // result: (MOVSSstoreidx4 [int64(int32(c+4*d))] {sym} ptr idx val mem) - for { - c := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != Op386ADDLconst { - break - } - d := v_1.AuxInt - idx := v_1.Args[0] - val := v_2 - mem := v_3 - v.reset(Op386MOVSSstoreidx4) - v.AuxInt = int64(int32(c + 4*d)) - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - return false -} -func rewriteValue386_Op386MOVWLSX(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - // match: (MOVWLSX x:(MOVWload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVWLSXload [off] {sym} ptr mem) - for { - x := v_0 - if x.Op != Op386MOVWload { - break - } - off := x.AuxInt - sym := x.Aux - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { - break - } - b = x.Block - v0 := b.NewValue0(x.Pos, Op386MOVWLSXload, v.Type) - v.copyOf(v0) - v0.AuxInt = off - v0.Aux = sym - v0.AddArg2(ptr, mem) - return true - } - // match: (MOVWLSX (ANDLconst [c] x)) - // cond: c & 0x8000 == 0 - // result: (ANDLconst [c & 0x7fff] x) - for { - if v_0.Op != Op386ANDLconst { - break - } - c := v_0.AuxInt - x := v_0.Args[0] - if !(c&0x8000 == 0) { - break - } - v.reset(Op386ANDLconst) - v.AuxInt = c & 0x7fff - v.AddArg(x) - return true - } - return false -} -func rewriteValue386_Op386MOVWLSXload(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (MOVWLSXload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) - // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) - // result: (MOVWLSX x) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != Op386MOVWstore { - break - } - off2 := v_1.AuxInt - sym2 := v_1.Aux - x := v_1.Args[1] - ptr2 := v_1.Args[0] - if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { - break - } - v.reset(Op386MOVWLSX) - v.AddArg(x) - return true - } - // match: (MOVWLSXload [off1] {sym1} (LEAL [off2] {sym2} base) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MOVWLSXload [off1+off2] {mergeSym(sym1,sym2)} base mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - 
base := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { - break - } - v.reset(Op386MOVWLSXload) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg2(base, mem) - return true - } - return false -} -func rewriteValue386_Op386MOVWLZX(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - // match: (MOVWLZX x:(MOVWload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVWload [off] {sym} ptr mem) - for { - x := v_0 - if x.Op != Op386MOVWload { - break - } - off := x.AuxInt - sym := x.Aux - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { - break - } - b = x.Block - v0 := b.NewValue0(x.Pos, Op386MOVWload, v.Type) - v.copyOf(v0) - v0.AuxInt = off - v0.Aux = sym - v0.AddArg2(ptr, mem) - return true - } - // match: (MOVWLZX x:(MOVWloadidx1 [off] {sym} ptr idx mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVWloadidx1 [off] {sym} ptr idx mem) - for { - x := v_0 - if x.Op != Op386MOVWloadidx1 { - break - } - off := x.AuxInt - sym := x.Aux - mem := x.Args[2] - ptr := x.Args[0] - idx := x.Args[1] - if !(x.Uses == 1 && clobber(x)) { - break - } - b = x.Block - v0 := b.NewValue0(v.Pos, Op386MOVWloadidx1, v.Type) - v.copyOf(v0) - v0.AuxInt = off - v0.Aux = sym - v0.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVWLZX x:(MOVWloadidx2 [off] {sym} ptr idx mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVWloadidx2 [off] {sym} ptr idx mem) - for { - x := v_0 - if x.Op != Op386MOVWloadidx2 { - break - } - off := x.AuxInt - sym := x.Aux - mem := x.Args[2] - ptr := x.Args[0] - idx := x.Args[1] - if !(x.Uses == 1 && clobber(x)) { - break - } - b = x.Block - v0 := b.NewValue0(v.Pos, Op386MOVWloadidx2, v.Type) - v.copyOf(v0) - v0.AuxInt = off - v0.Aux = sym - v0.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVWLZX (ANDLconst [c] x)) - // result: (ANDLconst [c & 0xffff] x) - for { - if v_0.Op != Op386ANDLconst { - break - } - c := v_0.AuxInt - x := v_0.Args[0] - v.reset(Op386ANDLconst) - v.AuxInt = c & 0xffff - v.AddArg(x) - return true - } - return false -} -func rewriteValue386_Op386MOVWload(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) - // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) - // result: (MOVWLZX x) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != Op386MOVWstore { - break - } - off2 := v_1.AuxInt - sym2 := v_1.Aux - x := v_1.Args[1] - ptr2 := v_1.Args[0] - if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { - break - } - v.reset(Op386MOVWLZX) - v.AddArg(x) - return true - } - // match: (MOVWload [off1] {sym} (ADDLconst [off2] ptr) mem) - // cond: is32Bit(off1+off2) - // result: (MOVWload [off1+off2] {sym} ptr mem) - for { - off1 := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDLconst { - break - } - off2 := v_0.AuxInt - ptr := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1 + off2)) { - break - } - v.reset(Op386MOVWload) - v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg2(ptr, mem) - return true - } - // match: (MOVWload [off1] {sym1} (LEAL [off2] {sym2} base) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL { - break - } - off2 := v_0.AuxInt - 
sym2 := v_0.Aux - base := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { - break - } - v.reset(Op386MOVWload) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg2(base, mem) - return true - } - // match: (MOVWload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVWloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL1 { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(Op386MOVWloadidx1) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVWload [off1] {sym1} (LEAL2 [off2] {sym2} ptr idx) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVWloadidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL2 { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(Op386MOVWloadidx2) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVWload [off] {sym} (ADDL ptr idx) mem) - // cond: ptr.Op != OpSB - // result: (MOVWloadidx1 [off] {sym} ptr idx mem) - for { - off := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDL { - break - } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - ptr := v_0_0 - idx := v_0_1 - mem := v_1 - if !(ptr.Op != OpSB) { - continue - } - v.reset(Op386MOVWloadidx1) - v.AuxInt = off - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - break - } - // match: (MOVWload [off] {sym} (SB) _) - // cond: symIsRO(sym) - // result: (MOVLconst [int64(read16(sym, off, config.ctxt.Arch.ByteOrder))]) - for { - off := v.AuxInt - sym := v.Aux - if v_0.Op != OpSB || !(symIsRO(sym)) { - break - } - v.reset(Op386MOVLconst) - v.AuxInt = int64(read16(sym, off, config.ctxt.Arch.ByteOrder)) - return true - } - return false -} -func rewriteValue386_Op386MOVWloadidx1(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVWloadidx1 [c] {sym} ptr (SHLLconst [1] idx) mem) - // result: (MOVWloadidx2 [c] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - ptr := v_0 - if v_1.Op != Op386SHLLconst || v_1.AuxInt != 1 { - continue - } - idx := v_1.Args[0] - mem := v_2 - v.reset(Op386MOVWloadidx2) - v.AuxInt = c - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - break - } - // match: (MOVWloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem) - // result: (MOVWloadidx1 [int64(int32(c+d))] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != Op386ADDLconst { - continue - } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - mem := v_2 - v.reset(Op386MOVWloadidx1) - v.AuxInt = int64(int32(c + d)) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - break - } - // match: (MOVWloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem) - // result: (MOVWloadidx1 [int64(int32(c+d))] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux 
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - ptr := v_0 - if v_1.Op != Op386ADDLconst { - continue - } - d := v_1.AuxInt - idx := v_1.Args[0] - mem := v_2 - v.reset(Op386MOVWloadidx1) - v.AuxInt = int64(int32(c + d)) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - break - } - return false -} -func rewriteValue386_Op386MOVWloadidx2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVWloadidx2 [c] {sym} (ADDLconst [d] ptr) idx mem) - // result: (MOVWloadidx2 [int64(int32(c+d))] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDLconst { - break - } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - mem := v_2 - v.reset(Op386MOVWloadidx2) - v.AuxInt = int64(int32(c + d)) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVWloadidx2 [c] {sym} ptr (ADDLconst [d] idx) mem) - // result: (MOVWloadidx2 [int64(int32(c+2*d))] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != Op386ADDLconst { - break - } - d := v_1.AuxInt - idx := v_1.Args[0] - mem := v_2 - v.reset(Op386MOVWloadidx2) - v.AuxInt = int64(int32(c + 2*d)) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - return false -} -func rewriteValue386_Op386MOVWstore(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (MOVWstore [off] {sym} ptr (MOVWLSX x) mem) - // result: (MOVWstore [off] {sym} ptr x mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != Op386MOVWLSX { - break - } - x := v_1.Args[0] - mem := v_2 - v.reset(Op386MOVWstore) - v.AuxInt = off - v.Aux = sym - v.AddArg3(ptr, x, mem) - return true - } - // match: (MOVWstore [off] {sym} ptr (MOVWLZX x) mem) - // result: (MOVWstore [off] {sym} ptr x mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != Op386MOVWLZX { - break - } - x := v_1.Args[0] - mem := v_2 - v.reset(Op386MOVWstore) - v.AuxInt = off - v.Aux = sym - v.AddArg3(ptr, x, mem) - return true - } - // match: (MOVWstore [off1] {sym} (ADDLconst [off2] ptr) val mem) - // cond: is32Bit(off1+off2) - // result: (MOVWstore [off1+off2] {sym} ptr val mem) - for { - off1 := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDLconst { - break - } - off2 := v_0.AuxInt - ptr := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1 + off2)) { - break - } - v.reset(Op386MOVWstore) - v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg3(ptr, val, mem) - return true - } - // match: (MOVWstore [off] {sym} ptr (MOVLconst [c]) mem) - // cond: validOff(off) - // result: (MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != Op386MOVLconst { - break - } - c := v_1.AuxInt - mem := v_2 - if !(validOff(off)) { + off2 := v_1.AuxInt + sym2 := v_1.Aux + x := v_1.Args[1] + ptr2 := v_1.Args[0] + if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { break } - v.reset(Op386MOVWstoreconst) - v.AuxInt = makeValAndOff(int64(int16(c)), off) - v.Aux = sym - v.AddArg2(ptr, mem) + v.reset(Op386MOVWLSX) + v.AddArg(x) return true } - // match: (MOVWstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) + // match: (MOVWLSXload [off1] {sym1} (LEAL [off2] {sym2} base) mem) // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + // result: (MOVWLSXload [off1+off2] {mergeSym(sym1,sym2)} base 
mem) for { off1 := v.AuxInt sym1 := v.Aux @@ -8004,718 +5091,508 @@ func rewriteValue386_Op386MOVWstore(v *Value) bool { off2 := v_0.AuxInt sym2 := v_0.Aux base := v_0.Args[0] - val := v_1 - mem := v_2 + mem := v_1 if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { break } - v.reset(Op386MOVWstore) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(base, val, mem) - return true - } - // match: (MOVWstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVWstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL1 { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(Op386MOVWstoreidx1) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg4(ptr, idx, val, mem) - return true - } - // match: (MOVWstore [off1] {sym1} (LEAL2 [off2] {sym2} ptr idx) val mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVWstoreidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL2 { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(Op386MOVWstoreidx2) + v.reset(Op386MOVWLSXload) v.AuxInt = off1 + off2 v.Aux = mergeSym(sym1, sym2) - v.AddArg4(ptr, idx, val, mem) + v.AddArg2(base, mem) return true } - // match: (MOVWstore [off] {sym} (ADDL ptr idx) val mem) - // cond: ptr.Op != OpSB - // result: (MOVWstoreidx1 [off] {sym} ptr idx val mem) - for { - off := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDL { - break - } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - ptr := v_0_0 - idx := v_0_1 - val := v_1 - mem := v_2 - if !(ptr.Op != OpSB) { - continue - } - v.reset(Op386MOVWstoreidx1) - v.AuxInt = off - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - break - } - // match: (MOVWstore [i] {s} p (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p w mem)) + return false +} +func rewriteValue386_Op386MOVWLZX(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (MOVWLZX x:(MOVWload [off] {sym} ptr mem)) // cond: x.Uses == 1 && clobber(x) - // result: (MOVLstore [i-2] {s} p w mem) + // result: @x.Block (MOVWload [off] {sym} ptr mem) for { - i := v.AuxInt - s := v.Aux - p := v_0 - if v_1.Op != Op386SHRLconst || v_1.AuxInt != 16 { - break - } - w := v_1.Args[0] - x := v_2 - if x.Op != Op386MOVWstore || x.AuxInt != i-2 || x.Aux != s { + x := v_0 + if x.Op != Op386MOVWload { break } - mem := x.Args[2] - if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) { + off := x.AuxInt + sym := x.Aux + mem := x.Args[1] + ptr := x.Args[0] + if !(x.Uses == 1 && clobber(x)) { break } - v.reset(Op386MOVLstore) - v.AuxInt = i - 2 - v.Aux = s - v.AddArg3(p, w, mem) + b = x.Block + v0 := b.NewValue0(x.Pos, Op386MOVWload, v.Type) + v.copyOf(v0) + v0.AuxInt = off + v0.Aux = sym + v0.AddArg2(ptr, mem) return true } - // match: (MOVWstore [i] {s} p (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRLconst [j-16] w) mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVLstore [i-2] {s} p w0 mem) + // match: (MOVWLZX 
(ANDLconst [c] x)) + // result: (ANDLconst [c & 0xffff] x) for { - i := v.AuxInt - s := v.Aux - p := v_0 - if v_1.Op != Op386SHRLconst { - break - } - j := v_1.AuxInt - w := v_1.Args[0] - x := v_2 - if x.Op != Op386MOVWstore || x.AuxInt != i-2 || x.Aux != s { - break - } - mem := x.Args[2] - if p != x.Args[0] { - break - } - w0 := x.Args[1] - if w0.Op != Op386SHRLconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) { + if v_0.Op != Op386ANDLconst { break } - v.reset(Op386MOVLstore) - v.AuxInt = i - 2 - v.Aux = s - v.AddArg3(p, w0, mem) + c := v_0.AuxInt + x := v_0.Args[0] + v.reset(Op386ANDLconst) + v.AuxInt = c & 0xffff + v.AddArg(x) return true } return false } -func rewriteValue386_Op386MOVWstoreconst(v *Value) bool { +func rewriteValue386_Op386MOVWload(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block config := b.Func.Config - // match: (MOVWstoreconst [sc] {s} (ADDLconst [off] ptr) mem) - // cond: ValAndOff(sc).canAdd(off) - // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem) - for { - sc := v.AuxInt - s := v.Aux - if v_0.Op != Op386ADDLconst { - break - } - off := v_0.AuxInt - ptr := v_0.Args[0] - mem := v_1 - if !(ValAndOff(sc).canAdd(off)) { - break - } - v.reset(Op386MOVWstoreconst) - v.AuxInt = ValAndOff(sc).add(off) - v.Aux = s - v.AddArg2(ptr, mem) - return true - } - // match: (MOVWstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) - // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) - for { - sc := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL { - break - } - off := v_0.AuxInt - sym2 := v_0.Aux - ptr := v_0.Args[0] - mem := v_1 - if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { - break - } - v.reset(Op386MOVWstoreconst) - v.AuxInt = ValAndOff(sc).add(off) - v.Aux = mergeSym(sym1, sym2) - v.AddArg2(ptr, mem) - return true - } - // match: (MOVWstoreconst [x] {sym1} (LEAL1 [off] {sym2} ptr idx) mem) - // cond: canMergeSym(sym1, sym2) - // result: (MOVWstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem) - for { - x := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL1 { - break - } - off := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - mem := v_1 - if !(canMergeSym(sym1, sym2)) { - break - } - v.reset(Op386MOVWstoreconstidx1) - v.AuxInt = ValAndOff(x).add(off) - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVWstoreconst [x] {sym1} (LEAL2 [off] {sym2} ptr idx) mem) - // cond: canMergeSym(sym1, sym2) - // result: (MOVWstoreconstidx2 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem) + // match: (MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) + // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) + // result: (MOVWLZX x) for { - x := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL2 { - break - } - off := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - mem := v_1 - if !(canMergeSym(sym1, sym2)) { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + if v_1.Op != Op386MOVWstore { break } - v.reset(Op386MOVWstoreconstidx2) - v.AuxInt = ValAndOff(x).add(off) - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVWstoreconst [x] {sym} (ADDL ptr idx) mem) - // result: (MOVWstoreconstidx1 [x] {sym} ptr idx mem) - for { - x := v.AuxInt - sym := v.Aux - if 
v_0.Op != Op386ADDL { + off2 := v_1.AuxInt + sym2 := v_1.Aux + x := v_1.Args[1] + ptr2 := v_1.Args[0] + if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { break } - idx := v_0.Args[1] - ptr := v_0.Args[0] - mem := v_1 - v.reset(Op386MOVWstoreconstidx1) - v.AuxInt = x - v.Aux = sym - v.AddArg3(ptr, idx, mem) + v.reset(Op386MOVWLZX) + v.AddArg(x) return true } - // match: (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem)) - // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x) - // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem) + // match: (MOVWload [off1] {sym} (ADDLconst [off2] ptr) mem) + // cond: is32Bit(off1+off2) + // result: (MOVWload [off1+off2] {sym} ptr mem) for { - c := v.AuxInt - s := v.Aux - p := v_0 - x := v_1 - if x.Op != Op386MOVWstoreconst { - break - } - a := x.AuxInt - if x.Aux != s { + off1 := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDLconst { break } - mem := x.Args[1] - if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) { + off2 := v_0.AuxInt + ptr := v_0.Args[0] + mem := v_1 + if !(is32Bit(off1 + off2)) { break } - v.reset(Op386MOVLstoreconst) - v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off()) - v.Aux = s - v.AddArg2(p, mem) + v.reset(Op386MOVWload) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg2(ptr, mem) return true } - // match: (MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem)) - // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x) - // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem) + // match: (MOVWload [off1] {sym1} (LEAL [off2] {sym2} base) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem) for { - a := v.AuxInt - s := v.Aux - p := v_0 - x := v_1 - if x.Op != Op386MOVWstoreconst { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL { break } - c := x.AuxInt - if x.Aux != s { + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + mem := v_1 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { break } - mem := x.Args[1] - if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) { + v.reset(Op386MOVWload) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg2(base, mem) + return true + } + // match: (MOVWload [off] {sym} (SB) _) + // cond: symIsRO(sym) + // result: (MOVLconst [int64(read16(sym, off, config.ctxt.Arch.ByteOrder))]) + for { + off := v.AuxInt + sym := v.Aux + if v_0.Op != OpSB || !(symIsRO(sym)) { break } - v.reset(Op386MOVLstoreconst) - v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off()) - v.Aux = s - v.AddArg2(p, mem) + v.reset(Op386MOVLconst) + v.AuxInt = int64(read16(sym, off, config.ctxt.Arch.ByteOrder)) return true } return false } -func rewriteValue386_Op386MOVWstoreconstidx1(v *Value) bool { +func rewriteValue386_Op386MOVWstore(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (MOVWstoreconstidx1 [c] {sym} ptr (SHLLconst [1] idx) mem) - // result: (MOVWstoreconstidx2 [c] {sym} ptr idx mem) + b := v.Block + config := b.Func.Config + // match: (MOVWstore [off] {sym} ptr (MOVWLSX x) mem) + // result: 
(MOVWstore [off] {sym} ptr x mem) for { - c := v.AuxInt + off := v.AuxInt sym := v.Aux ptr := v_0 - if v_1.Op != Op386SHLLconst || v_1.AuxInt != 1 { + if v_1.Op != Op386MOVWLSX { break } - idx := v_1.Args[0] + x := v_1.Args[0] mem := v_2 - v.reset(Op386MOVWstoreconstidx2) - v.AuxInt = c + v.reset(Op386MOVWstore) + v.AuxInt = off v.Aux = sym - v.AddArg3(ptr, idx, mem) + v.AddArg3(ptr, x, mem) return true } - // match: (MOVWstoreconstidx1 [x] {sym} (ADDLconst [c] ptr) idx mem) - // result: (MOVWstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem) + // match: (MOVWstore [off] {sym} ptr (MOVWLZX x) mem) + // result: (MOVWstore [off] {sym} ptr x mem) for { - x := v.AuxInt + off := v.AuxInt sym := v.Aux - if v_0.Op != Op386ADDLconst { + ptr := v_0 + if v_1.Op != Op386MOVWLZX { break } - c := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 + x := v_1.Args[0] mem := v_2 - v.reset(Op386MOVWstoreconstidx1) - v.AuxInt = ValAndOff(x).add(c) + v.reset(Op386MOVWstore) + v.AuxInt = off v.Aux = sym - v.AddArg3(ptr, idx, mem) + v.AddArg3(ptr, x, mem) return true } - // match: (MOVWstoreconstidx1 [x] {sym} ptr (ADDLconst [c] idx) mem) - // result: (MOVWstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem) + // match: (MOVWstore [off1] {sym} (ADDLconst [off2] ptr) val mem) + // cond: is32Bit(off1+off2) + // result: (MOVWstore [off1+off2] {sym} ptr val mem) for { - x := v.AuxInt + off1 := v.AuxInt sym := v.Aux - ptr := v_0 - if v_1.Op != Op386ADDLconst { + if v_0.Op != Op386ADDLconst { break } - c := v_1.AuxInt - idx := v_1.Args[0] + off2 := v_0.AuxInt + ptr := v_0.Args[0] + val := v_1 mem := v_2 - v.reset(Op386MOVWstoreconstidx1) - v.AuxInt = ValAndOff(x).add(c) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVWstoreconstidx1 [c] {s} p i x:(MOVWstoreconstidx1 [a] {s} p i mem)) - // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x) - // result: (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p i mem) - for { - c := v.AuxInt - s := v.Aux - p := v_0 - i := v_1 - x := v_2 - if x.Op != Op386MOVWstoreconstidx1 { - break - } - a := x.AuxInt - if x.Aux != s { - break - } - mem := x.Args[2] - if p != x.Args[0] || i != x.Args[1] || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) { + if !(is32Bit(off1 + off2)) { break } - v.reset(Op386MOVLstoreconstidx1) - v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off()) - v.Aux = s - v.AddArg3(p, i, mem) + v.reset(Op386MOVWstore) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg3(ptr, val, mem) return true } - return false -} -func rewriteValue386_Op386MOVWstoreconstidx2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MOVWstoreconstidx2 [x] {sym} (ADDLconst [c] ptr) idx mem) - // result: (MOVWstoreconstidx2 [ValAndOff(x).add(c)] {sym} ptr idx mem) + // match: (MOVWstore [off] {sym} ptr (MOVLconst [c]) mem) + // cond: validOff(off) + // result: (MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem) for { - x := v.AuxInt + off := v.AuxInt sym := v.Aux - if v_0.Op != Op386ADDLconst { + ptr := v_0 + if v_1.Op != Op386MOVLconst { break } - c := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 + c := v_1.AuxInt mem := v_2 - v.reset(Op386MOVWstoreconstidx2) - v.AuxInt = ValAndOff(x).add(c) + if !(validOff(off)) { + break + } + v.reset(Op386MOVWstoreconst) + v.AuxInt = makeValAndOff(int64(int16(c)), off) v.Aux = sym - v.AddArg3(ptr, idx, 
mem) + v.AddArg2(ptr, mem) return true } - // match: (MOVWstoreconstidx2 [x] {sym} ptr (ADDLconst [c] idx) mem) - // result: (MOVWstoreconstidx2 [ValAndOff(x).add(2*c)] {sym} ptr idx mem) + // match: (MOVWstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { - x := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != Op386ADDLconst { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL { break } - c := v_1.AuxInt - idx := v_1.Args[0] + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + val := v_1 mem := v_2 - v.reset(Op386MOVWstoreconstidx2) - v.AuxInt = ValAndOff(x).add(2 * c) - v.Aux = sym - v.AddArg3(ptr, idx, mem) + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(Op386MOVWstore) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg3(base, val, mem) return true } - // match: (MOVWstoreconstidx2 [c] {s} p i x:(MOVWstoreconstidx2 [a] {s} p i mem)) - // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x) - // result: (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLLconst [1] i) mem) + // match: (MOVWstore [i] {s} p (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p w mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVLstore [i-2] {s} p w mem) for { - c := v.AuxInt + i := v.AuxInt s := v.Aux p := v_0 - i := v_1 - x := v_2 - if x.Op != Op386MOVWstoreconstidx2 { + if v_1.Op != Op386SHRLconst || v_1.AuxInt != 16 { break } - a := x.AuxInt - if x.Aux != s { + w := v_1.Args[0] + x := v_2 + if x.Op != Op386MOVWstore || x.AuxInt != i-2 || x.Aux != s { break } mem := x.Args[2] - if p != x.Args[0] || i != x.Args[1] || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) { + if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) { break } - v.reset(Op386MOVLstoreconstidx1) - v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off()) + v.reset(Op386MOVLstore) + v.AuxInt = i - 2 v.Aux = s - v0 := b.NewValue0(v.Pos, Op386SHLLconst, i.Type) - v0.AuxInt = 1 - v0.AddArg(i) - v.AddArg3(p, v0, mem) + v.AddArg3(p, w, mem) return true } - return false -} -func rewriteValue386_Op386MOVWstoreidx1(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVWstoreidx1 [c] {sym} ptr (SHLLconst [1] idx) val mem) - // result: (MOVWstoreidx2 [c] {sym} ptr idx val mem) + // match: (MOVWstore [i] {s} p (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRLconst [j-16] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVLstore [i-2] {s} p w0 mem) for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - ptr := v_0 - if v_1.Op != Op386SHLLconst || v_1.AuxInt != 1 { - continue - } - idx := v_1.Args[0] - val := v_2 - mem := v_3 - v.reset(Op386MOVWstoreidx2) - v.AuxInt = c - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true + i := v.AuxInt + s := v.Aux + p := v_0 + if v_1.Op != Op386SHRLconst { + break } - break - } - // match: (MOVWstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem) - // result: (MOVWstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem) - for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != 
Op386ADDLconst { - continue - } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - val := v_2 - mem := v_3 - v.reset(Op386MOVWstoreidx1) - v.AuxInt = int64(int32(c + d)) - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true + j := v_1.AuxInt + w := v_1.Args[0] + x := v_2 + if x.Op != Op386MOVWstore || x.AuxInt != i-2 || x.Aux != s { + break } - break - } - // match: (MOVWstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem) - // result: (MOVWstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem) - for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - ptr := v_0 - if v_1.Op != Op386ADDLconst { - continue - } - d := v_1.AuxInt - idx := v_1.Args[0] - val := v_2 - mem := v_3 - v.reset(Op386MOVWstoreidx1) - v.AuxInt = int64(int32(c + d)) - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true + mem := x.Args[2] + if p != x.Args[0] { + break } - break + w0 := x.Args[1] + if w0.Op != Op386SHRLconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(Op386MOVLstore) + v.AuxInt = i - 2 + v.Aux = s + v.AddArg3(p, w0, mem) + return true } - // match: (MOVWstoreidx1 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVLstoreidx1 [i-2] {s} p idx w mem) + // match: (MOVWstore [i] {s} p1 (SHRLconst [16] w) x:(MOVWstore [i] {s} p0 w mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x) + // result: (MOVLstore [i] {s} p0 w mem) for { i := v.AuxInt - s := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - p := v_0 - idx := v_1 - if v_2.Op != Op386SHRLconst || v_2.AuxInt != 16 { - continue - } - w := v_2.Args[0] - x := v_3 - if x.Op != Op386MOVWstoreidx1 || x.AuxInt != i-2 || x.Aux != s { - continue - } - mem := x.Args[3] - x_0 := x.Args[0] - x_1 := x.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 { - if p != x_0 || idx != x_1 || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) { - continue - } - v.reset(Op386MOVLstoreidx1) - v.AuxInt = i - 2 - v.Aux = s - v.AddArg4(p, idx, w, mem) - return true - } + s := v.Aux + p1 := v_0 + if v_1.Op != Op386SHRLconst || v_1.AuxInt != 16 { + break } - break + w := v_1.Args[0] + x := v_2 + if x.Op != Op386MOVWstore || x.AuxInt != i || x.Aux != s { + break + } + mem := x.Args[2] + p0 := x.Args[0] + if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)) { + break + } + v.reset(Op386MOVLstore) + v.AuxInt = i + v.Aux = s + v.AddArg3(p0, w, mem) + return true } - // match: (MOVWstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVLstoreidx1 [i-2] {s} p idx w0 mem) + // match: (MOVWstore [i] {s} p1 (SHRLconst [j] w) x:(MOVWstore [i] {s} p0 w0:(SHRLconst [j-16] w) mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x) + // result: (MOVLstore [i] {s} p0 w0 mem) for { i := v.AuxInt s := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - p := v_0 - idx := v_1 - if v_2.Op != Op386SHRLconst { - continue - } - j := v_2.AuxInt - w := v_2.Args[0] - x := v_3 - if x.Op != Op386MOVWstoreidx1 || x.AuxInt != i-2 || x.Aux != s { - continue - } - mem := x.Args[3] - x_0 := x.Args[0] - x_1 := x.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 { - if p != x_0 || idx != x_1 { - continue - } - w0 := x.Args[2] - if w0.Op != Op386SHRLconst || w0.AuxInt != j-16 || w != w0.Args[0] || 
!(x.Uses == 1 && clobber(x)) { - continue - } - v.reset(Op386MOVLstoreidx1) - v.AuxInt = i - 2 - v.Aux = s - v.AddArg4(p, idx, w0, mem) - return true - } + p1 := v_0 + if v_1.Op != Op386SHRLconst { + break } - break + j := v_1.AuxInt + w := v_1.Args[0] + x := v_2 + if x.Op != Op386MOVWstore || x.AuxInt != i || x.Aux != s { + break + } + mem := x.Args[2] + p0 := x.Args[0] + w0 := x.Args[1] + if w0.Op != Op386SHRLconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)) { + break + } + v.reset(Op386MOVLstore) + v.AuxInt = i + v.Aux = s + v.AddArg3(p0, w0, mem) + return true } return false } -func rewriteValue386_Op386MOVWstoreidx2(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValue386_Op386MOVWstoreconst(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MOVWstoreidx2 [c] {sym} (ADDLconst [d] ptr) idx val mem) - // result: (MOVWstoreidx2 [int64(int32(c+d))] {sym} ptr idx val mem) + config := b.Func.Config + // match: (MOVWstoreconst [sc] {s} (ADDLconst [off] ptr) mem) + // cond: ValAndOff(sc).canAdd(off) + // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem) for { - c := v.AuxInt - sym := v.Aux + sc := v.AuxInt + s := v.Aux if v_0.Op != Op386ADDLconst { break } - d := v_0.AuxInt + off := v_0.AuxInt ptr := v_0.Args[0] - idx := v_1 - val := v_2 - mem := v_3 - v.reset(Op386MOVWstoreidx2) - v.AuxInt = int64(int32(c + d)) - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) + mem := v_1 + if !(ValAndOff(sc).canAdd(off)) { + break + } + v.reset(Op386MOVWstoreconst) + v.AuxInt = ValAndOff(sc).add(off) + v.Aux = s + v.AddArg2(ptr, mem) return true } - // match: (MOVWstoreidx2 [c] {sym} ptr (ADDLconst [d] idx) val mem) - // result: (MOVWstoreidx2 [int64(int32(c+2*d))] {sym} ptr idx val mem) + // match: (MOVWstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) + // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) + // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) for { - c := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != Op386ADDLconst { + sc := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL { break } - d := v_1.AuxInt - idx := v_1.Args[0] - val := v_2 - mem := v_3 - v.reset(Op386MOVWstoreidx2) - v.AuxInt = int64(int32(c + 2*d)) - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) + off := v_0.AuxInt + sym2 := v_0.Aux + ptr := v_0.Args[0] + mem := v_1 + if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(Op386MOVWstoreconst) + v.AuxInt = ValAndOff(sc).add(off) + v.Aux = mergeSym(sym1, sym2) + v.AddArg2(ptr, mem) return true } - // match: (MOVWstoreidx2 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVLstoreidx1 [i-2] {s} p (SHLLconst [1] idx) w mem) + // match: (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x) + // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem) for { - i := v.AuxInt + c := v.AuxInt s := v.Aux p := v_0 - idx := v_1 - if v_2.Op != Op386SHRLconst || v_2.AuxInt != 16 { + x := v_1 + if x.Op != Op386MOVWstoreconst { break } - w := v_2.Args[0] - x := v_3 - if x.Op != Op386MOVWstoreidx2 || x.AuxInt != i-2 || x.Aux != s { + a := x.AuxInt + if x.Aux != s { break } - mem := 
x.Args[3] - if p != x.Args[0] || idx != x.Args[1] || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) { + mem := x.Args[1] + if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) { break } - v.reset(Op386MOVLstoreidx1) - v.AuxInt = i - 2 + v.reset(Op386MOVLstoreconst) + v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off()) v.Aux = s - v0 := b.NewValue0(v.Pos, Op386SHLLconst, idx.Type) - v0.AuxInt = 1 - v0.AddArg(idx) - v.AddArg4(p, v0, w, mem) + v.AddArg2(p, mem) return true } - // match: (MOVWstoreidx2 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVLstoreidx1 [i-2] {s} p (SHLLconst [1] idx) w0 mem) + // match: (MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x) + // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem) for { - i := v.AuxInt + a := v.AuxInt s := v.Aux p := v_0 - idx := v_1 - if v_2.Op != Op386SHRLconst { + x := v_1 + if x.Op != Op386MOVWstoreconst { break } - j := v_2.AuxInt - w := v_2.Args[0] - x := v_3 - if x.Op != Op386MOVWstoreidx2 || x.AuxInt != i-2 || x.Aux != s { + c := x.AuxInt + if x.Aux != s { break } - mem := x.Args[3] - if p != x.Args[0] || idx != x.Args[1] { + mem := x.Args[1] + if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) { break } - w0 := x.Args[2] - if w0.Op != Op386SHRLconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) { + v.reset(Op386MOVLstoreconst) + v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off()) + v.Aux = s + v.AddArg2(p, mem) + return true + } + // match: (MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() == ValAndOff(c).Off() && sequentialAddresses(p0, p1, 2) && clobber(x) + // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem) + for { + c := v.AuxInt + s := v.Aux + p1 := v_0 + x := v_1 + if x.Op != Op386MOVWstoreconst { break } - v.reset(Op386MOVLstoreidx1) - v.AuxInt = i - 2 + a := x.AuxInt + if x.Aux != s { + break + } + mem := x.Args[1] + p0 := x.Args[0] + if !(x.Uses == 1 && ValAndOff(a).Off() == ValAndOff(c).Off() && sequentialAddresses(p0, p1, 2) && clobber(x)) { + break + } + v.reset(Op386MOVLstoreconst) + v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off()) + v.Aux = s + v.AddArg2(p0, mem) + return true + } + // match: (MOVWstoreconst [a] {s} p0 x:(MOVWstoreconst [c] {s} p1 mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() == ValAndOff(c).Off() && sequentialAddresses(p0, p1, 2) && clobber(x) + // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem) + for { + a := v.AuxInt + s := v.Aux + p0 := v_0 + x := v_1 + if x.Op != Op386MOVWstoreconst { + break + } + c := x.AuxInt + if x.Aux != s { + break + } + mem := x.Args[1] + p1 := x.Args[0] + if !(x.Uses == 1 && ValAndOff(a).Off() == ValAndOff(c).Off() && sequentialAddresses(p0, p1, 2) && clobber(x)) { + break + } + v.reset(Op386MOVLstoreconst) + v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off()) v.Aux = s - v0 := b.NewValue0(v.Pos, Op386SHLLconst, 
idx.Type) - v0.AuxInt = 1 - v0.AddArg(idx) - v.AddArg4(p, v0, w0, mem) + v.AddArg2(p0, mem) return true } return false @@ -8764,32 +5641,6 @@ func rewriteValue386_Op386MULL(v *Value) bool { } break } - // match: (MULL x l:(MOVLloadidx4 [off] {sym} ptr idx mem)) - // cond: canMergeLoadClobber(v, l, x) && clobber(l) - // result: (MULLloadidx4 x [off] {sym} ptr idx mem) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != Op386MOVLloadidx4 { - continue - } - off := l.AuxInt - sym := l.Aux - mem := l.Args[2] - ptr := l.Args[0] - idx := l.Args[1] - if !(canMergeLoadClobber(v, l, x) && clobber(l)) { - continue - } - v.reset(Op386MULLloadidx4) - v.AuxInt = off - v.Aux = sym - v.AddArg4(x, ptr, idx, mem) - return true - } - break - } return false } func rewriteValue386_Op386MULLconst(v *Value) bool { @@ -9201,108 +6052,30 @@ func rewriteValue386_Op386MULLconst(v *Value) bool { v0.AddArg2(x, x) v.AddArg(v0) return true - } - // match: (MULLconst [c] (MOVLconst [d])) - // result: (MOVLconst [int64(int32(c*d))]) - for { - c := v.AuxInt - if v_0.Op != Op386MOVLconst { - break - } - d := v_0.AuxInt - v.reset(Op386MOVLconst) - v.AuxInt = int64(int32(c * d)) - return true - } - return false -} -func rewriteValue386_Op386MULLload(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (MULLload [off1] {sym} val (ADDLconst [off2] base) mem) - // cond: is32Bit(off1+off2) - // result: (MULLload [off1+off2] {sym} val base mem) - for { - off1 := v.AuxInt - sym := v.Aux - val := v_0 - if v_1.Op != Op386ADDLconst { - break - } - off2 := v_1.AuxInt - base := v_1.Args[0] - mem := v_2 - if !(is32Bit(off1 + off2)) { - break - } - v.reset(Op386MULLload) - v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg3(val, base, mem) - return true - } - // match: (MULLload [off1] {sym1} val (LEAL [off2] {sym2} base) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MULLload [off1+off2] {mergeSym(sym1,sym2)} val base mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - val := v_0 - if v_1.Op != Op386LEAL { - break - } - off2 := v_1.AuxInt - sym2 := v_1.Aux - base := v_1.Args[0] - mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { - break - } - v.reset(Op386MULLload) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(val, base, mem) - return true - } - // match: (MULLload [off1] {sym1} val (LEAL4 [off2] {sym2} ptr idx) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MULLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val ptr idx mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - val := v_0 - if v_1.Op != Op386LEAL4 { - break - } - off2 := v_1.AuxInt - sym2 := v_1.Aux - idx := v_1.Args[1] - ptr := v_1.Args[0] - mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + } + // match: (MULLconst [c] (MOVLconst [d])) + // result: (MOVLconst [int64(int32(c*d))]) + for { + c := v.AuxInt + if v_0.Op != Op386MOVLconst { break } - v.reset(Op386MULLloadidx4) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg4(val, ptr, idx, mem) + d := v_0.AuxInt + v.reset(Op386MOVLconst) + v.AuxInt = int64(int32(c * d)) return true } return false } -func rewriteValue386_Op386MULLloadidx4(v *Value) bool { - v_3 := v.Args[3] +func rewriteValue386_Op386MULLload(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block 
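// The MULLload rules below fold address arithmetic into the load itself:
// an ADDLconst base is absorbed into the offset, and a LEAL base merges
// its symbol and offset via mergeSym. The config.ctxt.Flag_shared check
// keeps SB-relative bases out of the fold in shared (PIC) mode, where
// such addresses are computed differently.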
config := b.Func.Config - // match: (MULLloadidx4 [off1] {sym} val (ADDLconst [off2] base) idx mem) + // match: (MULLload [off1] {sym} val (ADDLconst [off2] base) mem) // cond: is32Bit(off1+off2) - // result: (MULLloadidx4 [off1+off2] {sym} val base idx mem) + // result: (MULLload [off1+off2] {sym} val base mem) for { off1 := v.AuxInt sym := v.Aux @@ -9312,43 +6085,19 @@ func rewriteValue386_Op386MULLloadidx4(v *Value) bool { } off2 := v_1.AuxInt base := v_1.Args[0] - idx := v_2 - mem := v_3 + mem := v_2 if !(is32Bit(off1 + off2)) { break } - v.reset(Op386MULLloadidx4) + v.reset(Op386MULLload) v.AuxInt = off1 + off2 v.Aux = sym - v.AddArg4(val, base, idx, mem) - return true - } - // match: (MULLloadidx4 [off1] {sym} val base (ADDLconst [off2] idx) mem) - // cond: is32Bit(off1+off2*4) - // result: (MULLloadidx4 [off1+off2*4] {sym} val base idx mem) - for { - off1 := v.AuxInt - sym := v.Aux - val := v_0 - base := v_1 - if v_2.Op != Op386ADDLconst { - break - } - off2 := v_2.AuxInt - idx := v_2.Args[0] - mem := v_3 - if !(is32Bit(off1 + off2*4)) { - break - } - v.reset(Op386MULLloadidx4) - v.AuxInt = off1 + off2*4 - v.Aux = sym - v.AddArg4(val, base, idx, mem) + v.AddArg3(val, base, mem) return true } - // match: (MULLloadidx4 [off1] {sym1} val (LEAL [off2] {sym2} base) idx mem) + // match: (MULLload [off1] {sym1} val (LEAL [off2] {sym2} base) mem) // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MULLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val base idx mem) + // result: (MULLload [off1+off2] {mergeSym(sym1,sym2)} val base mem) for { off1 := v.AuxInt sym1 := v.Aux @@ -9359,15 +6108,14 @@ func rewriteValue386_Op386MULLloadidx4(v *Value) bool { off2 := v_1.AuxInt sym2 := v_1.Aux base := v_1.Args[0] - idx := v_2 - mem := v_3 + mem := v_2 if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { break } - v.reset(Op386MULLloadidx4) + v.reset(Op386MULLload) v.AuxInt = off1 + off2 v.Aux = mergeSym(sym1, sym2) - v.AddArg4(val, base, idx, mem) + v.AddArg3(val, base, mem) return true } return false @@ -9692,32 +6440,6 @@ func rewriteValue386_Op386ORL(v *Value) bool { } break } - // match: (ORL x l:(MOVLloadidx4 [off] {sym} ptr idx mem)) - // cond: canMergeLoadClobber(v, l, x) && clobber(l) - // result: (ORLloadidx4 x [off] {sym} ptr idx mem) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != Op386MOVLloadidx4 { - continue - } - off := l.AuxInt - sym := l.Aux - mem := l.Args[2] - ptr := l.Args[0] - idx := l.Args[1] - if !(canMergeLoadClobber(v, l, x) && clobber(l)) { - continue - } - v.reset(Op386ORLloadidx4) - v.AuxInt = off - v.Aux = sym - v.AddArg4(x, ptr, idx, mem) - return true - } - break - } // match: (ORL x x) // result: x for { @@ -9767,6 +6489,42 @@ func rewriteValue386_Op386ORL(v *Value) bool { } break } + // match: (ORL x0:(MOVBload [i] {s} p0 mem) s0:(SHLLconst [8] x1:(MOVBload [i] {s} p1 mem))) + // cond: x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, s0) + // result: @mergePoint(b,x0,x1) (MOVWload [i] {s} p0 mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x0 := v_0 + if x0.Op != Op386MOVBload { + continue + } + i := x0.AuxInt + s := x0.Aux + mem := x0.Args[1] + p0 := x0.Args[0] + s0 := v_1 + if s0.Op != Op386SHLLconst || s0.AuxInt != 8 { + continue + } + x1 := s0.Args[0] + if x1.Op != Op386MOVBload || x1.AuxInt != i 
|| x1.Aux != s { + continue + } + _ = x1.Args[1] + p1 := x1.Args[0] + if mem != x1.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, s0)) { + continue + } + b = mergePoint(b, x0, x1) + v0 := b.NewValue0(x1.Pos, Op386MOVWload, typ.UInt16) + v.copyOf(v0) + v0.AuxInt = i + v0.Aux = s + v0.AddArg2(p0, mem) + return true + } + break + } // match: (ORL o0:(ORL x0:(MOVWload [i0] {s} p mem) s0:(SHLLconst [16] x1:(MOVBload [i2] {s} p mem))) s1:(SHLLconst [24] x2:(MOVBload [i3] {s} p mem))) // cond: i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0, x1, x2, s0, s1, o0) // result: @mergePoint(b,x0,x1,x2) (MOVLload [i0] {s} p mem) @@ -9831,57 +6589,9 @@ func rewriteValue386_Op386ORL(v *Value) bool { } break } - // match: (ORL x0:(MOVBloadidx1 [i0] {s} p idx mem) s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem))) - // cond: i1==i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, s0) - // result: @mergePoint(b,x0,x1) (MOVWloadidx1 [i0] {s} p idx mem) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x0 := v_0 - if x0.Op != Op386MOVBloadidx1 { - continue - } - i0 := x0.AuxInt - s := x0.Aux - mem := x0.Args[2] - x0_0 := x0.Args[0] - x0_1 := x0.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x0_0, x0_1 = _i1+1, x0_1, x0_0 { - p := x0_0 - idx := x0_1 - s0 := v_1 - if s0.Op != Op386SHLLconst || s0.AuxInt != 8 { - continue - } - x1 := s0.Args[0] - if x1.Op != Op386MOVBloadidx1 { - continue - } - i1 := x1.AuxInt - if x1.Aux != s { - continue - } - _ = x1.Args[2] - x1_0 := x1.Args[0] - x1_1 := x1.Args[1] - for _i2 := 0; _i2 <= 1; _i2, x1_0, x1_1 = _i2+1, x1_1, x1_0 { - if p != x1_0 || idx != x1_1 || mem != x1.Args[2] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, s0)) { - continue - } - b = mergePoint(b, x0, x1) - v0 := b.NewValue0(v.Pos, Op386MOVWloadidx1, v.Type) - v.copyOf(v0) - v0.AuxInt = i0 - v0.Aux = s - v0.AddArg3(p, idx, mem) - return true - } - } - } - break - } - // match: (ORL o0:(ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem))) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem))) - // cond: i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0, x1, x2, s0, s1, o0) - // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 [i0] {s} p idx mem) + // match: (ORL o0:(ORL x0:(MOVWload [i] {s} p0 mem) s0:(SHLLconst [16] x1:(MOVBload [i] {s} p1 mem))) s1:(SHLLconst [24] x2:(MOVBload [i] {s} p2 mem))) + // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && sequentialAddresses(p0, p1, 2) && sequentialAddresses(p1, p2, 1) && mergePoint(b,x0,x1,x2) != nil && clobber(x0, x1, x2, s0, s1, o0) + // result: @mergePoint(b,x0,x1,x2) (MOVLload [i] {s} p0 mem) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { o0 := v_0 @@ -9893,65 +6603,46 @@ func rewriteValue386_Op386ORL(v *Value) bool { o0_1 := o0.Args[1] for _i1 := 0; _i1 <= 1; _i1, o0_0, o0_1 = _i1+1, o0_1, o0_0 { x0 := o0_0 - if x0.Op != Op386MOVWloadidx1 { + if x0.Op != Op386MOVWload { continue } - i0 := x0.AuxInt + i := x0.AuxInt s := x0.Aux - mem := x0.Args[2] - x0_0 := x0.Args[0] - x0_1 := 
x0.Args[1] - for _i2 := 0; _i2 <= 1; _i2, x0_0, x0_1 = _i2+1, x0_1, x0_0 { - p := x0_0 - idx := x0_1 - s0 := o0_1 - if s0.Op != Op386SHLLconst || s0.AuxInt != 16 { - continue - } - x1 := s0.Args[0] - if x1.Op != Op386MOVBloadidx1 { - continue - } - i2 := x1.AuxInt - if x1.Aux != s { - continue - } - _ = x1.Args[2] - x1_0 := x1.Args[0] - x1_1 := x1.Args[1] - for _i3 := 0; _i3 <= 1; _i3, x1_0, x1_1 = _i3+1, x1_1, x1_0 { - if p != x1_0 || idx != x1_1 || mem != x1.Args[2] { - continue - } - s1 := v_1 - if s1.Op != Op386SHLLconst || s1.AuxInt != 24 { - continue - } - x2 := s1.Args[0] - if x2.Op != Op386MOVBloadidx1 { - continue - } - i3 := x2.AuxInt - if x2.Aux != s { - continue - } - _ = x2.Args[2] - x2_0 := x2.Args[0] - x2_1 := x2.Args[1] - for _i4 := 0; _i4 <= 1; _i4, x2_0, x2_1 = _i4+1, x2_1, x2_0 { - if p != x2_0 || idx != x2_1 || mem != x2.Args[2] || !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, s0, s1, o0)) { - continue - } - b = mergePoint(b, x0, x1, x2) - v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type) - v.copyOf(v0) - v0.AuxInt = i0 - v0.Aux = s - v0.AddArg3(p, idx, mem) - return true - } - } + mem := x0.Args[1] + p0 := x0.Args[0] + s0 := o0_1 + if s0.Op != Op386SHLLconst || s0.AuxInt != 16 { + continue + } + x1 := s0.Args[0] + if x1.Op != Op386MOVBload || x1.AuxInt != i || x1.Aux != s { + continue } + _ = x1.Args[1] + p1 := x1.Args[0] + if mem != x1.Args[1] { + continue + } + s1 := v_1 + if s1.Op != Op386SHLLconst || s1.AuxInt != 24 { + continue + } + x2 := s1.Args[0] + if x2.Op != Op386MOVBload || x2.AuxInt != i || x2.Aux != s { + continue + } + _ = x2.Args[1] + p2 := x2.Args[0] + if mem != x2.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && sequentialAddresses(p0, p1, 2) && sequentialAddresses(p1, p2, 1) && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, s0, s1, o0)) { + continue + } + b = mergePoint(b, x0, x1, x2) + v0 := b.NewValue0(x2.Pos, Op386MOVLload, typ.UInt32) + v.copyOf(v0) + v0.AuxInt = i + v0.Aux = s + v0.AddArg2(p0, mem) + return true } } break @@ -10048,168 +6739,15 @@ func rewriteValue386_Op386ORLconstmodify(v *Value) bool { } return false } -func rewriteValue386_Op386ORLconstmodifyidx4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (ORLconstmodifyidx4 [valoff1] {sym} (ADDLconst [off2] base) idx mem) - // cond: ValAndOff(valoff1).canAdd(off2) - // result: (ORLconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {sym} base idx mem) - for { - valoff1 := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDLconst { - break - } - off2 := v_0.AuxInt - base := v_0.Args[0] - idx := v_1 - mem := v_2 - if !(ValAndOff(valoff1).canAdd(off2)) { - break - } - v.reset(Op386ORLconstmodifyidx4) - v.AuxInt = ValAndOff(valoff1).add(off2) - v.Aux = sym - v.AddArg3(base, idx, mem) - return true - } - // match: (ORLconstmodifyidx4 [valoff1] {sym} base (ADDLconst [off2] idx) mem) - // cond: ValAndOff(valoff1).canAdd(off2*4) - // result: (ORLconstmodifyidx4 [ValAndOff(valoff1).add(off2*4)] {sym} base idx mem) - for { - valoff1 := v.AuxInt - sym := v.Aux - base := v_0 - if v_1.Op != Op386ADDLconst { - break - } - off2 := v_1.AuxInt - idx := v_1.Args[0] - mem := v_2 - if !(ValAndOff(valoff1).canAdd(off2 * 4)) { - break - } - v.reset(Op386ORLconstmodifyidx4) - v.AuxInt = ValAndOff(valoff1).add(off2 * 4) - v.Aux = sym - 
v.AddArg3(base, idx, mem) - return true - } - // match: (ORLconstmodifyidx4 [valoff1] {sym1} (LEAL [off2] {sym2} base) idx mem) - // cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (ORLconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base idx mem) - for { - valoff1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - base := v_0.Args[0] - idx := v_1 - mem := v_2 - if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { - break - } - v.reset(Op386ORLconstmodifyidx4) - v.AuxInt = ValAndOff(valoff1).add(off2) - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(base, idx, mem) - return true - } - return false -} func rewriteValue386_Op386ORLload(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block config := b.Func.Config - // match: (ORLload [off1] {sym} val (ADDLconst [off2] base) mem) - // cond: is32Bit(off1+off2) - // result: (ORLload [off1+off2] {sym} val base mem) - for { - off1 := v.AuxInt - sym := v.Aux - val := v_0 - if v_1.Op != Op386ADDLconst { - break - } - off2 := v_1.AuxInt - base := v_1.Args[0] - mem := v_2 - if !(is32Bit(off1 + off2)) { - break - } - v.reset(Op386ORLload) - v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg3(val, base, mem) - return true - } - // match: (ORLload [off1] {sym1} val (LEAL [off2] {sym2} base) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (ORLload [off1+off2] {mergeSym(sym1,sym2)} val base mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - val := v_0 - if v_1.Op != Op386LEAL { - break - } - off2 := v_1.AuxInt - sym2 := v_1.Aux - base := v_1.Args[0] - mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { - break - } - v.reset(Op386ORLload) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(val, base, mem) - return true - } - // match: (ORLload [off1] {sym1} val (LEAL4 [off2] {sym2} ptr idx) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (ORLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val ptr idx mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - val := v_0 - if v_1.Op != Op386LEAL4 { - break - } - off2 := v_1.AuxInt - sym2 := v_1.Aux - idx := v_1.Args[1] - ptr := v_1.Args[0] - mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(Op386ORLloadidx4) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg4(val, ptr, idx, mem) - return true - } - return false -} -func rewriteValue386_Op386ORLloadidx4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (ORLloadidx4 [off1] {sym} val (ADDLconst [off2] base) idx mem) + // match: (ORLload [off1] {sym} val (ADDLconst [off2] base) mem) // cond: is32Bit(off1+off2) - // result: (ORLloadidx4 [off1+off2] {sym} val base idx mem) + // result: (ORLload [off1+off2] {sym} val base mem) for { off1 := v.AuxInt sym := v.Aux @@ -10219,43 +6757,19 @@ func rewriteValue386_Op386ORLloadidx4(v *Value) bool { } off2 := v_1.AuxInt base := v_1.Args[0] - idx := v_2 - mem := v_3 + mem := v_2 if !(is32Bit(off1 + off2)) { break } - v.reset(Op386ORLloadidx4) + v.reset(Op386ORLload) v.AuxInt = off1 + off2 v.Aux = sym - v.AddArg4(val, base, idx, mem) - return true - } - // match: (ORLloadidx4 
[off1] {sym} val base (ADDLconst [off2] idx) mem) - // cond: is32Bit(off1+off2*4) - // result: (ORLloadidx4 [off1+off2*4] {sym} val base idx mem) - for { - off1 := v.AuxInt - sym := v.Aux - val := v_0 - base := v_1 - if v_2.Op != Op386ADDLconst { - break - } - off2 := v_2.AuxInt - idx := v_2.Args[0] - mem := v_3 - if !(is32Bit(off1 + off2*4)) { - break - } - v.reset(Op386ORLloadidx4) - v.AuxInt = off1 + off2*4 - v.Aux = sym - v.AddArg4(val, base, idx, mem) + v.AddArg3(val, base, mem) return true } - // match: (ORLloadidx4 [off1] {sym1} val (LEAL [off2] {sym2} base) idx mem) + // match: (ORLload [off1] {sym1} val (LEAL [off2] {sym2} base) mem) // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (ORLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val base idx mem) + // result: (ORLload [off1+off2] {mergeSym(sym1,sym2)} val base mem) for { off1 := v.AuxInt sym1 := v.Aux @@ -10266,15 +6780,14 @@ func rewriteValue386_Op386ORLloadidx4(v *Value) bool { off2 := v_1.AuxInt sym2 := v_1.Aux base := v_1.Args[0] - idx := v_2 - mem := v_3 + mem := v_2 if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { break } - v.reset(Op386ORLloadidx4) + v.reset(Op386ORLload) v.AuxInt = off1 + off2 v.Aux = mergeSym(sym1, sym2) - v.AddArg4(val, base, idx, mem) + v.AddArg3(val, base, mem) return true } return false @@ -10332,107 +6845,6 @@ func rewriteValue386_Op386ORLmodify(v *Value) bool { } return false } -func rewriteValue386_Op386ORLmodifyidx4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (ORLmodifyidx4 [off1] {sym} (ADDLconst [off2] base) idx val mem) - // cond: is32Bit(off1+off2) - // result: (ORLmodifyidx4 [off1+off2] {sym} base idx val mem) - for { - off1 := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDLconst { - break - } - off2 := v_0.AuxInt - base := v_0.Args[0] - idx := v_1 - val := v_2 - mem := v_3 - if !(is32Bit(off1 + off2)) { - break - } - v.reset(Op386ORLmodifyidx4) - v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg4(base, idx, val, mem) - return true - } - // match: (ORLmodifyidx4 [off1] {sym} base (ADDLconst [off2] idx) val mem) - // cond: is32Bit(off1+off2*4) - // result: (ORLmodifyidx4 [off1+off2*4] {sym} base idx val mem) - for { - off1 := v.AuxInt - sym := v.Aux - base := v_0 - if v_1.Op != Op386ADDLconst { - break - } - off2 := v_1.AuxInt - idx := v_1.Args[0] - val := v_2 - mem := v_3 - if !(is32Bit(off1 + off2*4)) { - break - } - v.reset(Op386ORLmodifyidx4) - v.AuxInt = off1 + off2*4 - v.Aux = sym - v.AddArg4(base, idx, val, mem) - return true - } - // match: (ORLmodifyidx4 [off1] {sym1} (LEAL [off2] {sym2} base) idx val mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (ORLmodifyidx4 [off1+off2] {mergeSym(sym1,sym2)} base idx val mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - base := v_0.Args[0] - idx := v_1 - val := v_2 - mem := v_3 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { - break - } - v.reset(Op386ORLmodifyidx4) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg4(base, idx, val, mem) - return true - } - // match: (ORLmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem) - // cond: validValAndOff(c,off) - // result: (ORLconstmodifyidx4 
[makeValAndOff(c,off)] {sym} ptr idx mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - idx := v_1 - if v_2.Op != Op386MOVLconst { - break - } - c := v_2.AuxInt - mem := v_3 - if !(validValAndOff(c, off)) { - break - } - v.reset(Op386ORLconstmodifyidx4) - v.AuxInt = makeValAndOff(c, off) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - return false -} func rewriteValue386_Op386ROLBconst(v *Value) bool { v_0 := v.Args[0] // match: (ROLBconst [c] (ROLBconst [d] x)) @@ -11629,29 +8041,6 @@ func rewriteValue386_Op386SUBL(v *Value) bool { v.AddArg3(x, ptr, mem) return true } - // match: (SUBL x l:(MOVLloadidx4 [off] {sym} ptr idx mem)) - // cond: canMergeLoadClobber(v, l, x) && clobber(l) - // result: (SUBLloadidx4 x [off] {sym} ptr idx mem) - for { - x := v_0 - l := v_1 - if l.Op != Op386MOVLloadidx4 { - break - } - off := l.AuxInt - sym := l.Aux - mem := l.Args[2] - ptr := l.Args[0] - idx := l.Args[1] - if !(canMergeLoadClobber(v, l, x) && clobber(l)) { - break - } - v.reset(Op386SUBLloadidx4) - v.AuxInt = off - v.Aux = sym - v.AddArg4(x, ptr, idx, mem) - return true - } // match: (SUBL x x) // result: (MOVLconst [0]) for { @@ -11759,220 +8148,39 @@ func rewriteValue386_Op386SUBLload(v *Value) bool { v.AddArg3(val, base, mem) return true } - // match: (SUBLload [off1] {sym1} val (LEAL4 [off2] {sym2} ptr idx) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (SUBLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val ptr idx mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - val := v_0 - if v_1.Op != Op386LEAL4 { - break - } - off2 := v_1.AuxInt - sym2 := v_1.Aux - idx := v_1.Args[1] - ptr := v_1.Args[0] - mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(Op386SUBLloadidx4) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg4(val, ptr, idx, mem) - return true - } - return false -} -func rewriteValue386_Op386SUBLloadidx4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (SUBLloadidx4 [off1] {sym} val (ADDLconst [off2] base) idx mem) - // cond: is32Bit(off1+off2) - // result: (SUBLloadidx4 [off1+off2] {sym} val base idx mem) - for { - off1 := v.AuxInt - sym := v.Aux - val := v_0 - if v_1.Op != Op386ADDLconst { - break - } - off2 := v_1.AuxInt - base := v_1.Args[0] - idx := v_2 - mem := v_3 - if !(is32Bit(off1 + off2)) { - break - } - v.reset(Op386SUBLloadidx4) - v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg4(val, base, idx, mem) - return true - } - // match: (SUBLloadidx4 [off1] {sym} val base (ADDLconst [off2] idx) mem) - // cond: is32Bit(off1+off2*4) - // result: (SUBLloadidx4 [off1+off2*4] {sym} val base idx mem) - for { - off1 := v.AuxInt - sym := v.Aux - val := v_0 - base := v_1 - if v_2.Op != Op386ADDLconst { - break - } - off2 := v_2.AuxInt - idx := v_2.Args[0] - mem := v_3 - if !(is32Bit(off1 + off2*4)) { - break - } - v.reset(Op386SUBLloadidx4) - v.AuxInt = off1 + off2*4 - v.Aux = sym - v.AddArg4(val, base, idx, mem) - return true - } - // match: (SUBLloadidx4 [off1] {sym1} val (LEAL [off2] {sym2} base) idx mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (SUBLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val base idx mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - val := v_0 - if v_1.Op != Op386LEAL { - break - } - off2 := v_1.AuxInt - sym2 := v_1.Aux - base := v_1.Args[0] - idx := v_2 - mem := v_3 - if !(is32Bit(off1+off2) && 
canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { - break - } - v.reset(Op386SUBLloadidx4) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg4(val, base, idx, mem) - return true - } return false } func rewriteValue386_Op386SUBLmodify(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (SUBLmodify [off1] {sym} (ADDLconst [off2] base) val mem) - // cond: is32Bit(off1+off2) - // result: (SUBLmodify [off1+off2] {sym} base val mem) - for { - off1 := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDLconst { - break - } - off2 := v_0.AuxInt - base := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1 + off2)) { - break - } - v.reset(Op386SUBLmodify) - v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg3(base, val, mem) - return true - } - // match: (SUBLmodify [off1] {sym1} (LEAL [off2] {sym2} base) val mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (SUBLmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - base := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { - break - } - v.reset(Op386SUBLmodify) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(base, val, mem) - return true - } - return false -} -func rewriteValue386_Op386SUBLmodifyidx4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (SUBLmodifyidx4 [off1] {sym} (ADDLconst [off2] base) idx val mem) - // cond: is32Bit(off1+off2) - // result: (SUBLmodifyidx4 [off1+off2] {sym} base idx val mem) - for { - off1 := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDLconst { - break - } - off2 := v_0.AuxInt - base := v_0.Args[0] - idx := v_1 - val := v_2 - mem := v_3 - if !(is32Bit(off1 + off2)) { - break - } - v.reset(Op386SUBLmodifyidx4) - v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg4(base, idx, val, mem) - return true - } - // match: (SUBLmodifyidx4 [off1] {sym} base (ADDLconst [off2] idx) val mem) - // cond: is32Bit(off1+off2*4) - // result: (SUBLmodifyidx4 [off1+off2*4] {sym} base idx val mem) + b := v.Block + config := b.Func.Config + // match: (SUBLmodify [off1] {sym} (ADDLconst [off2] base) val mem) + // cond: is32Bit(off1+off2) + // result: (SUBLmodify [off1+off2] {sym} base val mem) for { off1 := v.AuxInt sym := v.Aux - base := v_0 - if v_1.Op != Op386ADDLconst { + if v_0.Op != Op386ADDLconst { break } - off2 := v_1.AuxInt - idx := v_1.Args[0] - val := v_2 - mem := v_3 - if !(is32Bit(off1 + off2*4)) { + off2 := v_0.AuxInt + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(off1 + off2)) { break } - v.reset(Op386SUBLmodifyidx4) - v.AuxInt = off1 + off2*4 + v.reset(Op386SUBLmodify) + v.AuxInt = off1 + off2 v.Aux = sym - v.AddArg4(base, idx, val, mem) + v.AddArg3(base, val, mem) return true } - // match: (SUBLmodifyidx4 [off1] {sym1} (LEAL [off2] {sym2} base) idx val mem) + // match: (SUBLmodify [off1] {sym1} (LEAL [off2] {sym2} base) val mem) // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (SUBLmodifyidx4 [off1+off2] {mergeSym(sym1,sym2)} base idx val mem) + // result: (SUBLmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { off1 := 
v.AuxInt sym1 := v.Aux @@ -11982,38 +8190,15 @@ func rewriteValue386_Op386SUBLmodifyidx4(v *Value) bool { off2 := v_0.AuxInt sym2 := v_0.Aux base := v_0.Args[0] - idx := v_1 - val := v_2 - mem := v_3 + val := v_1 + mem := v_2 if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { break } - v.reset(Op386SUBLmodifyidx4) + v.reset(Op386SUBLmodify) v.AuxInt = off1 + off2 v.Aux = mergeSym(sym1, sym2) - v.AddArg4(base, idx, val, mem) - return true - } - // match: (SUBLmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem) - // cond: validValAndOff(-c,off) - // result: (ADDLconstmodifyidx4 [makeValAndOff(-c,off)] {sym} ptr idx mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - idx := v_1 - if v_2.Op != Op386MOVLconst { - break - } - c := v_2.AuxInt - mem := v_3 - if !(validValAndOff(-c, off)) { - break - } - v.reset(Op386ADDLconstmodifyidx4) - v.AuxInt = makeValAndOff(-c, off) - v.Aux = sym - v.AddArg3(ptr, idx, mem) + v.AddArg3(base, val, mem) return true } return false @@ -12300,32 +8485,6 @@ func rewriteValue386_Op386XORL(v *Value) bool { } break } - // match: (XORL x l:(MOVLloadidx4 [off] {sym} ptr idx mem)) - // cond: canMergeLoadClobber(v, l, x) && clobber(l) - // result: (XORLloadidx4 x [off] {sym} ptr idx mem) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != Op386MOVLloadidx4 { - continue - } - off := l.AuxInt - sym := l.Aux - mem := l.Args[2] - ptr := l.Args[0] - idx := l.Args[1] - if !(canMergeLoadClobber(v, l, x) && clobber(l)) { - continue - } - v.reset(Op386XORLloadidx4) - v.AuxInt = off - v.Aux = sym - v.AddArg4(x, ptr, idx, mem) - return true - } - break - } // match: (XORL x x) // result: (MOVLconst [0]) for { @@ -12431,81 +8590,6 @@ func rewriteValue386_Op386XORLconstmodify(v *Value) bool { } return false } -func rewriteValue386_Op386XORLconstmodifyidx4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (XORLconstmodifyidx4 [valoff1] {sym} (ADDLconst [off2] base) idx mem) - // cond: ValAndOff(valoff1).canAdd(off2) - // result: (XORLconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {sym} base idx mem) - for { - valoff1 := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDLconst { - break - } - off2 := v_0.AuxInt - base := v_0.Args[0] - idx := v_1 - mem := v_2 - if !(ValAndOff(valoff1).canAdd(off2)) { - break - } - v.reset(Op386XORLconstmodifyidx4) - v.AuxInt = ValAndOff(valoff1).add(off2) - v.Aux = sym - v.AddArg3(base, idx, mem) - return true - } - // match: (XORLconstmodifyidx4 [valoff1] {sym} base (ADDLconst [off2] idx) mem) - // cond: ValAndOff(valoff1).canAdd(off2*4) - // result: (XORLconstmodifyidx4 [ValAndOff(valoff1).add(off2*4)] {sym} base idx mem) - for { - valoff1 := v.AuxInt - sym := v.Aux - base := v_0 - if v_1.Op != Op386ADDLconst { - break - } - off2 := v_1.AuxInt - idx := v_1.Args[0] - mem := v_2 - if !(ValAndOff(valoff1).canAdd(off2 * 4)) { - break - } - v.reset(Op386XORLconstmodifyidx4) - v.AuxInt = ValAndOff(valoff1).add(off2 * 4) - v.Aux = sym - v.AddArg3(base, idx, mem) - return true - } - // match: (XORLconstmodifyidx4 [valoff1] {sym1} (LEAL [off2] {sym2} base) idx mem) - // cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (XORLconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base idx mem) - for { - valoff1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL { - break - } - off2 := 
v_0.AuxInt - sym2 := v_0.Aux - base := v_0.Args[0] - idx := v_1 - mem := v_2 - if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { - break - } - v.reset(Op386XORLconstmodifyidx4) - v.AuxInt = ValAndOff(valoff1).add(off2) - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(base, idx, mem) - return true - } - return false -} func rewriteValue386_Op386XORLload(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] @@ -12557,109 +8641,6 @@ func rewriteValue386_Op386XORLload(v *Value) bool { v.AddArg3(val, base, mem) return true } - // match: (XORLload [off1] {sym1} val (LEAL4 [off2] {sym2} ptr idx) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (XORLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val ptr idx mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - val := v_0 - if v_1.Op != Op386LEAL4 { - break - } - off2 := v_1.AuxInt - sym2 := v_1.Aux - idx := v_1.Args[1] - ptr := v_1.Args[0] - mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(Op386XORLloadidx4) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg4(val, ptr, idx, mem) - return true - } - return false -} -func rewriteValue386_Op386XORLloadidx4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (XORLloadidx4 [off1] {sym} val (ADDLconst [off2] base) idx mem) - // cond: is32Bit(off1+off2) - // result: (XORLloadidx4 [off1+off2] {sym} val base idx mem) - for { - off1 := v.AuxInt - sym := v.Aux - val := v_0 - if v_1.Op != Op386ADDLconst { - break - } - off2 := v_1.AuxInt - base := v_1.Args[0] - idx := v_2 - mem := v_3 - if !(is32Bit(off1 + off2)) { - break - } - v.reset(Op386XORLloadidx4) - v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg4(val, base, idx, mem) - return true - } - // match: (XORLloadidx4 [off1] {sym} val base (ADDLconst [off2] idx) mem) - // cond: is32Bit(off1+off2*4) - // result: (XORLloadidx4 [off1+off2*4] {sym} val base idx mem) - for { - off1 := v.AuxInt - sym := v.Aux - val := v_0 - base := v_1 - if v_2.Op != Op386ADDLconst { - break - } - off2 := v_2.AuxInt - idx := v_2.Args[0] - mem := v_3 - if !(is32Bit(off1 + off2*4)) { - break - } - v.reset(Op386XORLloadidx4) - v.AuxInt = off1 + off2*4 - v.Aux = sym - v.AddArg4(val, base, idx, mem) - return true - } - // match: (XORLloadidx4 [off1] {sym1} val (LEAL [off2] {sym2} base) idx mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (XORLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val base idx mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - val := v_0 - if v_1.Op != Op386LEAL { - break - } - off2 := v_1.AuxInt - sym2 := v_1.Aux - base := v_1.Args[0] - idx := v_2 - mem := v_3 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { - break - } - v.reset(Op386XORLloadidx4) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg4(val, base, idx, mem) - return true - } return false } func rewriteValue386_Op386XORLmodify(v *Value) bool { @@ -12715,107 +8696,6 @@ func rewriteValue386_Op386XORLmodify(v *Value) bool { } return false } -func rewriteValue386_Op386XORLmodifyidx4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (XORLmodifyidx4 [off1] {sym} (ADDLconst [off2] base) idx val mem) - // cond: is32Bit(off1+off2) - // result: 
(XORLmodifyidx4 [off1+off2] {sym} base idx val mem) - for { - off1 := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDLconst { - break - } - off2 := v_0.AuxInt - base := v_0.Args[0] - idx := v_1 - val := v_2 - mem := v_3 - if !(is32Bit(off1 + off2)) { - break - } - v.reset(Op386XORLmodifyidx4) - v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg4(base, idx, val, mem) - return true - } - // match: (XORLmodifyidx4 [off1] {sym} base (ADDLconst [off2] idx) val mem) - // cond: is32Bit(off1+off2*4) - // result: (XORLmodifyidx4 [off1+off2*4] {sym} base idx val mem) - for { - off1 := v.AuxInt - sym := v.Aux - base := v_0 - if v_1.Op != Op386ADDLconst { - break - } - off2 := v_1.AuxInt - idx := v_1.Args[0] - val := v_2 - mem := v_3 - if !(is32Bit(off1 + off2*4)) { - break - } - v.reset(Op386XORLmodifyidx4) - v.AuxInt = off1 + off2*4 - v.Aux = sym - v.AddArg4(base, idx, val, mem) - return true - } - // match: (XORLmodifyidx4 [off1] {sym1} (LEAL [off2] {sym2} base) idx val mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (XORLmodifyidx4 [off1+off2] {mergeSym(sym1,sym2)} base idx val mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - base := v_0.Args[0] - idx := v_1 - val := v_2 - mem := v_3 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { - break - } - v.reset(Op386XORLmodifyidx4) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg4(base, idx, val, mem) - return true - } - // match: (XORLmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem) - // cond: validValAndOff(c,off) - // result: (XORLconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - idx := v_1 - if v_2.Op != Op386MOVLconst { - break - } - c := v_2.AuxInt - mem := v_3 - if !(validValAndOff(c, off)) { - break - } - v.reset(Op386XORLconstmodifyidx4) - v.AuxInt = makeValAndOff(c, off) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - return false -} func rewriteValue386_OpConstNil(v *Value) bool { // match: (ConstNil) // result: (MOVLconst [0]) diff --git a/src/cmd/compile/internal/ssa/rewritegeneric.go b/src/cmd/compile/internal/ssa/rewritegeneric.go index 13873b2ac8..d6213e8741 100644 --- a/src/cmd/compile/internal/ssa/rewritegeneric.go +++ b/src/cmd/compile/internal/ssa/rewritegeneric.go @@ -3983,7 +3983,7 @@ func rewriteValuegeneric_OpConvert(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (Convert (Add64 (Convert ptr mem) off) mem) - // result: (Add64 ptr off) + // result: (AddPtr ptr off) for { if v_0.Op != OpAdd64 { break @@ -4001,14 +4001,14 @@ func rewriteValuegeneric_OpConvert(v *Value) bool { if mem != v_1 { continue } - v.reset(OpAdd64) + v.reset(OpAddPtr) v.AddArg2(ptr, off) return true } break } // match: (Convert (Add32 (Convert ptr mem) off) mem) - // result: (Add32 ptr off) + // result: (AddPtr ptr off) for { if v_0.Op != OpAdd32 { break @@ -4026,7 +4026,7 @@ func rewriteValuegeneric_OpConvert(v *Value) bool { if mem != v_1 { continue } - v.reset(OpAdd32) + v.reset(OpAddPtr) v.AddArg2(ptr, off) return true } diff --git a/test/codegen/memops.go b/test/codegen/memops.go index 9d18153a29..0df191480d 100644 --- a/test/codegen/memops.go +++ b/test/codegen/memops.go @@ -99,46 +99,61 @@ func compMem3(x, y *int) (int, bool) { func idxInt8(x, y []int8, i int) { var t int8 // amd64: `MOVBL[SZ]X\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), 
[A-Z]+[0-9]*` + // 386: `MOVBL[SZ]X\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*` t = x[i+1] // amd64: `MOVB\t[A-Z]+[0-9]*, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)` + // 386: `MOVB\t[A-Z]+[0-9]*, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)` y[i+1] = t // amd64: `MOVB\t[$]77, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)` + // 386: `MOVB\t[$]77, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)` x[i+1] = 77 } func idxInt16(x, y []int16, i int) { var t int16 // amd64: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*` + // 386: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*` t = x[i+1] // amd64: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)` + // 386: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)` y[i+1] = t // amd64: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*` + // 386: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*` t = x[16*i+1] // amd64: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)` + // 386: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)` y[16*i+1] = t // amd64: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)` + // 386: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)` x[i+1] = 77 // amd64: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)` + // 386: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)` x[16*i+1] = 77 } func idxInt32(x, y []int32, i int) { var t int32 // amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*` + // 386: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*` t = x[i+1] // amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` + // 386: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` y[i+1] = t // amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*` t = x[2*i+1] // amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)` y[2*i+1] = t // amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*` + // 386: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*` t = x[16*i+1] // amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)` + // 386: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)` y[16*i+1] = t // amd64: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` + // 386: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` x[i+1] = 77 // amd64: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)` + // 386: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)` x[16*i+1] = 77 } @@ -160,24 +175,71 @@ func idxInt64(x, y []int64, i int) { func idxFloat32(x, y []float32, i int) { var t float32 - // amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+` + // amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+` + // 386/sse2: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+` t = x[i+1] - // amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` + // amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` + // 386/sse2: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` y[i+1] = t - // amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+` + // amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+` + // 386/sse2: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+` t = x[16*i+1] - // amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)` + // amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)` + // 386/sse2: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)` y[16*i+1] = t } func idxFloat64(x, y []float64, i int) { var t float64 - // amd64: 
`MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+` + // amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+` + // 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+` t = x[i+1] - // amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)` + // amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)` + // 386/sse2: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)` y[i+1] = t - // amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+` + // amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+` + // 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+` t = x[16*i+1] - // amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)` + // amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)` + // 386/sse2: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)` y[16*i+1] = t } + +func idxLoadPlusOp(x []int32, i int) int32 { + s := x[0] + // 386: `ADDL\t4\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+` + s += x[i+1] + // 386: `SUBL\t8\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+` + s -= x[i+2] + // 386: `IMULL\t12\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+` + s *= x[i+3] + // 386: `ANDL\t16\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+` + s &= x[i+4] + // 386: `ORL\t20\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+` + s |= x[i+5] + // 386: `XORL\t24\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+` + s ^= x[i+6] + return s +} + +func idxStorePlusOp(x []int32, i int, v int32) { + // 386: `ADDL\t[A-Z]+, 4\([A-Z]+\)\([A-Z]+\*4\)` + x[i+1] += v + // 386: `SUBL\t[A-Z]+, 8\([A-Z]+\)\([A-Z]+\*4\)` + x[i+2] -= v + // 386: `ANDL\t[A-Z]+, 12\([A-Z]+\)\([A-Z]+\*4\)` + x[i+3] &= v + // 386: `ORL\t[A-Z]+, 16\([A-Z]+\)\([A-Z]+\*4\)` + x[i+4] |= v + // 386: `XORL\t[A-Z]+, 20\([A-Z]+\)\([A-Z]+\*4\)` + x[i+5] ^= v + + // 386: `ADDL\t[$]77, 24\([A-Z]+\)\([A-Z]+\*4\)` + x[i+6] += 77 + // 386: `ANDL\t[$]77, 28\([A-Z]+\)\([A-Z]+\*4\)` + x[i+7] &= 77 + // 386: `ORL\t[$]77, 32\([A-Z]+\)\([A-Z]+\*4\)` + x[i+8] |= 77 + // 386: `XORL\t[$]77, 36\([A-Z]+\)\([A-Z]+\*4\)` + x[i+9] ^= 77 +} -- cgit v1.2.3-54-g00ecf From f5558bb2f580ed40374a98e8db6fd58ae79f6e1d Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Fri, 27 Mar 2020 10:09:26 -0700 Subject: os/exec: add temporary debugging code for #25628 On linux-386 builders run the TestExtraFiles subprocess under strace, in hopes of finding out where the unexpected descriptor is coming from. For #25628 Change-Id: I9a62d6a5192a076525a616ccc71de74bbe7ebd58 Reviewed-on: https://go-review.googlesource.com/c/go/+/225799 Run-TryBot: Ian Lance Taylor TryBot-Result: Gobot Gobot Reviewed-by: Bryan C. Mills --- src/os/exec/exec_test.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/os/exec/exec_test.go b/src/os/exec/exec_test.go index 91dd0a41ac..9d6069093e 100644 --- a/src/os/exec/exec_test.go +++ b/src/os/exec/exec_test.go @@ -79,6 +79,13 @@ func helperCommandContext(t *testing.T, ctx context.Context, s ...string) (cmd * } else { cmd = exec.Command(os.Args[0], cs...) } + + // Temporary code to try to resolve #25628. + // TODO(iant): Remove this when we no longer need it. + if runtime.GOARCH == "386" && runtime.GOOS == "linux" && testenv.Builder() != "" && len(s) == 1 && s[0] == "read3" && ctx == nil { + cmd = exec.Command("/usr/bin/strace", append([]string{"-f", os.Args[0]}, cs...)...) 
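// strace's -f flag follows child processes, so the trace covers the
// re-executed test binary and anything it spawns, which should reveal
// where the unexpected descriptor is opened.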
+ } + cmd.Env = append(os.Environ(), "GO_WANT_HELPER_PROCESS=1") return cmd } -- cgit v1.2.3-54-g00ecf From 78a45d8b4592dbd38057a2e9af83c9cf9d62ddc9 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Thu, 26 Mar 2020 09:13:11 -0700 Subject: runtime: lock mtxpoll in AIX netpollBreak netpollBreak calls netpollwakeup, and netpollwakeup expects the mtxpoll lock to be held, so that it has exclusive access to pendingUpdates. Not acquiring the lock was a mistake in CL 171824. Fortunately it rarely matters in practice. Change-Id: I32962ec2575c846ef3d6a91a4d821b2ff02d983c Reviewed-on: https://go-review.googlesource.com/c/go/+/225618 Reviewed-by: Michael Knyszek --- src/runtime/netpoll_aix.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/runtime/netpoll_aix.go b/src/runtime/netpoll_aix.go index c936fbb70f..61becc247e 100644 --- a/src/runtime/netpoll_aix.go +++ b/src/runtime/netpoll_aix.go @@ -130,7 +130,9 @@ func netpollarm(pd *pollDesc, mode int) { // netpollBreak interrupts a poll. func netpollBreak() { + lock(&mtxpoll) netpollwakeup() + unlock(&mtxpoll) } // netpoll checks for ready network connections. -- cgit v1.2.3-54-g00ecf From 33357270f1e0673641c9eb28498c9c6e2b9bac72 Mon Sep 17 00:00:00 2001 From: Andy Pan Date: Wed, 25 Dec 2019 03:33:14 +0000 Subject: runtime: refine netpollunblock by removing unreachable 'if' condition Change-Id: I58ac10013cadd78618124cb7ff134384d158ea4f GitHub-Last-Rev: 2dfff0d3d3d18ecb196d5357cdfec196424d9e3b GitHub-Pull-Request: golang/go#36276 Reviewed-on: https://go-review.googlesource.com/c/go/+/212557 Run-TryBot: Ian Lance Taylor TryBot-Result: Gobot Gobot Reviewed-by: Ian Lance Taylor --- src/runtime/netpoll.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime/netpoll.go b/src/runtime/netpoll.go index 3852598b7e..918c361c2e 100644 --- a/src/runtime/netpoll.go +++ b/src/runtime/netpoll.go @@ -447,7 +447,7 @@ func netpollunblock(pd *pollDesc, mode int32, ioready bool) *g { new = pdReady } if atomic.Casuintptr(gpp, old, new) { - if old == pdReady || old == pdWait { + if old == pdWait { old = 0 } return (*g)(unsafe.Pointer(old)) -- cgit v1.2.3-54-g00ecf From 9ceb1e5f5caca5666f9db50864c45ca1f88da1df Mon Sep 17 00:00:00 2001 From: Michael Matloob Date: Thu, 27 Feb 2020 17:14:07 -0500 Subject: cmd/go: avoid needing to manipulate ImportStack when constructing error Simplify the printing of PackageErrors by pushing and popping packages from the import stack when creating the error, rather than when printing the error. In some cases, we don't have the same amount of information to recreate the exact error, so we'll print the name of the package the error is for, even when it's redundant. In the case of import cycle errors, this change results in the addition of the position information of the error. This change supersedes CLs 220718 and 217106. It introduces a simpler way to format errors. Fixes #36173 Change-Id: Ie27011eb71f82e165ed4f9567bba6890a3849fc1 Reviewed-on: https://go-review.googlesource.com/c/go/+/224660 Run-TryBot: Michael Matloob TryBot-Result: Gobot Gobot Reviewed-by: Bryan C.
Mills --- src/cmd/go/go_test.go | 2 +- src/cmd/go/internal/load/pkg.go | 121 ++++++++++++--------- src/cmd/go/internal/load/test.go | 1 - src/cmd/go/testdata/script/mod_empty_err.txt | 2 +- .../go/testdata/script/test_import_error_stack.txt | 3 + src/cmd/go/testdata/script/vet_internal.txt | 14 +-- 6 files changed, 81 insertions(+), 62 deletions(-) diff --git a/src/cmd/go/go_test.go b/src/cmd/go/go_test.go index 39e387b9e4..d446e457b5 100644 --- a/src/cmd/go/go_test.go +++ b/src/cmd/go/go_test.go @@ -2662,7 +2662,7 @@ func TestBadCommandLines(t *testing.T) { tg.tempFile("src/-x/x.go", "package x\n") tg.setenv("GOPATH", tg.path(".")) tg.runFail("build", "--", "-x") - tg.grepStderr("invalid input directory name \"-x\"", "did not reject -x directory") + tg.grepStderr("invalid import path \"-x\"", "did not reject -x import path") tg.tempFile("src/-x/y/y.go", "package y\n") tg.setenv("GOPATH", tg.path(".")) diff --git a/src/cmd/go/internal/load/pkg.go b/src/cmd/go/internal/load/pkg.go index 21dcee1315..6aea54340d 100644 --- a/src/cmd/go/internal/load/pkg.go +++ b/src/cmd/go/internal/load/pkg.go @@ -318,16 +318,16 @@ func (p *Package) copyBuild(pp *build.Package) { // A PackageError describes an error loading information about a package. type PackageError struct { - ImportStack []string // shortest path from package named on command line to this one - Pos string // position of error - Err error // the error itself - IsImportCycle bool // the error is an import cycle - Hard bool // whether the error is soft or hard; soft errors are ignored in some places + ImportStack []string // shortest path from package named on command line to this one + Pos string // position of error + Err error // the error itself + IsImportCycle bool // the error is an import cycle + Hard bool // whether the error is soft or hard; soft errors are ignored in some places + alwaysPrintStack bool // whether to always print the ImportStack } func (p *PackageError) Error() string { - // Import cycles deserve special treatment. - if p.Pos != "" && !p.IsImportCycle { + if p.Pos != "" && (len(p.ImportStack) == 0 || !p.alwaysPrintStack) { // Omit import stack. The full path to the file where the error // is the most important thing. return p.Pos + ": " + p.Err.Error() @@ -339,15 +339,14 @@ func (p *PackageError) Error() string { // last path on the stack, we don't omit the path. An error like // "package A imports B: error loading C caused by B" would not be clearer // if "imports B" were omitted. 
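// The stack recorded on the error is already complete and free of a
// duplicate final entry by the time Error is called, so no trimming
// of the stack is needed before printing.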
- stack := p.ImportStack - var ierr ImportPathError - if len(stack) > 0 && errors.As(p.Err, &ierr) && ierr.ImportPath() == stack[len(stack)-1] { - stack = stack[:len(stack)-1] - } - if len(stack) == 0 { + if len(p.ImportStack) == 0 { return p.Err.Error() } - return "package " + strings.Join(stack, "\n\timports ") + ": " + p.Err.Error() + var optpos string + if p.Pos != "" { + optpos = "\n\t" + p.Pos + } + return "package " + strings.Join(p.ImportStack, "\n\timports ") + optpos + ": " + p.Err.Error() } func (p *PackageError) Unwrap() error { return p.Err } @@ -549,9 +548,6 @@ func loadImport(pre *preload, path, srcDir string, parent *Package, stk *ImportS panic("LoadImport called with empty package path") } - stk.Push(path) - defer stk.Pop() - var parentPath, parentRoot string parentIsStd := false if parent != nil { @@ -564,6 +560,11 @@ func loadImport(pre *preload, path, srcDir string, parent *Package, stk *ImportS pre.preloadImports(bp.Imports, bp) } if bp == nil { + if importErr, ok := err.(ImportPathError); !ok || importErr.ImportPath() != path { + // Only add path to the error's import stack if it's not already present on the error. + stk.Push(path) + defer stk.Pop() + } return &Package{ PackagePublic: PackagePublic{ ImportPath: path, @@ -578,7 +579,9 @@ func loadImport(pre *preload, path, srcDir string, parent *Package, stk *ImportS importPath := bp.ImportPath p := packageCache[importPath] if p != nil { + stk.Push(path) p = reusePackage(p, stk) + stk.Pop() } else { p = new(Package) p.Internal.Local = build.IsLocalImport(path) @@ -588,8 +591,11 @@ func loadImport(pre *preload, path, srcDir string, parent *Package, stk *ImportS // Load package. // loadPackageData may return bp != nil even if an error occurs, // in order to return partial information. - p.load(stk, bp, err) - if p.Error != nil && p.Error.Pos == "" { + p.load(path, stk, bp, err) + // Add position information unless this is a NoGoError or an ImportCycle error. + // Import cycles deserve special treatment. + var g *build.NoGoError + if p.Error != nil && p.Error.Pos == "" && !errors.As(err, &g) && !p.Error.IsImportCycle { p = setErrorPos(p, importPos) } @@ -608,7 +614,7 @@ func loadImport(pre *preload, path, srcDir string, parent *Package, stk *ImportS return setErrorPos(perr, importPos) } if mode&ResolveImport != 0 { - if perr := disallowVendor(srcDir, path, p, stk); perr != p { + if perr := disallowVendor(srcDir, path, parentPath, p, stk); perr != p { return setErrorPos(perr, importPos) } } @@ -1246,7 +1252,7 @@ func disallowInternal(srcDir string, importer *Package, importerPath string, p * // as if it were generated into the testing directory tree // (it's actually in a temporary directory outside any Go tree). // This cleans up a former kludge in passing functionality to the testing package. - if strings.HasPrefix(p.ImportPath, "testing/internal") && len(*stk) >= 2 && (*stk)[len(*stk)-2] == "testmain" { + if str.HasPathPrefix(p.ImportPath, "testing/internal") && importerPath == "testmain" { return p } @@ -1262,11 +1268,10 @@ func disallowInternal(srcDir string, importer *Package, importerPath string, p * return p } - // The stack includes p.ImportPath. - // If that's the only thing on the stack, we started + // importerPath is empty: we started // with a name given on the command line, not an // import. Anything listed on the command line is fine. 
- if len(*stk) == 1 { + if importerPath == "" { return p } @@ -1315,8 +1320,9 @@ func disallowInternal(srcDir string, importer *Package, importerPath string, p * // Internal is present, and srcDir is outside parent's tree. Not allowed. perr := *p perr.Error = &PackageError{ - ImportStack: stk.Copy(), - Err: ImportErrorf(p.ImportPath, "use of internal package "+p.ImportPath+" not allowed"), + alwaysPrintStack: true, + ImportStack: stk.Copy(), + Err: ImportErrorf(p.ImportPath, "use of internal package "+p.ImportPath+" not allowed"), } perr.Incomplete = true return &perr @@ -1344,16 +1350,15 @@ func findInternal(path string) (index int, ok bool) { // disallowVendor checks that srcDir is allowed to import p as path. // If the import is allowed, disallowVendor returns the original package p. // If not, it returns a new package containing just an appropriate error. -func disallowVendor(srcDir string, path string, p *Package, stk *ImportStack) *Package { - // The stack includes p.ImportPath. - // If that's the only thing on the stack, we started +func disallowVendor(srcDir string, path string, importerPath string, p *Package, stk *ImportStack) *Package { + // If the importerPath is empty, we started // with a name given on the command line, not an // import. Anything listed on the command line is fine. - if len(*stk) == 1 { + if importerPath == "" { return p } - if perr := disallowVendorVisibility(srcDir, p, stk); perr != p { + if perr := disallowVendorVisibility(srcDir, p, importerPath, stk); perr != p { return perr } @@ -1376,12 +1381,12 @@ func disallowVendor(srcDir string, path string, p *Package, stk *ImportStack) *P // is not subject to the rules, only subdirectories of vendor. // This allows people to have packages and commands named vendor, // for maximal compatibility with existing source trees. -func disallowVendorVisibility(srcDir string, p *Package, stk *ImportStack) *Package { - // The stack includes p.ImportPath. - // If that's the only thing on the stack, we started +func disallowVendorVisibility(srcDir string, p *Package, importerPath string, stk *ImportStack) *Package { + // The stack does not include p.ImportPath. + // If there's nothing on the stack, we started // with a name given on the command line, not an // import. Anything listed on the command line is fine. - if len(*stk) == 1 { + if importerPath == "" { return p } @@ -1525,7 +1530,8 @@ func (p *Package) DefaultExecName() string { // load populates p using information from bp, err, which should // be the result of calling build.Context.Import. -func (p *Package) load(stk *ImportStack, bp *build.Package, err error) { +// stk contains the import stack, not including path itself. +func (p *Package) load(path string, stk *ImportStack, bp *build.Package, err error) { p.copyBuild(bp) // The localPrefix is the path we interpret ./ imports relative to. @@ -1548,7 +1554,16 @@ func (p *Package) load(stk *ImportStack, bp *build.Package, err error) { if err != nil { p.Incomplete = true + // Report path in error stack unless err is an ImportPathError with path already set. + pushed := false + if e, ok := err.(ImportPathError); !ok || e.ImportPath() != path { + stk.Push(path) + pushed = true // Remember to pop after setError. + } setError(base.ExpandScanner(p.rewordError(err))) + if pushed { + stk.Pop() + } if _, isScanErr := err.(scanner.ErrorList); !isScanErr { return } @@ -1675,6 +1690,23 @@ func (p *Package) load(stk *ImportStack, bp *build.Package, err error) { } } + // Check for case-insensitive collisions of import paths. 
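// str.ToFold returns a canonical form with the property that two strings
// are equal under Unicode case-folding exactly when their folds are equal,
// so import paths that would collide on a case-insensitive file system
// map to the same foldPath key.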
+ fold := str.ToFold(p.ImportPath) + if other := foldPath[fold]; other == "" { + foldPath[fold] = p.ImportPath + } else if other != p.ImportPath { + setError(ImportErrorf(p.ImportPath, "case-insensitive import collision: %q and %q", p.ImportPath, other)) + return + } + + if !SafeArg(p.ImportPath) { + setError(ImportErrorf(p.ImportPath, "invalid import path %q", p.ImportPath)) + return + } + + stk.Push(path) + defer stk.Pop() + // Check for case-insensitive collision of input files. // To avoid problems on case-insensitive files, we reject any package // where two different input files have equal names under a case-insensitive @@ -1703,10 +1735,6 @@ func (p *Package) load(stk *ImportStack, bp *build.Package, err error) { setError(fmt.Errorf("invalid input directory name %q", name)) return } - if !SafeArg(p.ImportPath) { - setError(ImportErrorf(p.ImportPath, "invalid import path %q", p.ImportPath)) - return - } // Build list of imported packages and full dependency list. imports := make([]*Package, 0, len(p.Imports)) @@ -1770,15 +1798,6 @@ func (p *Package) load(stk *ImportStack, bp *build.Package, err error) { return } - // Check for case-insensitive collisions of import paths. - fold := str.ToFold(p.ImportPath) - if other := foldPath[fold]; other == "" { - foldPath[fold] = p.ImportPath - } else if other != p.ImportPath { - setError(ImportErrorf(p.ImportPath, "case-insensitive import collision: %q and %q", p.ImportPath, other)) - return - } - if cfg.ModulesEnabled && p.Error == nil { mainPath := p.ImportPath if p.Internal.CmdlineFiles { @@ -2266,9 +2285,7 @@ func GoFilesPackage(gofiles []string) *Package { pkg := new(Package) pkg.Internal.Local = true pkg.Internal.CmdlineFiles = true - stk.Push("main") - pkg.load(&stk, bp, err) - stk.Pop() + pkg.load("command-line-arguments", &stk, bp, err) pkg.Internal.LocalPrefix = dirToImportPath(dir) pkg.ImportPath = "command-line-arguments" pkg.Target = "" diff --git a/src/cmd/go/internal/load/test.go b/src/cmd/go/internal/load/test.go index 866e0e567f..6465f46f4e 100644 --- a/src/cmd/go/internal/load/test.go +++ b/src/cmd/go/internal/load/test.go @@ -56,7 +56,6 @@ func TestPackagesFor(p *Package, cover *TestCover) (pmain, ptest, pxtest *Packag } if len(p1.DepsErrors) > 0 { perr := p1.DepsErrors[0] - perr.Pos = "" // show full import stack err = perr break } diff --git a/src/cmd/go/testdata/script/mod_empty_err.txt b/src/cmd/go/testdata/script/mod_empty_err.txt index b309f634dd..982e6b2e51 100644 --- a/src/cmd/go/testdata/script/mod_empty_err.txt +++ b/src/cmd/go/testdata/script/mod_empty_err.txt @@ -10,7 +10,7 @@ go list -e -f {{.Error}} ./empty stdout 'no Go files in \$WORK[/\\]empty' go list -e -f {{.Error}} ./exclude -stdout 'package example.com/m/exclude: build constraints exclude all Go files in \$WORK[/\\]exclude' +stdout 'build constraints exclude all Go files in \$WORK[/\\]exclude' go list -e -f {{.Error}} ./missing stdout 'stat '$WORK'[/\\]missing: directory not found' diff --git a/src/cmd/go/testdata/script/test_import_error_stack.txt b/src/cmd/go/testdata/script/test_import_error_stack.txt index 3b796053f7..c66c1213a4 100644 --- a/src/cmd/go/testdata/script/test_import_error_stack.txt +++ b/src/cmd/go/testdata/script/test_import_error_stack.txt @@ -1,6 +1,9 @@ ! go test testdep/p1 stderr 'package testdep/p1 \(test\)\n\timports testdep/p2\n\timports testdep/p3: build constraints exclude all Go files ' # check for full import stack +! 
go vet testdep/p1 +stderr 'package testdep/p1 \(test\)\n\timports testdep/p2\n\timports testdep/p3: build constraints exclude all Go files ' # check for full import stack + -- testdep/p1/p1.go -- package p1 -- testdep/p1/p1_test.go -- diff --git a/src/cmd/go/testdata/script/vet_internal.txt b/src/cmd/go/testdata/script/vet_internal.txt index 46e1ac7398..85f709302c 100644 --- a/src/cmd/go/testdata/script/vet_internal.txt +++ b/src/cmd/go/testdata/script/vet_internal.txt @@ -3,28 +3,28 @@ env GO111MODULE=off # Issue 36173. Verify that "go vet" prints line numbers on load errors. ! go vet a/a.go -stderr '^a[/\\]a.go:5:3: use of internal package' +stderr '^package command-line-arguments\n\ta[/\\]a.go:5:3: use of internal package' ! go vet a/a_test.go -stderr '^package command-line-arguments \(test\): use of internal package' # BUG +stderr '^package command-line-arguments \(test\)\n\ta[/\\]a_test.go:4:3: use of internal package' ! go vet a -stderr '^a[/\\]a.go:5:3: use of internal package' +stderr '^package a\n\ta[/\\]a.go:5:3: use of internal package' go vet b/b.go ! stderr 'use of internal package' ! go vet b/b_test.go -stderr '^package command-line-arguments \(test\): use of internal package' # BUG +stderr '^package command-line-arguments \(test\)\n\tb[/\\]b_test.go:4:3: use of internal package' ! go vet depends-on-a/depends-on-a.go -stderr '^a[/\\]a.go:5:3: use of internal package' +stderr '^package command-line-arguments\n\timports a\n\ta[/\\]a.go:5:3: use of internal package' ! go vet depends-on-a/depends-on-a_test.go -stderr '^package command-line-arguments \(test\)\n\timports a: use of internal package a/x/internal/y not allowed$' # BUG +stderr '^package command-line-arguments \(test\)\n\timports a\n\ta[/\\]a.go:5:3: use of internal package a/x/internal/y not allowed' ! go vet depends-on-a -stderr '^a[/\\]a.go:5:3: use of internal package' +stderr '^package depends-on-a\n\timports a\n\ta[/\\]a.go:5:3: use of internal package' -- a/a.go -- // A package with bad imports in both src and test -- cgit v1.2.3-54-g00ecf From 9131f08a23bd5923d135df15da30b322748ffa12 Mon Sep 17 00:00:00 2001 From: Bradford Lamson-Scribner Date: Tue, 10 Mar 2020 21:26:42 -0600 Subject: cmd/compile: add dark mode functionality to CFGs in the ssa.html output add dark mode to CFGs in the ssa.html output by targeting individual parts of each svg and applying dark mode styles to the stroke & fill. 
Fixes #37767 Change-Id: Ic867e161c6837c26d9d735ea02bc94fdb56102f6 Reviewed-on: https://go-review.googlesource.com/c/go/+/222877 Reviewed-by: Josh Bleecher Snyder Run-TryBot: Josh Bleecher Snyder TryBot-Result: Gobot Gobot --- src/cmd/compile/internal/ssa/html.go | 37 +++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/src/cmd/compile/internal/ssa/html.go b/src/cmd/compile/internal/ssa/html.go index 54fa54a477..1eed224934 100644 --- a/src/cmd/compile/internal/ssa/html.go +++ b/src/cmd/compile/internal/ssa/html.go @@ -263,6 +263,14 @@ body.darkmode table, th { border: 1px solid gray; } +body.darkmode text { + fill: white; +} + +body.darkmode svg polygon:first-child { + fill: rgb(21, 21, 21); +} + .highlight-aquamarine { background-color: aquamarine; color: black; } .highlight-coral { background-color: coral; color: black; } .highlight-lightpink { background-color: lightpink; color: black; } @@ -304,7 +312,7 @@ body.darkmode table, th { color: gray; } -.outline-blue { outline: blue solid 2px; } +.outline-blue { outline: #2893ff solid 2px; } .outline-red { outline: red solid 2px; } .outline-blueviolet { outline: blueviolet solid 2px; } .outline-darkolivegreen { outline: darkolivegreen solid 2px; } @@ -316,7 +324,7 @@ body.darkmode table, th { .outline-maroon { outline: maroon solid 2px; } .outline-black { outline: black solid 2px; } -ellipse.outline-blue { stroke-width: 2px; stroke: blue; } +ellipse.outline-blue { stroke-width: 2px; stroke: #2893ff; } ellipse.outline-red { stroke-width: 2px; stroke: red; } ellipse.outline-blueviolet { stroke-width: 2px; stroke: blueviolet; } ellipse.outline-darkolivegreen { stroke-width: 2px; stroke: darkolivegreen; } @@ -642,12 +650,35 @@ function makeDraggable(event) { function toggleDarkMode() { document.body.classList.toggle('darkmode'); + // Collect all of the "collapsed" elements and apply dark mode on each collapsed column const collapsedEls = document.getElementsByClassName('collapsed'); const len = collapsedEls.length; for (let i = 0; i < len; i++) { collapsedEls[i].classList.toggle('darkmode'); } + + // Collect and spread the appropriate elements from all of the svgs on the page into one array + const svgParts = [ + ...document.querySelectorAll('path'), + ...document.querySelectorAll('ellipse'), + ...document.querySelectorAll('polygon'), + ]; + + // Iterate over the svgParts specifically looking for white and black fill/stroke to be toggled. + // The verbose conditional is intentional here so that we do not mutate any svg path, ellipse, or polygon that is of any color other than white or black. 
+ svgParts.forEach(el => { + if (el.attributes.stroke.value === 'white') { + el.attributes.stroke.value = 'black'; + } else if (el.attributes.stroke.value === 'black') { + el.attributes.stroke.value = 'white'; + } + if (el.attributes.fill.value === 'white') { + el.attributes.fill.value = 'black'; + } else if (el.attributes.fill.value === 'black') { + el.attributes.fill.value = 'white'; + } + }); } @@ -1016,7 +1047,7 @@ func (d *dotWriter) writeFuncSVG(w io.Writer, phase string, f *Func) { arrow = "dotvee" layoutDrawn[s.b.ID] = true } else if isBackEdge(b.ID, s.b.ID) { - color = "blue" + color = "#2893ff" } fmt.Fprintf(pipe, `%v -> %v [label=" %d ",style="%s",color="%s",arrowhead="%s"];`, b, s.b, i, style, color, arrow) } -- cgit v1.2.3-54-g00ecf From ef220dc53ed204386b30879ff1882b70a7fd602b Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Sat, 28 Mar 2020 00:21:57 +0000 Subject: Revert "runtime: lock mtxpoll in AIX netpollBreak" This reverts CL 225618. This is causing TestNetpollBreak to fail on AIX more often than not. Change-Id: Ia3c24041ead4b320202f7f5b17a6b286f639a689 Reviewed-on: https://go-review.googlesource.com/c/go/+/226198 Run-TryBot: Ian Lance Taylor Reviewed-by: Bryan C. Mills TryBot-Result: Gobot Gobot --- src/runtime/netpoll_aix.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/runtime/netpoll_aix.go b/src/runtime/netpoll_aix.go index 61becc247e..c936fbb70f 100644 --- a/src/runtime/netpoll_aix.go +++ b/src/runtime/netpoll_aix.go @@ -130,9 +130,7 @@ func netpollarm(pd *pollDesc, mode int) { // netpollBreak interrupts a poll. func netpollBreak() { - lock(&mtxpoll) netpollwakeup() - unlock(&mtxpoll) } // netpoll checks for ready network connections. -- cgit v1.2.3-54-g00ecf From d99fe1f40dfacfdebee22c13ed4471fd50f2cc1a Mon Sep 17 00:00:00 2001 From: PetarDambovaliev Date: Sat, 28 Mar 2020 08:00:17 +0000 Subject: time: remove some unnecessary/duplicated global slices Removes two variables: - days which is unused, and similar usage provided by longDayNames - months in favour of using longMonthNames Fixes #36359 Change-Id: I51b6b7408db9359c658462ba73e59ed432f655a6 GitHub-Last-Rev: 778d3ea157d363fcb5bced6d318381b44a1cac50 GitHub-Pull-Request: golang/go#36372 Reviewed-on: https://go-review.googlesource.com/c/go/+/213177 Reviewed-by: Emmanuel Odeke Run-TryBot: Emmanuel Odeke TryBot-Result: Gobot Gobot --- src/time/time.go | 29 ++--------------------------- 1 file changed, 2 insertions(+), 27 deletions(-) diff --git a/src/time/time.go b/src/time/time.go index 3f632dbc3e..3d242f2541 100644 --- a/src/time/time.go +++ b/src/time/time.go @@ -287,25 +287,10 @@ const ( December ) -var months = [...]string{ - "January", - "February", - "March", - "April", - "May", - "June", - "July", - "August", - "September", - "October", - "November", - "December", -} - // String returns the English name of the month ("January", "February", ...). func (m Month) String() string { if January <= m && m <= December { - return months[m-1] + return longMonthNames[m-1] } buf := make([]byte, 20) n := fmtInt(buf, uint64(m)) @@ -325,20 +310,10 @@ const ( Saturday ) -var days = [...]string{ - "Sunday", - "Monday", - "Tuesday", - "Wednesday", - "Thursday", - "Friday", - "Saturday", -} - // String returns the English name of the day ("Sunday", "Monday", ...). 
func (d Weekday) String() string { if Sunday <= d && d <= Saturday { - return days[d] + return longDayNames[d] } buf := make([]byte, 20) n := fmtInt(buf, uint64(d)) -- cgit v1.2.3-54-g00ecf From 45f99d85e0d22a4414ebbdc41de843d88064f374 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Fri, 27 Mar 2020 17:37:37 -0700 Subject: runtime: avoid racing on pendingUpdates in AIX netpollBreak Instead of calling netpollwakeup, just do the write in netpollBreak. Use the same signaling we now use in other netpollBreak instances. Change-Id: I53a65c22862ecc8484aee91d0e1ffb21a9e62d8c Reviewed-on: https://go-review.googlesource.com/c/go/+/226199 Run-TryBot: Ian Lance Taylor TryBot-Result: Gobot Gobot Reviewed-by: Bryan C. Mills --- src/runtime/netpoll_aix.go | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/runtime/netpoll_aix.go b/src/runtime/netpoll_aix.go index c936fbb70f..3c1f70874d 100644 --- a/src/runtime/netpoll_aix.go +++ b/src/runtime/netpoll_aix.go @@ -4,7 +4,10 @@ package runtime -import "unsafe" +import ( + "runtime/internal/atomic" + "unsafe" +) // This is based on the former libgo/runtime/netpoll_select.c implementation // except that it uses poll instead of select and is written in Go. @@ -41,6 +44,8 @@ var ( rdwake int32 wrwake int32 pendingUpdates int32 + + netpollWakeSig uintptr // used to avoid duplicate calls of netpollBreak ) func netpollinit() { @@ -130,7 +135,10 @@ func netpollarm(pd *pollDesc, mode int) { // netpollBreak interrupts a poll. func netpollBreak() { - netpollwakeup() + if atomic.Casuintptr(&netpollWakeSig, 0, 1) { + b := [1]byte{0} + write(uintptr(wrwake), unsafe.Pointer(&b[0]), 1) + } } // netpoll checks for ready network connections. @@ -184,6 +192,7 @@ retry: var b [1]byte for read(rdwake, unsafe.Pointer(&b[0]), 1) == 1 { } + atomic.Storeuintptr(&netpollWakeSig, 0) } // Still look at the other fds even if the mode may have // changed, as netpollBreak might have been called. -- cgit v1.2.3-54-g00ecf From 2ba00e47545406b3dd11436e3f1acf841d4932c6 Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Sat, 21 Mar 2020 13:22:14 +0100 Subject: doc: decrease prominence of GOROOT_BOOTSTRAP Go build scripts on UNIX (make.bash, all.bash) have not required GOROOT_BOOTSTRAP since August 2017 (CL 57753). Windows build scripts have followed suit since CL 96455. Most people building Go will have a Go toolchain in their PATH and will not need to specify a different toolchain. This CL removes the GOROOT_BOOTSTRAP mention from the contribution guide (it was there for Windows only, but it's not required anymore). The guide is meant to be light and clear for beginners and is not supposed to be a reference, so there's not need to keep mentioning GOROOT_BOOTSTRAP. Also update install-source.html to reflect the current status quo, where using the PATH is probably the first and most used default, and GOROOT_BOOTSTRAP is just an option. Change-Id: Iab453e61b0c749c256aaaf81ea9b2ae58822cb89 Reviewed-on: https://go-review.googlesource.com/c/go/+/224717 Run-TryBot: Giovanni Bajo TryBot-Result: Gobot Gobot Reviewed-by: Rob Pike --- doc/contribute.html | 4 +--- doc/install-source.html | 22 ++++++++-------------- 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/doc/contribute.html b/doc/contribute.html index 551d510288..4135d13652 100644 --- a/doc/contribute.html +++ b/doc/contribute.html @@ -552,9 +552,7 @@ $ ./all.bash

-(To build under Windows use all.bat; this also requires
-setting the environment variable GOROOT_BOOTSTRAP to the
-directory holding the Go tree for the bootstrap compiler.)
+(To build under Windows use all.bat)

diff --git a/doc/install-source.html b/doc/install-source.html
index 17b1c9cbb7..3d42a10ad6 100644
--- a/doc/install-source.html
+++ b/doc/install-source.html
@@ -106,23 +106,17 @@ Go does not support CentOS 6 on these systems.

-Install Go compiler binaries
+Install Go compiler binaries for bootstrap

The Go toolchain is written in Go. To build it, you need a Go compiler installed.
-The scripts that do the initial build of the tools look for an existing Go tool
-chain in $GOROOT_BOOTSTRAP.
-If unset, the default value of GOROOT_BOOTSTRAP
-is $HOME/go1.4.
-
-
-
-There are many options for the bootstrap toolchain.
-After obtaining one, set GOROOT_BOOTSTRAP to the
-directory containing the unpacked tree.
-For example, $GOROOT_BOOTSTRAP/bin/go should be
-the go command binary for the bootstrap toolchain.
-
+The scripts that do the initial build of the tools look for a "go" command
+in $PATH, so as long as you have Go installed in your
+system and configured in your $PATH, you are ready to build Go
+from source.
+Or if you prefer you can set $GOROOT_BOOTSTRAP to the
+root of a Go installation to use to build the new Go toolchain;
+$GOROOT_BOOTSTRAP/bin/go should be the go command to use.

Bootstrap toolchain from binary release

-- cgit v1.2.3-54-g00ecf From 5aef51a729f428bfd4b2c28fd2ba7950660608e0 Mon Sep 17 00:00:00 2001 From: Daniel Theophanes Date: Wed, 18 Mar 2020 10:03:51 -0700 Subject: database/sql: add test for Conn.Validator interface This addresses comments made by Russ after https://golang.org/cl/174122 was merged. It addes a test for the connection validator and renames the interface to just "Validator". Change-Id: Iea53e9b250c9be2e86e9b75906e7353e26437c5c Reviewed-on: https://go-review.googlesource.com/c/go/+/223963 Reviewed-by: Emmanuel Odeke --- src/database/sql/driver/driver.go | 8 ++++---- src/database/sql/fakedb_test.go | 4 ++-- src/database/sql/sql.go | 4 ++-- src/database/sql/sql_test.go | 31 +++++++++++++++++++++++++++++++ 4 files changed, 39 insertions(+), 8 deletions(-) diff --git a/src/database/sql/driver/driver.go b/src/database/sql/driver/driver.go index a2b844d71f..76f1bd3aa1 100644 --- a/src/database/sql/driver/driver.go +++ b/src/database/sql/driver/driver.go @@ -261,15 +261,15 @@ type SessionResetter interface { ResetSession(ctx context.Context) error } -// ConnectionValidator may be implemented by Conn to allow drivers to +// Validator may be implemented by Conn to allow drivers to // signal if a connection is valid or if it should be discarded. // // If implemented, drivers may return the underlying error from queries, // even if the connection should be discarded by the connection pool. -type ConnectionValidator interface { - // ValidConnection is called prior to placing the connection into the +type Validator interface { + // IsValid is called prior to placing the connection into the // connection pool. The connection will be discarded if false is returned. - ValidConnection() bool + IsValid() bool } // Result is the result of a query execution. diff --git a/src/database/sql/fakedb_test.go b/src/database/sql/fakedb_test.go index 73dab101b7..b6e9a5707e 100644 --- a/src/database/sql/fakedb_test.go +++ b/src/database/sql/fakedb_test.go @@ -396,9 +396,9 @@ func (c *fakeConn) ResetSession(ctx context.Context) error { return nil } -var _ driver.ConnectionValidator = (*fakeConn)(nil) +var _ driver.Validator = (*fakeConn)(nil) -func (c *fakeConn) ValidConnection() bool { +func (c *fakeConn) IsValid() bool { return !c.isBad() } diff --git a/src/database/sql/sql.go b/src/database/sql/sql.go index 95906b1318..4093ffe1bb 100644 --- a/src/database/sql/sql.go +++ b/src/database/sql/sql.go @@ -512,8 +512,8 @@ func (dc *driverConn) validateConnection(needsReset bool) bool { if needsReset { dc.needReset = true } - if cv, ok := dc.ci.(driver.ConnectionValidator); ok { - return cv.ValidConnection() + if cv, ok := dc.ci.(driver.Validator); ok { + return cv.IsValid() } return true } diff --git a/src/database/sql/sql_test.go b/src/database/sql/sql_test.go index 0fc994d0a1..f08eba93b3 100644 --- a/src/database/sql/sql_test.go +++ b/src/database/sql/sql_test.go @@ -1543,6 +1543,37 @@ func TestConnTx(t *testing.T) { } } +// TestConnIsValid verifies that a database connection that should be discarded, +// is actually discarded and does not re-enter the connection pool. +// If the IsValid method from *fakeConn is removed, this test will fail. 
+func TestConnIsValid(t *testing.T) { + db := newTestDB(t, "people") + defer closeDB(t, db) + + db.SetMaxOpenConns(1) + + ctx := context.Background() + + c, err := db.Conn(ctx) + if err != nil { + t.Fatal(err) + } + + err = c.Raw(func(raw interface{}) error { + dc := raw.(*fakeConn) + dc.stickyBad = true + return nil + }) + if err != nil { + t.Fatal(err) + } + c.Close() + + if len(db.freeConn) > 0 && db.freeConn[0].ci.(*fakeConn).stickyBad { + t.Fatal("bad connection returned to pool; expected bad connection to be discarded") + } +} + // Tests fix for issue 2542, that we release a lock when querying on // a closed connection. func TestIssue2542Deadlock(t *testing.T) { -- cgit v1.2.3-54-g00ecf From 82047a080f0aa320e316773fe8bcbb7c7bcd5a1f Mon Sep 17 00:00:00 2001 From: alex-semenyuk Date: Sun, 29 Mar 2020 08:12:06 +0000 Subject: test, test/fixedbugs, crypto/x509, go/internal/gccgoimporter: fix typos Change-Id: Ie2d605ca8cc3bde2e26c6865642ff4e6412cd075 GitHub-Last-Rev: ce5c3ba369b2ef476e7c63e4404baa256584f357 GitHub-Pull-Request: golang/go#38137 Reviewed-on: https://go-review.googlesource.com/c/go/+/226201 Run-TryBot: Ian Lance Taylor TryBot-Result: Gobot Gobot Reviewed-by: Ian Lance Taylor --- src/crypto/x509/x509_test.go | 2 +- src/go/internal/gccgoimporter/parser.go | 2 +- test/blank1.go | 2 +- test/chanlinear.go | 2 +- test/fixedbugs/issue9521.go | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/crypto/x509/x509_test.go b/src/crypto/x509/x509_test.go index 0c6747d28d..c2f110e87b 100644 --- a/src/crypto/x509/x509_test.go +++ b/src/crypto/x509/x509_test.go @@ -1806,7 +1806,7 @@ func TestMD5(t *testing.T) { } } -// certMissingRSANULL contains an RSA public key where the AlgorithmIdentifer +// certMissingRSANULL contains an RSA public key where the AlgorithmIdentifier // parameters are omitted rather than being an ASN.1 NULL. const certMissingRSANULL = ` -----BEGIN CERTIFICATE----- diff --git a/src/go/internal/gccgoimporter/parser.go b/src/go/internal/gccgoimporter/parser.go index 9204b004f9..e2ef33f7ae 100644 --- a/src/go/internal/gccgoimporter/parser.go +++ b/src/go/internal/gccgoimporter/parser.go @@ -326,7 +326,7 @@ func (p *parser) parseConstValue(pkg *types.Package) (val constant.Value, typ ty if p.tok == '$' { p.next() if p.tok != scanner.Ident { - p.errorf("expected identifer after '$', got %s (%q)", scanner.TokenString(p.tok), p.lit) + p.errorf("expected identifier after '$', got %s (%q)", scanner.TokenString(p.tok), p.lit) } } diff --git a/test/blank1.go b/test/blank1.go index 1a9f012464..c9a8e6a290 100644 --- a/test/blank1.go +++ b/test/blank1.go @@ -4,7 +4,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// Test that incorrect uses of the blank identifer are caught. +// Test that incorrect uses of the blank identifier are caught. // Does not compile. package _ // ERROR "invalid package name" diff --git a/test/chanlinear.go b/test/chanlinear.go index 55fee4ab9b..4d55586dc8 100644 --- a/test/chanlinear.go +++ b/test/chanlinear.go @@ -5,7 +5,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// Test that dequeueing from a pending channel doesn't +// Test that dequeuing from a pending channel doesn't // take linear time. 
package main diff --git a/test/fixedbugs/issue9521.go b/test/fixedbugs/issue9521.go index 4e4a55f1e1..a33f0483f3 100644 --- a/test/fixedbugs/issue9521.go +++ b/test/fixedbugs/issue9521.go @@ -4,7 +4,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// Test that an incorrect use of the blank identifer is caught. +// Test that an incorrect use of the blank identifier is caught. // Does not compile. package main -- cgit v1.2.3-54-g00ecf From 534f56b4b2b7e368e27d87af2d7721c4ffde37ba Mon Sep 17 00:00:00 2001 From: Michał Łowicki Date: Sun, 29 Mar 2020 17:59:08 +0100 Subject: doc: fix path to make.bash Change-Id: I78c7197b8b93590470a782b492bba177a14d80ec Reviewed-on: https://go-review.googlesource.com/c/go/+/226340 Reviewed-by: Ian Lance Taylor --- doc/contribute.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/contribute.html b/doc/contribute.html index 4135d13652..5fefac6bba 100644 --- a/doc/contribute.html +++ b/doc/contribute.html @@ -1006,7 +1006,7 @@ followed by run.bash.
  • In this section, we'll call the directory into which you cloned the Go repository $GODIR. -The go tool built by $GODIR/make.bash will be installed +The go tool built by $GODIR/src/make.bash will be installed in $GODIR/bin/go and you can invoke it to test your code. For instance, if you -- cgit v1.2.3-54-g00ecf From 7bfac4c3ddde3dd906b344f141a9d09a5f855c77 Mon Sep 17 00:00:00 2001 From: Dmitri Shuralyov Date: Sat, 28 Mar 2020 23:25:18 -0400 Subject: net/http: use DOMException.message property in error text Previously, details about the underlying fetch error were not visible in the net/http error text: net/http: fetch() failed: When using the message property, they are: net/http: fetch() failed: Failed to fetch net/http: fetch() failed: The user aborted a request. Reference: https://developer.mozilla.org/en-US/docs/Web/API/DOMException/message. Change-Id: Iecf7c6bac01abb164731a4d5c9af6582c250a1a0 Reviewed-on: https://go-review.googlesource.com/c/go/+/226205 Reviewed-by: Johan Brandhorst --- src/net/http/roundtrip_js.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/net/http/roundtrip_js.go b/src/net/http/roundtrip_js.go index 4dd99651a7..e14f3f7152 100644 --- a/src/net/http/roundtrip_js.go +++ b/src/net/http/roundtrip_js.go @@ -157,7 +157,7 @@ func (t *Transport) RoundTrip(req *Request) (*Response, error) { }) defer success.Release() failure := js.FuncOf(func(this js.Value, args []js.Value) interface{} { - err := fmt.Errorf("net/http: fetch() failed: %s", args[0].String()) + err := fmt.Errorf("net/http: fetch() failed: %s", args[0].Get("message").String()) select { case errCh <- err: case <-req.Context().Done(): -- cgit v1.2.3-54-g00ecf From 0b7c202e98949b530f7f4011efd454164356ba69 Mon Sep 17 00:00:00 2001 From: Alexander Greim Date: Sun, 29 Mar 2020 17:10:44 +0200 Subject: strings: make variable/type association consistent in function signatures The type annotation of some trim functions are inconsistent with all other function signatures of the strings package. Example: func TrimRight(s string, cutset string) string To be: func TrimRight(s, cutset string) string Change-Id: I456a33287bfb4ad6a7962e30a6424f209ac320c1 Reviewed-on: https://go-review.googlesource.com/c/go/+/226339 Run-TryBot: Emmanuel Odeke TryBot-Result: Gobot Gobot Reviewed-by: Emmanuel Odeke Reviewed-by: Ian Lance Taylor --- src/strings/strings.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/strings/strings.go b/src/strings/strings.go index 6d78b9ef16..314e2276d4 100644 --- a/src/strings/strings.go +++ b/src/strings/strings.go @@ -828,7 +828,7 @@ func makeCutsetFunc(cutset string) func(rune) bool { // Trim returns a slice of the string s with all leading and // trailing Unicode code points contained in cutset removed. -func Trim(s string, cutset string) string { +func Trim(s, cutset string) string { if s == "" || cutset == "" { return s } @@ -839,7 +839,7 @@ func Trim(s string, cutset string) string { // Unicode code points contained in cutset removed. // // To remove a prefix, use TrimPrefix instead. -func TrimLeft(s string, cutset string) string { +func TrimLeft(s, cutset string) string { if s == "" || cutset == "" { return s } @@ -850,7 +850,7 @@ func TrimLeft(s string, cutset string) string { // Unicode code points contained in cutset removed. // // To remove a suffix, use TrimSuffix instead. 
-func TrimRight(s string, cutset string) string { +func TrimRight(s, cutset string) string { if s == "" || cutset == "" { return s } -- cgit v1.2.3-54-g00ecf From 89e13c88e4f9f3a3eea7bf105e5af475727a4c33 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Sat, 28 Mar 2020 16:11:15 +0000 Subject: runtime: check the correct sanity condition in the page allocator Currently there are a few sanity checks in the page allocator which should fail immediately but because it's a check for a negative number on a uint, it's actually dead-code. If there's a bug in the page allocator which would cause the sanity check to fail, this could cause memory corruption by returning an invalid address (more precisely, one might either see a segfault, or span overlap). This change fixes these sanity checks to check the correct condition. Fixes #38130. Change-Id: Ia19786cece783d39f26df24dec8788833a6a3f21 Reviewed-on: https://go-review.googlesource.com/c/go/+/226297 Reviewed-by: Giovanni Bajo Reviewed-by: Cherry Zhang --- src/runtime/mpagealloc.go | 4 ++-- src/runtime/mpagecache.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/runtime/mpagealloc.go b/src/runtime/mpagealloc.go index bb751f1f8e..60926fbebf 100644 --- a/src/runtime/mpagealloc.go +++ b/src/runtime/mpagealloc.go @@ -724,7 +724,7 @@ nextLevel: // is what the final level represents. ci := chunkIdx(i) j, searchIdx := s.chunkOf(ci).find(npages, 0) - if j < 0 { + if j == ^uint(0) { // We couldn't find any space in this chunk despite the summaries telling // us it should be there. There's likely a bug, so dump some state and throw. sum := s.summary[len(s.summary)-1][i] @@ -766,7 +766,7 @@ func (s *pageAlloc) alloc(npages uintptr) (addr uintptr, scav uintptr) { i := chunkIndex(s.searchAddr) if max := s.summary[len(s.summary)-1][i].max(); max >= uint(npages) { j, searchIdx := s.chunkOf(i).find(npages, chunkPageIndex(s.searchAddr)) - if j < 0 { + if j == ^uint(0) { print("runtime: max = ", max, ", npages = ", npages, "\n") print("runtime: searchIdx = ", chunkPageIndex(s.searchAddr), ", s.searchAddr = ", hex(s.searchAddr), "\n") throw("bad summary data") diff --git a/src/runtime/mpagecache.go b/src/runtime/mpagecache.go index 9fc338bd8e..5b679d357d 100644 --- a/src/runtime/mpagecache.go +++ b/src/runtime/mpagecache.go @@ -115,7 +115,7 @@ func (s *pageAlloc) allocToCache() pageCache { // Fast path: there's free pages at or near the searchAddr address. chunk := s.chunkOf(ci) j, _ := chunk.find(1, chunkPageIndex(s.searchAddr)) - if j < 0 { + if j == ^uint(0) { throw("bad summary data") } c = pageCache{ -- cgit v1.2.3-54-g00ecf From 5a312288799c0a433e2061550ff92689b627e080 Mon Sep 17 00:00:00 2001 From: Lynn Boger Date: Wed, 25 Mar 2020 13:47:43 -0400 Subject: cmd/objdump: add support for -gnu option on Go objdump This adds support for the -gnu option on Go objdump. When this option is used, then output will include gnu assembly in comments alongside the Go assembly. The objdump test was updated to test this new option. This option is supported for the arches found in golang.org/x that provide the GNUsyntax function. 
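As background, the decoder calls the new -gnu flag builds on come straight from
golang.org/x/arch. The stand-alone sketch below was written for this note and is
not taken from the patch: it decodes one amd64 instruction and prints it in both
syntaxes, the same pairing objdump emits. The instruction bytes are an arbitrary
example chosen for illustration.

package main

import (
	"fmt"

	"golang.org/x/arch/x86/x86asm"
)

func main() {
	code := []byte{0x48, 0x89, 0xe5}     // example bytes: mov %rsp,%rbp
	inst, err := x86asm.Decode(code, 64) // 64 = 64-bit mode
	if err != nil {
		fmt.Println("decode error:", err)
		return
	}
	fmt.Println("go: ", x86asm.GoSyntax(inst, 0, nil))  // Go (Plan 9) syntax, the objdump default
	fmt.Println("gnu:", x86asm.GNUSyntax(inst, 0, nil)) // GNU (AT&T) syntax, appended by -gnu
}

With the patch applied, the equivalent view comes from an invocation such as:
go tool objdump -gnu -s main.main somebinary.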
Updates #34372 Change-Id: I9e60e1691526607dda3c857c4564dcef408b8391 Reviewed-on: https://go-review.googlesource.com/c/go/+/225459 Run-TryBot: Lynn Boger TryBot-Result: Gobot Gobot Reviewed-by: Cherry Zhang --- src/cmd/internal/objfile/disasm.go | 42 +++++++++++++++++---------- src/cmd/objdump/main.go | 9 +++--- src/cmd/objdump/objdump_test.go | 59 ++++++++++++++++++++++++++++++++++---- src/cmd/pprof/pprof.go | 2 +- 4 files changed, 87 insertions(+), 25 deletions(-) diff --git a/src/cmd/internal/objfile/disasm.go b/src/cmd/internal/objfile/disasm.go index 35cfd35d37..b5f1cd1632 100644 --- a/src/cmd/internal/objfile/disasm.go +++ b/src/cmd/internal/objfile/disasm.go @@ -187,7 +187,7 @@ func (fc *FileCache) Line(filename string, line int) ([]byte, error) { // If filter is non-nil, the disassembly only includes functions with names matching filter. // If printCode is true, the disassembly includs corresponding source lines. // The disassembly only includes functions that overlap the range [start, end). -func (d *Disasm) Print(w io.Writer, filter *regexp.Regexp, start, end uint64, printCode bool) { +func (d *Disasm) Print(w io.Writer, filter *regexp.Regexp, start, end uint64, printCode bool, gnuAsm bool) { if start < d.textStart { start = d.textStart } @@ -229,7 +229,7 @@ func (d *Disasm) Print(w io.Writer, filter *regexp.Regexp, start, end uint64, pr var lastFile string var lastLine int - d.Decode(symStart, symEnd, relocs, func(pc, size uint64, file string, line int, text string) { + d.Decode(symStart, symEnd, relocs, gnuAsm, func(pc, size uint64, file string, line int, text string) { i := pc - d.textStart if printCode { @@ -266,7 +266,7 @@ func (d *Disasm) Print(w io.Writer, filter *regexp.Regexp, start, end uint64, pr } // Decode disassembles the text segment range [start, end), calling f for each instruction. 
-func (d *Disasm) Decode(start, end uint64, relocs []Reloc, f func(pc, size uint64, file string, line int, text string)) { +func (d *Disasm) Decode(start, end uint64, relocs []Reloc, gnuAsm bool, f func(pc, size uint64, file string, line int, text string)) { if start < d.textStart { start = d.textStart } @@ -277,7 +277,7 @@ func (d *Disasm) Decode(start, end uint64, relocs []Reloc, f func(pc, size uint6 lookup := d.lookup for pc := start; pc < end; { i := pc - d.textStart - text, size := d.disasm(code[i:], pc, lookup, d.byteOrder) + text, size := d.disasm(code[i:], pc, lookup, d.byteOrder, gnuAsm) file, line, _ := d.pcln.PCToLine(pc) sep := "\t" for len(relocs) > 0 && relocs[0].Addr < i+uint64(size) { @@ -291,17 +291,17 @@ func (d *Disasm) Decode(start, end uint64, relocs []Reloc, f func(pc, size uint6 } type lookupFunc = func(addr uint64) (sym string, base uint64) -type disasmFunc func(code []byte, pc uint64, lookup lookupFunc, ord binary.ByteOrder) (text string, size int) +type disasmFunc func(code []byte, pc uint64, lookup lookupFunc, ord binary.ByteOrder, _ bool) (text string, size int) -func disasm_386(code []byte, pc uint64, lookup lookupFunc, _ binary.ByteOrder) (string, int) { - return disasm_x86(code, pc, lookup, 32) +func disasm_386(code []byte, pc uint64, lookup lookupFunc, _ binary.ByteOrder, gnuAsm bool) (string, int) { + return disasm_x86(code, pc, lookup, 32, gnuAsm) } -func disasm_amd64(code []byte, pc uint64, lookup lookupFunc, _ binary.ByteOrder) (string, int) { - return disasm_x86(code, pc, lookup, 64) +func disasm_amd64(code []byte, pc uint64, lookup lookupFunc, _ binary.ByteOrder, gnuAsm bool) (string, int) { + return disasm_x86(code, pc, lookup, 64, gnuAsm) } -func disasm_x86(code []byte, pc uint64, lookup lookupFunc, arch int) (string, int) { +func disasm_x86(code []byte, pc uint64, lookup lookupFunc, arch int, gnuAsm bool) (string, int) { inst, err := x86asm.Decode(code, arch) var text string size := inst.Len @@ -309,7 +309,11 @@ func disasm_x86(code []byte, pc uint64, lookup lookupFunc, arch int) (string, in size = 1 text = "?" } else { - text = x86asm.GoSyntax(inst, pc, lookup) + if gnuAsm { + text = fmt.Sprintf("%-36s // %s", x86asm.GoSyntax(inst, pc, lookup), x86asm.GNUSyntax(inst, pc, nil)) + } else { + text = x86asm.GoSyntax(inst, pc, lookup) + } } return text, size } @@ -334,31 +338,35 @@ func (r textReader) ReadAt(data []byte, off int64) (n int, err error) { return } -func disasm_arm(code []byte, pc uint64, lookup lookupFunc, _ binary.ByteOrder) (string, int) { +func disasm_arm(code []byte, pc uint64, lookup lookupFunc, _ binary.ByteOrder, gnuAsm bool) (string, int) { inst, err := armasm.Decode(code, armasm.ModeARM) var text string size := inst.Len if err != nil || size == 0 || inst.Op == 0 { size = 4 text = "?" + } else if gnuAsm { + text = fmt.Sprintf("%-36s // %s", armasm.GoSyntax(inst, pc, lookup, textReader{code, pc}), armasm.GNUSyntax(inst)) } else { text = armasm.GoSyntax(inst, pc, lookup, textReader{code, pc}) } return text, size } -func disasm_arm64(code []byte, pc uint64, lookup lookupFunc, byteOrder binary.ByteOrder) (string, int) { +func disasm_arm64(code []byte, pc uint64, lookup lookupFunc, byteOrder binary.ByteOrder, gnuAsm bool) (string, int) { inst, err := arm64asm.Decode(code) var text string if err != nil || inst.Op == 0 { text = "?" 
+ } else if gnuAsm { + text = fmt.Sprintf("%-36s // %s", arm64asm.GoSyntax(inst, pc, lookup, textReader{code, pc}), arm64asm.GNUSyntax(inst)) } else { text = arm64asm.GoSyntax(inst, pc, lookup, textReader{code, pc}) } return text, 4 } -func disasm_ppc64(code []byte, pc uint64, lookup lookupFunc, byteOrder binary.ByteOrder) (string, int) { +func disasm_ppc64(code []byte, pc uint64, lookup lookupFunc, byteOrder binary.ByteOrder, gnuAsm bool) (string, int) { inst, err := ppc64asm.Decode(code, byteOrder) var text string size := inst.Len @@ -366,7 +374,11 @@ func disasm_ppc64(code []byte, pc uint64, lookup lookupFunc, byteOrder binary.By size = 4 text = "?" } else { - text = ppc64asm.GoSyntax(inst, pc, lookup) + if gnuAsm { + text = fmt.Sprintf("%-36s // %s", ppc64asm.GoSyntax(inst, pc, lookup), ppc64asm.GNUSyntax(inst, pc)) + } else { + text = ppc64asm.GoSyntax(inst, pc, lookup) + } } return text, size } diff --git a/src/cmd/objdump/main.go b/src/cmd/objdump/main.go index 6a60697ebd..6605f8a60c 100644 --- a/src/cmd/objdump/main.go +++ b/src/cmd/objdump/main.go @@ -43,12 +43,13 @@ import ( "cmd/internal/objfile" ) -var printCode = flag.Bool("S", false, "print go code alongside assembly") +var printCode = flag.Bool("S", false, "print Go code alongside assembly") var symregexp = flag.String("s", "", "only dump symbols matching this regexp") +var gnuAsm = flag.Bool("gnu", false, "print GNU assembly next to Go assembly (where supported)") var symRE *regexp.Regexp func usage() { - fmt.Fprintf(os.Stderr, "usage: go tool objdump [-S] [-s symregexp] binary [start end]\n\n") + fmt.Fprintf(os.Stderr, "usage: go tool objdump [-S] [-gnu] [-s symregexp] binary [start end]\n\n") flag.PrintDefaults() os.Exit(2) } @@ -87,7 +88,7 @@ func main() { usage() case 1: // disassembly of entire object - dis.Print(os.Stdout, symRE, 0, ^uint64(0), *printCode) + dis.Print(os.Stdout, symRE, 0, ^uint64(0), *printCode, *gnuAsm) case 3: // disassembly of PC range @@ -99,6 +100,6 @@ func main() { if err != nil { log.Fatalf("invalid end PC: %v", err) } - dis.Print(os.Stdout, symRE, start, end, *printCode) + dis.Print(os.Stdout, symRE, start, end, *printCode, *gnuAsm) } } diff --git a/src/cmd/objdump/objdump_test.go b/src/cmd/objdump/objdump_test.go index 7ed32cf3c2..5030ec65d2 100644 --- a/src/cmd/objdump/objdump_test.go +++ b/src/cmd/objdump/objdump_test.go @@ -64,18 +64,42 @@ var x86Need = []string{ "RET", } +var amd64GnuNeed = []string{ + "movq", + "callq", + "cmpb", +} + +var i386GnuNeed = []string{ + "mov", + "call", + "cmp", +} + var armNeed = []string{ "B main.main(SB)", "BL main.Println(SB)", "RET", } +var arm64GnuNeed = []string{ + "ldr", + "bl", + "cmp", +} + var ppcNeed = []string{ "BR main.main(SB)", "CALL main.Println(SB)", "RET", } +var ppcGnuNeed = []string{ + "mflr", + "lbz", + "cmpw", +} + var target = flag.String("target", "", "test disassembly of `goos/goarch` binary") // objdump is fully cross platform: it can handle binaries @@ -87,7 +111,7 @@ var target = flag.String("target", "", "test disassembly of `goos/goarch` binary // binary for the current system (only) and test that objdump // can handle that one. 
-func testDisasm(t *testing.T, printCode bool, flags ...string) { +func testDisasm(t *testing.T, printCode bool, printGnuAsm bool, flags ...string) { t.Parallel() goarch := runtime.GOARCH if *target != "" { @@ -102,7 +126,7 @@ func testDisasm(t *testing.T, printCode bool, flags ...string) { goarch = f[1] } - hash := md5.Sum([]byte(fmt.Sprintf("%v-%v", flags, printCode))) + hash := md5.Sum([]byte(fmt.Sprintf("%v-%v-%v", flags, printCode, printGnuAsm))) hello := filepath.Join(tmp, fmt.Sprintf("hello-%x.exe", hash)) args := []string{"build", "-o", hello} args = append(args, flags...) @@ -133,6 +157,18 @@ func testDisasm(t *testing.T, printCode bool, flags ...string) { need = append(need, ppcNeed...) } + if printGnuAsm { + switch goarch { + case "amd64": + need = append(need, amd64GnuNeed...) + case "386": + need = append(need, i386GnuNeed...) + case "arm64": + need = append(need, arm64GnuNeed...) + case "ppc64", "ppc64le": + need = append(need, ppcGnuNeed...) + } + } args = []string{ "-s", "main.main", hello, @@ -142,6 +178,9 @@ func testDisasm(t *testing.T, printCode bool, flags ...string) { args = append([]string{"-S"}, args...) } + if printGnuAsm { + args = append([]string{"-gnu"}, args...) + } cmd = exec.Command(exe, args...) cmd.Dir = "testdata" // "Bad line" bug #36683 is sensitive to being run in the source directory out, err = cmd.CombinedOutput() @@ -180,7 +219,7 @@ func TestDisasm(t *testing.T) { case "s390x": t.Skipf("skipping on %s, issue 15255", runtime.GOARCH) } - testDisasm(t, false) + testDisasm(t, false, false) } func TestDisasmCode(t *testing.T) { @@ -188,7 +227,17 @@ func TestDisasmCode(t *testing.T) { case "mips", "mipsle", "mips64", "mips64le", "riscv64", "s390x": t.Skipf("skipping on %s, issue 19160", runtime.GOARCH) } - testDisasm(t, true) + testDisasm(t, true, false) +} + +func TestDisasmGnuAsm(t *testing.T) { + switch runtime.GOARCH { + case "mips", "mipsle", "mips64", "mips64le", "riscv64", "s390x": + t.Skipf("skipping on %s, issue 19160", runtime.GOARCH) + case "arm": + t.Skipf("skipping gnuAsm test on %s", runtime.GOARCH) + } + testDisasm(t, false, true) } func TestDisasmExtld(t *testing.T) { @@ -209,7 +258,7 @@ func TestDisasmExtld(t *testing.T) { if !build.Default.CgoEnabled { t.Skip("skipping because cgo is not enabled") } - testDisasm(t, false, "-ldflags=-linkmode=external") + testDisasm(t, false, false, "-ldflags=-linkmode=external") } func TestDisasmGoobj(t *testing.T) { diff --git a/src/cmd/pprof/pprof.go b/src/cmd/pprof/pprof.go index 42e310043d..903f9cc1db 100644 --- a/src/cmd/pprof/pprof.go +++ b/src/cmd/pprof/pprof.go @@ -177,7 +177,7 @@ func (t *objTool) Disasm(file string, start, end uint64) ([]driver.Inst, error) return nil, err } var asm []driver.Inst - d.Decode(start, end, nil, func(pc, size uint64, file string, line int, text string) { + d.Decode(start, end, nil, false, func(pc, size uint64, file string, line int, text string) { asm = append(asm, driver.Inst{Addr: pc, File: file, Line: line, Text: text}) }) return asm, nil -- cgit v1.2.3-54-g00ecf From 33b648c0e9428c8775043db75fdff5864a64219a Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Fri, 27 Mar 2020 22:03:33 -0700 Subject: cmd/compile: fix ephemeral pointer problem on amd64 Make sure we don't use the rewrite ptr + (c + x) -> c + (ptr + x), as that may create an ephemeral out-of-bounds pointer. I have not seen an actual bug caused by this yet, but we've seen them in the 386 port so I'm fixing this issue for amd64 as well. 
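To make the hazard concrete, here is a stand-alone illustration written for this
note rather than taken from the patch. The load's address is ptr + (c + x); the
dropped rewrite would compute c + (ptr + x) instead, materializing the
intermediate ptr + x, which for the call shown points well before the slice's
backing array even though the final address is a legal element.

package main

func load(b []byte, x int) byte {
	const c = 64
	// Final address: ptr + (c + x), in bounds whenever 0 <= c+x < len(b).
	// Reassociating to c + (ptr + x) first computes ptr + x, an ephemeral
	// out-of-bounds pointer when x is negative enough.
	return b[c+x]
}

func main() {
	b := make([]byte, 16)
	_ = load(b, -60) // c+x == 4 is a legal index, but ptr+x is 60 bytes before b
	println("ok")
}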
The load-combining rules needed to be reworked somewhat to still work without the above broken rule. Update #37881 Change-Id: I8046d170e89e2035195f261535e34ca7d8aca68a Reviewed-on: https://go-review.googlesource.com/c/go/+/226437 Run-TryBot: Keith Randall TryBot-Result: Gobot Gobot Reviewed-by: Cherry Zhang --- src/cmd/compile/internal/ssa/gen/AMD64.rules | 420 ++-- src/cmd/compile/internal/ssa/rewrite.go | 45 +- src/cmd/compile/internal/ssa/rewriteAMD64.go | 2760 +++++++++++++++----------- test/codegen/memcombine.go | 8 +- 4 files changed, 1835 insertions(+), 1398 deletions(-) diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules index ca5962f249..b5133d6c14 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64.rules +++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules @@ -1491,65 +1491,70 @@ // Little-endian loads -(ORL x0:(MOVBload [i0] {s} p0 mem) - sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p1 mem))) +(OR(L|Q) x0:(MOVBload [i0] {s} p mem) + sh:(SHL(L|Q)const [8] x1:(MOVBload [i1] {s} p mem))) && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 - && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p0 mem) + -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem) -(ORQ x0:(MOVBload [i0] {s} p0 mem) - sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p1 mem))) - && i1 == i0+1 +(OR(L|Q) x0:(MOVBload [i] {s} p0 mem) + sh:(SHL(L|Q)const [8] x1:(MOVBload [i] {s} p1 mem))) && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 - && same(p0, p1, 1) + && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p0 mem) + -> @mergePoint(b,x0,x1) (MOVWload [i] {s} p0 mem) -(ORL x0:(MOVWload [i0] {s} p0 mem) - sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p1 mem))) +(OR(L|Q) x0:(MOVWload [i0] {s} p mem) + sh:(SHL(L|Q)const [16] x1:(MOVWload [i1] {s} p mem))) && i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 - && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - -> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p0 mem) + -> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem) -(ORQ x0:(MOVWload [i0] {s} p0 mem) - sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p1 mem))) - && i1 == i0+2 +(OR(L|Q) x0:(MOVWload [i] {s} p0 mem) + sh:(SHL(L|Q)const [16] x1:(MOVWload [i] {s} p1 mem))) && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 - && same(p0, p1, 1) + && sequentialAddresses(p0, p1, 2) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - -> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p0 mem) + -> @mergePoint(b,x0,x1) (MOVLload [i] {s} p0 mem) -(ORQ x0:(MOVLload [i0] {s} p0 mem) - sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p1 mem))) +(ORQ x0:(MOVLload [i0] {s} p mem) + sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p mem))) && i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 - && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - -> @mergePoint(b,x0,x1) (MOVQload [i0] {s} p0 mem) + -> @mergePoint(b,x0,x1) (MOVQload [i0] {s} p mem) -(ORL - s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p0 mem)) - or:(ORL - s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p1 mem)) +(ORQ x0:(MOVLload [i] {s} p0 mem) + sh:(SHLQconst [32] x1:(MOVLload [i] {s} p1 mem))) + && x0.Uses == 1 + && x1.Uses == 1 + && sh.Uses == 1 + && sequentialAddresses(p0, p1, 4) + && mergePoint(b,x0,x1) != nil + && clobber(x0, x1, sh) + -> @mergePoint(b,x0,x1) (MOVQload [i] {s} p0 mem) + +(OR(L|Q) + s1:(SHL(L|Q)const [j1] x1:(MOVBload [i1] {s} p mem)) + or:(OR(L|Q) + 
s0:(SHL(L|Q)const [j0] x0:(MOVBload [i0] {s} p mem)) y)) && i1 == i0+1 && j1 == j0+8 @@ -1559,17 +1564,15 @@ && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 - && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) - -> @mergePoint(b,x0,x1,y) (ORL (SHLLconst [j0] (MOVWload [i0] {s} p0 mem)) y) + -> @mergePoint(b,x0,x1,y) (OR(L|Q) (SHL(L|Q)const [j0] (MOVWload [i0] {s} p mem)) y) -(ORQ - s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p0 mem)) - or:(ORQ - s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p1 mem)) +(OR(L|Q) + s1:(SHL(L|Q)const [j1] x1:(MOVBload [i] {s} p1 mem)) + or:(OR(L|Q) + s0:(SHL(L|Q)const [j0] x0:(MOVBload [i] {s} p0 mem)) y)) - && i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 @@ -1577,15 +1580,15 @@ && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 - && same(p0, p1, 1) + && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) - -> @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j0] (MOVWload [i0] {s} p0 mem)) y) + -> @mergePoint(b,x0,x1,y) (OR(L|Q) (SHL(L|Q)const [j0] (MOVWload [i] {s} p0 mem)) y) (ORQ - s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p0 mem)) + s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem)) or:(ORQ - s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p1 mem)) + s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem)) y)) && i1 == i0+2 && j1 == j0+16 @@ -1595,106 +1598,107 @@ && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 - && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) - -> @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j0] (MOVLload [i0] {s} p0 mem)) y) - -// Little-endian indexed loads - -// Move constants offsets from LEAQx up into load. This lets the above combining -// rules discover indexed load-combining instances. -//TODO:remove! These rules are bad. 
-(MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ1 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1) --> (MOV(B|W|L|Q)load [i0+i1] {s0} (LEAQ1 [0] {s1} x y) mem) -(MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ2 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1) --> (MOV(B|W|L|Q)load [i0+i1] {s0} (LEAQ2 [0] {s1} x y) mem) -(MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ4 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1) --> (MOV(B|W|L|Q)load [i0+i1] {s0} (LEAQ4 [0] {s1} x y) mem) -(MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ8 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1) --> (MOV(B|W|L|Q)load [i0+i1] {s0} (LEAQ8 [0] {s1} x y) mem) - -(MOV(B|W|L|Q)store [i0] {s0} l:(LEAQ1 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1) --> (MOV(B|W|L|Q)store [i0+i1] {s0} (LEAQ1 [0] {s1} x y) val mem) -(MOV(B|W|L|Q)store [i0] {s0} l:(LEAQ2 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1) --> (MOV(B|W|L|Q)store [i0+i1] {s0} (LEAQ2 [0] {s1} x y) val mem) -(MOV(B|W|L|Q)store [i0] {s0} l:(LEAQ4 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1) --> (MOV(B|W|L|Q)store [i0+i1] {s0} (LEAQ4 [0] {s1} x y) val mem) -(MOV(B|W|L|Q)store [i0] {s0} l:(LEAQ8 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1) --> (MOV(B|W|L|Q)store [i0+i1] {s0} (LEAQ8 [0] {s1} x y) val mem) + -> @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j0] (MOVLload [i0] {s} p mem)) y) + +(ORQ + s1:(SHLQconst [j1] x1:(MOVWload [i] {s} p1 mem)) + or:(ORQ + s0:(SHLQconst [j0] x0:(MOVWload [i] {s} p0 mem)) + y)) + && j1 == j0+16 + && j0 % 32 == 0 + && x0.Uses == 1 + && x1.Uses == 1 + && s0.Uses == 1 + && s1.Uses == 1 + && or.Uses == 1 + && sequentialAddresses(p0, p1, 2) + && mergePoint(b,x0,x1,y) != nil + && clobber(x0, x1, s0, s1, or) + -> @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j0] (MOVLload [i] {s} p0 mem)) y) // Big-endian loads -(ORL - x1:(MOVBload [i1] {s} p0 mem) - sh:(SHLLconst [8] x0:(MOVBload [i0] {s} p1 mem))) +(OR(L|Q) + x1:(MOVBload [i1] {s} p mem) + sh:(SHL(L|Q)const [8] x0:(MOVBload [i0] {s} p mem))) && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 - && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - -> @mergePoint(b,x0,x1) (ROLWconst [8] (MOVWload [i0] {s} p0 mem)) + -> @mergePoint(b,x0,x1) (ROLWconst [8] (MOVWload [i0] {s} p mem)) -(ORQ - x1:(MOVBload [i1] {s} p0 mem) - sh:(SHLQconst [8] x0:(MOVBload [i0] {s} p1 mem))) - && i1 == i0+1 +(OR(L|Q) + x1:(MOVBload [i] {s} p1 mem) + sh:(SHL(L|Q)const [8] x0:(MOVBload [i] {s} p0 mem))) && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 - && same(p0, p1, 1) + && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - -> @mergePoint(b,x0,x1) (ROLWconst [8] (MOVWload [i0] {s} p0 mem)) + -> @mergePoint(b,x0,x1) (ROLWconst [8] (MOVWload [i] {s} p0 mem)) -(ORL - r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p0 mem)) - sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p1 mem)))) +(OR(L|Q) + r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)) + sh:(SHL(L|Q)const [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem)))) && i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 - && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh) - -> @mergePoint(b,x0,x1) (BSWAPL (MOVLload [i0] {s} p0 mem)) + -> @mergePoint(b,x0,x1) (BSWAPL (MOVLload [i0] {s} p mem)) -(ORQ - r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p0 mem)) - sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p1 mem)))) - && i1 == i0+2 +(OR(L|Q) + r1:(ROLWconst [8] x1:(MOVWload [i] {s} p1 mem)) + sh:(SHL(L|Q)const [16] r0:(ROLWconst [8] x0:(MOVWload [i] {s} p0 
mem)))) && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 - && same(p0, p1, 1) + && sequentialAddresses(p0, p1, 2) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh) - -> @mergePoint(b,x0,x1) (BSWAPL (MOVLload [i0] {s} p0 mem)) + -> @mergePoint(b,x0,x1) (BSWAPL (MOVLload [i] {s} p0 mem)) (ORQ - r1:(BSWAPL x1:(MOVLload [i1] {s} p0 mem)) - sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p1 mem)))) + r1:(BSWAPL x1:(MOVLload [i1] {s} p mem)) + sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p mem)))) && i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 - && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh) - -> @mergePoint(b,x0,x1) (BSWAPQ (MOVQload [i0] {s} p0 mem)) + -> @mergePoint(b,x0,x1) (BSWAPQ (MOVQload [i0] {s} p mem)) + +(ORQ + r1:(BSWAPL x1:(MOVLload [i] {s} p1 mem)) + sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i] {s} p0 mem)))) + && x0.Uses == 1 + && x1.Uses == 1 + && r0.Uses == 1 + && r1.Uses == 1 + && sh.Uses == 1 + && sequentialAddresses(p0, p1, 4) + && mergePoint(b,x0,x1) != nil + && clobber(x0, x1, r0, r1, sh) + -> @mergePoint(b,x0,x1) (BSWAPQ (MOVQload [i] {s} p0 mem)) -(ORL - s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p0 mem)) - or:(ORL - s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p1 mem)) +(OR(L|Q) + s0:(SHL(L|Q)const [j0] x0:(MOVBload [i0] {s} p mem)) + or:(OR(L|Q) + s1:(SHL(L|Q)const [j1] x1:(MOVBload [i1] {s} p mem)) y)) && i1 == i0+1 && j1 == j0-8 @@ -1704,17 +1708,15 @@ && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 - && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) - -> @mergePoint(b,x0,x1,y) (ORL (SHLLconst [j1] (ROLWconst [8] (MOVWload [i0] {s} p0 mem))) y) + -> @mergePoint(b,x0,x1,y) (OR(L|Q) (SHL(L|Q)const [j1] (ROLWconst [8] (MOVWload [i0] {s} p mem))) y) -(ORQ - s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p0 mem)) - or:(ORQ - s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p1 mem)) +(OR(L|Q) + s0:(SHL(L|Q)const [j0] x0:(MOVBload [i] {s} p0 mem)) + or:(OR(L|Q) + s1:(SHL(L|Q)const [j1] x1:(MOVBload [i] {s} p1 mem)) y)) - && i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 @@ -1722,15 +1724,15 @@ && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 - && same(p0, p1, 1) + && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) - -> @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j1] (ROLWconst [8] (MOVWload [i0] {s} p0 mem))) y) + -> @mergePoint(b,x0,x1,y) (OR(L|Q) (SHL(L|Q)const [j1] (ROLWconst [8] (MOVWload [i] {s} p0 mem))) y) (ORQ - s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p0 mem))) + s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))) or:(ORQ - s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p1 mem))) + s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))) y)) && i1 == i0+2 && j1 == j0-16 @@ -1742,41 +1744,90 @@ && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 - && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, r0, r1, s0, s1, or) - -> @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j1] (BSWAPL (MOVLload [i0] {s} p0 mem))) y) + -> @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j1] (BSWAPL (MOVLload [i0] {s} p mem))) y) + +(ORQ + s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i] {s} p0 mem))) + or:(ORQ + s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i] {s} p1 mem))) + y)) + && j1 == j0-16 + && j1 % 32 == 0 + && x0.Uses == 1 + && x1.Uses == 1 + && r0.Uses == 1 + && r1.Uses == 1 + && s0.Uses == 1 + && s1.Uses == 1 + && 
or.Uses == 1 + && sequentialAddresses(p0, p1, 2) + && mergePoint(b,x0,x1,y) != nil + && clobber(x0, x1, r0, r1, s0, s1, or) + -> @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j1] (BSWAPL (MOVLload [i] {s} p0 mem))) y) // Combine 2 byte stores + shift into rolw 8 + word store +(MOVBstore [i] {s} p w + x0:(MOVBstore [i-1] {s} p (SHRWconst [8] w) mem)) + && x0.Uses == 1 + && clobber(x0) + -> (MOVWstore [i-1] {s} p (ROLWconst [8] w) mem) (MOVBstore [i] {s} p1 w - x0:(MOVBstore [i-1] {s} p0 (SHRWconst [8] w) mem)) + x0:(MOVBstore [i] {s} p0 (SHRWconst [8] w) mem)) && x0.Uses == 1 - && same(p0, p1, 1) + && sequentialAddresses(p0, p1, 1) && clobber(x0) - -> (MOVWstore [i-1] {s} p0 (ROLWconst [8] w) mem) + -> (MOVWstore [i] {s} p0 (ROLWconst [8] w) mem) // Combine stores + shifts into bswap and larger (unaligned) stores +(MOVBstore [i] {s} p w + x2:(MOVBstore [i-1] {s} p (SHRLconst [8] w) + x1:(MOVBstore [i-2] {s} p (SHRLconst [16] w) + x0:(MOVBstore [i-3] {s} p (SHRLconst [24] w) mem)))) + && x0.Uses == 1 + && x1.Uses == 1 + && x2.Uses == 1 + && clobber(x0, x1, x2) + -> (MOVLstore [i-3] {s} p (BSWAPL w) mem) (MOVBstore [i] {s} p3 w - x2:(MOVBstore [i-1] {s} p2 (SHRLconst [8] w) - x1:(MOVBstore [i-2] {s} p1 (SHRLconst [16] w) - x0:(MOVBstore [i-3] {s} p0 (SHRLconst [24] w) mem)))) + x2:(MOVBstore [i] {s} p2 (SHRLconst [8] w) + x1:(MOVBstore [i] {s} p1 (SHRLconst [16] w) + x0:(MOVBstore [i] {s} p0 (SHRLconst [24] w) mem)))) && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 - && same(p0, p1, 1) - && same(p1, p2, 1) - && same(p2, p3, 1) + && sequentialAddresses(p0, p1, 1) + && sequentialAddresses(p1, p2, 1) + && sequentialAddresses(p2, p3, 1) && clobber(x0, x1, x2) - -> (MOVLstore [i-3] {s} p0 (BSWAPL w) mem) - + -> (MOVLstore [i] {s} p0 (BSWAPL w) mem) + +(MOVBstore [i] {s} p w + x6:(MOVBstore [i-1] {s} p (SHRQconst [8] w) + x5:(MOVBstore [i-2] {s} p (SHRQconst [16] w) + x4:(MOVBstore [i-3] {s} p (SHRQconst [24] w) + x3:(MOVBstore [i-4] {s} p (SHRQconst [32] w) + x2:(MOVBstore [i-5] {s} p (SHRQconst [40] w) + x1:(MOVBstore [i-6] {s} p (SHRQconst [48] w) + x0:(MOVBstore [i-7] {s} p (SHRQconst [56] w) mem)))))))) + && x0.Uses == 1 + && x1.Uses == 1 + && x2.Uses == 1 + && x3.Uses == 1 + && x4.Uses == 1 + && x5.Uses == 1 + && x6.Uses == 1 + && clobber(x0, x1, x2, x3, x4, x5, x6) + -> (MOVQstore [i-7] {s} p (BSWAPQ w) mem) (MOVBstore [i] {s} p7 w - x6:(MOVBstore [i-1] {s} p6 (SHRQconst [8] w) - x5:(MOVBstore [i-2] {s} p5 (SHRQconst [16] w) - x4:(MOVBstore [i-3] {s} p4 (SHRQconst [24] w) - x3:(MOVBstore [i-4] {s} p3 (SHRQconst [32] w) - x2:(MOVBstore [i-5] {s} p2 (SHRQconst [40] w) - x1:(MOVBstore [i-6] {s} p1 (SHRQconst [48] w) - x0:(MOVBstore [i-7] {s} p0 (SHRQconst [56] w) mem)))))))) + x6:(MOVBstore [i] {s} p6 (SHRQconst [8] w) + x5:(MOVBstore [i] {s} p5 (SHRQconst [16] w) + x4:(MOVBstore [i] {s} p4 (SHRQconst [24] w) + x3:(MOVBstore [i] {s} p3 (SHRQconst [32] w) + x2:(MOVBstore [i] {s} p2 (SHRQconst [40] w) + x1:(MOVBstore [i] {s} p1 (SHRQconst [48] w) + x0:(MOVBstore [i] {s} p0 (SHRQconst [56] w) mem)))))))) && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 @@ -1784,99 +1835,122 @@ && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 - && same(p0, p1, 1) - && same(p1, p2, 1) - && same(p2, p3, 1) - && same(p3, p4, 1) - && same(p4, p5, 1) - && same(p5, p6, 1) - && same(p6, p7, 1) + && sequentialAddresses(p0, p1, 1) + && sequentialAddresses(p1, p2, 1) + && sequentialAddresses(p2, p3, 1) + && sequentialAddresses(p3, p4, 1) + && sequentialAddresses(p4, p5, 1) + && sequentialAddresses(p5, p6, 1) + && 
sequentialAddresses(p6, p7, 1) && clobber(x0, x1, x2, x3, x4, x5, x6) - -> (MOVQstore [i-7] {s} p0 (BSWAPQ w) mem) + -> (MOVQstore [i] {s} p0 (BSWAPQ w) mem) // Combine constant stores into larger (unaligned) stores. -(MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem)) +(MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem)) && x.Uses == 1 - && same(p0, p1, 1) && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x) - -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem) -(MOVBstoreconst [a] {s} p1 x:(MOVBstoreconst [c] {s} p0 mem)) + -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem) +(MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem)) && x.Uses == 1 - && same(p0, p1, 1) && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x) - -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem) -(MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem)) + -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem) +(MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem)) && x.Uses == 1 - && same(p0, p1, 1) && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x) - -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem) -(MOVWstoreconst [a] {s} p1 x:(MOVWstoreconst [c] {s} p0 mem)) + -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem) +(MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem)) && x.Uses == 1 - && same(p0, p1, 1) && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x) - -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem) -(MOVLstoreconst [c] {s} p1 x:(MOVLstoreconst [a] {s} p0 mem)) + -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem) +(MOVLstoreconst [c] {s} p x:(MOVLstoreconst [a] {s} p mem)) && x.Uses == 1 - && same(p0, p1, 1) && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x) - -> (MOVQstore [ValAndOff(a).Off()] {s} p0 (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) -(MOVLstoreconst [a] {s} p1 x:(MOVLstoreconst [c] {s} p0 mem)) + -> (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) +(MOVLstoreconst [a] {s} p x:(MOVLstoreconst [c] {s} p mem)) && x.Uses == 1 - && same(p0, p1, 1) && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x) - -> (MOVQstore [ValAndOff(a).Off()] {s} p0 (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) -(MOVQstoreconst [c] {s} p1 x:(MOVQstoreconst [c2] {s} p0 mem)) + -> (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) +(MOVQstoreconst [c] {s} p x:(MOVQstoreconst [c2] {s} p mem)) && config.useSSE && x.Uses == 1 - && same(p0, p1, 1) && ValAndOff(c2).Off() + 8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x) - -> (MOVOstore [ValAndOff(c2).Off()] {s} p0 (MOVOconst [0]) mem) + -> (MOVOstore [ValAndOff(c2).Off()] {s} p (MOVOconst [0]) mem) -// Combine stores into larger (unaligned) stores. 
-(MOVBstore [i] {s} p1 (SHR(W|L|Q)const [8] w) x:(MOVBstore [i-1] {s} p0 w mem)) +// Combine stores into larger (unaligned) stores. Little endian. +(MOVBstore [i] {s} p (SHR(W|L|Q)const [8] w) x:(MOVBstore [i-1] {s} p w mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVWstore [i-1] {s} p w mem) +(MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHR(W|L|Q)const [8] w) mem)) && x.Uses == 1 - && same(p0, p1, 1) && clobber(x) - -> (MOVWstore [i-1] {s} p0 w mem) -(MOVBstore [i] {s} p1 w x:(MOVBstore [i+1] {s} p0 (SHR(W|L|Q)const [8] w) mem)) + -> (MOVWstore [i] {s} p w mem) +(MOVBstore [i] {s} p (SHR(L|Q)const [j] w) x:(MOVBstore [i-1] {s} p w0:(SHR(L|Q)const [j-8] w) mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVWstore [i-1] {s} p w0 mem) +(MOVBstore [i] {s} p1 (SHR(W|L|Q)const [8] w) x:(MOVBstore [i] {s} p0 w mem)) + && x.Uses == 1 + && sequentialAddresses(p0, p1, 1) + && clobber(x) + -> (MOVWstore [i] {s} p0 w mem) +(MOVBstore [i] {s} p0 w x:(MOVBstore [i] {s} p1 (SHR(W|L|Q)const [8] w) mem)) && x.Uses == 1 - && same(p0, p1, 1) + && sequentialAddresses(p0, p1, 1) && clobber(x) -> (MOVWstore [i] {s} p0 w mem) -(MOVBstore [i] {s} p1 (SHR(L|Q)const [j] w) x:(MOVBstore [i-1] {s} p0 w0:(SHR(L|Q)const [j-8] w) mem)) +(MOVBstore [i] {s} p1 (SHR(L|Q)const [j] w) x:(MOVBstore [i] {s} p0 w0:(SHR(L|Q)const [j-8] w) mem)) && x.Uses == 1 - && same(p0, p1, 1) + && sequentialAddresses(p0, p1, 1) && clobber(x) - -> (MOVWstore [i-1] {s} p0 w0 mem) -(MOVWstore [i] {s} p1 (SHR(L|Q)const [16] w) x:(MOVWstore [i-2] {s} p0 w mem)) + -> (MOVWstore [i] {s} p0 w0 mem) + +(MOVWstore [i] {s} p (SHR(L|Q)const [16] w) x:(MOVWstore [i-2] {s} p w mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVLstore [i-2] {s} p w mem) +(MOVWstore [i] {s} p (SHR(L|Q)const [j] w) x:(MOVWstore [i-2] {s} p w0:(SHR(L|Q)const [j-16] w) mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVLstore [i-2] {s} p w0 mem) +(MOVWstore [i] {s} p1 (SHR(L|Q)const [16] w) x:(MOVWstore [i] {s} p0 w mem)) + && x.Uses == 1 + && sequentialAddresses(p0, p1, 2) + && clobber(x) + -> (MOVLstore [i] {s} p0 w mem) +(MOVWstore [i] {s} p1 (SHR(L|Q)const [j] w) x:(MOVWstore [i] {s} p0 w0:(SHR(L|Q)const [j-16] w) mem)) + && x.Uses == 1 + && sequentialAddresses(p0, p1, 2) + && clobber(x) + -> (MOVLstore [i] {s} p0 w0 mem) + +(MOVLstore [i] {s} p (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p w mem)) && x.Uses == 1 - && same(p0, p1, 1) && clobber(x) - -> (MOVLstore [i-2] {s} p0 w mem) -(MOVWstore [i] {s} p1 (SHR(L|Q)const [j] w) x:(MOVWstore [i-2] {s} p0 w0:(SHR(L|Q)const [j-16] w) mem)) + -> (MOVQstore [i-4] {s} p w mem) +(MOVLstore [i] {s} p (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p w0:(SHRQconst [j-32] w) mem)) && x.Uses == 1 - && same(p0, p1, 1) && clobber(x) - -> (MOVLstore [i-2] {s} p0 w0 mem) -(MOVLstore [i] {s} p1 (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p0 w mem)) + -> (MOVQstore [i-4] {s} p w0 mem) +(MOVLstore [i] {s} p1 (SHRQconst [32] w) x:(MOVLstore [i] {s} p0 w mem)) && x.Uses == 1 - && same(p0, p1, 1) + && sequentialAddresses(p0, p1, 4) && clobber(x) - -> (MOVQstore [i-4] {s} p0 w mem) -(MOVLstore [i] {s} p1 (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p0 w0:(SHRQconst [j-32] w) mem)) + -> (MOVQstore [i] {s} p0 w mem) +(MOVLstore [i] {s} p1 (SHRQconst [j] w) x:(MOVLstore [i] {s} p0 w0:(SHRQconst [j-32] w) mem)) && x.Uses == 1 - && same(p0, p1, 1) + && sequentialAddresses(p0, p1, 4) && clobber(x) - -> (MOVQstore [i-4] {s} p0 w0 mem) + -> (MOVQstore [i] {s} p0 w0 mem) (MOVBstore [i] {s} p x1:(MOVBload [j] {s2} p2 mem) diff --git a/src/cmd/compile/internal/ssa/rewrite.go 
b/src/cmd/compile/internal/ssa/rewrite.go index fc03f0d72c..878b15eeee 100644 --- a/src/cmd/compile/internal/ssa/rewrite.go +++ b/src/cmd/compile/internal/ssa/rewrite.go @@ -1260,46 +1260,15 @@ func sequentialAddresses(x, y *Value, n int64) bool { x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) { return true } - return false -} - -// same reports whether x and y are the same value. -// It checks to a maximum depth of d, so it may report -// a false negative. -// TODO: remove when amd64 port is switched to using sequentialAddresses -func same(x, y *Value, depth int) bool { - if x == y { + if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil && + (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] || + x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) { return true } - if depth <= 0 { - return false - } - if x.Op != y.Op || x.Aux != y.Aux || x.AuxInt != y.AuxInt { - return false - } - if len(x.Args) != len(y.Args) { - return false - } - if opcodeTable[x.Op].commutative { - // Check exchanged ordering first. - for i, a := range x.Args { - j := i - if j < 2 { - j ^= 1 - } - b := y.Args[j] - if !same(a, b, depth-1) { - goto checkNormalOrder - } - } + if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux && + (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] || + x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) { return true - checkNormalOrder: } - for i, a := range x.Args { - b := y.Args[i] - if !same(a, b, depth-1) { - return false - } - } - return true + return false } diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index b9a401cca9..e4d86485d4 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -10140,7 +10140,6 @@ func rewriteValueAMD64_OpAMD64MOVBatomicload(v *Value) bool { func rewriteValueAMD64_OpAMD64MOVBload(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block // match: (MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) // result: (MOVBQZX x) @@ -10205,118 +10204,6 @@ func rewriteValueAMD64_OpAMD64MOVBload(v *Value) bool { v.AddArg2(base, mem) return true } - // match: (MOVBload [i0] {s0} l:(LEAQ1 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVBload [i0+i1] {s0} (LEAQ1 [0] {s1} x y) mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ1 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVBload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } - // match: (MOVBload [i0] {s0} l:(LEAQ2 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVBload [i0+i1] {s0} (LEAQ2 [0] {s1} x y) mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ2 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVBload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } - // match: (MOVBload [i0] {s0} l:(LEAQ4 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVBload [i0+i1] {s0} (LEAQ4 
[0] {s1} x y) mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ4 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVBload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } - // match: (MOVBload [i0] {s0} l:(LEAQ8 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVBload [i0+i1] {s0} (LEAQ8 [0] {s1} x y) mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ8 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVBload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } // match: (MOVBload [off1] {sym1} (LEAL [off2] {sym2} base) mem) // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2) // result: (MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem) @@ -10722,159 +10609,124 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { v.AddArg3(base, val, mem) return true } - // match: (MOVBstore [i0] {s0} l:(LEAQ1 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVBstore [i0+i1] {s0} (LEAQ1 [0] {s1} x y) val mem) + // match: (MOVBstore [i] {s} p w x0:(MOVBstore [i-1] {s} p (SHRWconst [8] w) mem)) + // cond: x0.Uses == 1 && clobber(x0) + // result: (MOVWstore [i-1] {s} p (ROLWconst [8] w) mem) for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ1 { + i := v.AuxInt + s := v.Aux + p := v_0 + w := v_1 + x0 := v_2 + if x0.Op != OpAMD64MOVBstore || x0.AuxInt != i-1 || x0.Aux != s { break } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { + mem := x0.Args[2] + if p != x0.Args[0] { break } - v.reset(OpAMD64MOVBstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) + x0_1 := x0.Args[1] + if x0_1.Op != OpAMD64SHRWconst || x0_1.AuxInt != 8 || w != x0_1.Args[0] || !(x0.Uses == 1 && clobber(x0)) { + break + } + v.reset(OpAMD64MOVWstore) + v.AuxInt = i - 1 + v.Aux = s + v0 := b.NewValue0(x0.Pos, OpAMD64ROLWconst, w.Type) + v0.AuxInt = 8 + v0.AddArg(w) + v.AddArg3(p, v0, mem) return true } - // match: (MOVBstore [i0] {s0} l:(LEAQ2 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVBstore [i0+i1] {s0} (LEAQ2 [0] {s1} x y) val mem) + // match: (MOVBstore [i] {s} p1 w x0:(MOVBstore [i] {s} p0 (SHRWconst [8] w) mem)) + // cond: x0.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x0) + // result: (MOVWstore [i] {s} p0 (ROLWconst [8] w) mem) for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ2 { + i := v.AuxInt + s := v.Aux + p1 := v_0 + w := v_1 + x0 := v_2 + if x0.Op != OpAMD64MOVBstore || x0.AuxInt != i || x0.Aux != s { break } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { + mem := x0.Args[2] + p0 := x0.Args[0] + x0_1 := x0.Args[1] + if x0_1.Op != OpAMD64SHRWconst || x0_1.AuxInt != 8 || w != x0_1.Args[0] || !(x0.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x0)) { break } - v.reset(OpAMD64MOVBstore) - v.AuxInt = 
i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) + v.reset(OpAMD64MOVWstore) + v.AuxInt = i + v.Aux = s + v0 := b.NewValue0(x0.Pos, OpAMD64ROLWconst, w.Type) + v0.AuxInt = 8 + v0.AddArg(w) + v.AddArg3(p0, v0, mem) return true } - // match: (MOVBstore [i0] {s0} l:(LEAQ4 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVBstore [i0+i1] {s0} (LEAQ4 [0] {s1} x y) val mem) + // match: (MOVBstore [i] {s} p w x2:(MOVBstore [i-1] {s} p (SHRLconst [8] w) x1:(MOVBstore [i-2] {s} p (SHRLconst [16] w) x0:(MOVBstore [i-3] {s} p (SHRLconst [24] w) mem)))) + // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0, x1, x2) + // result: (MOVLstore [i-3] {s} p (BSWAPL w) mem) for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ4 { + i := v.AuxInt + s := v.Aux + p := v_0 + w := v_1 + x2 := v_2 + if x2.Op != OpAMD64MOVBstore || x2.AuxInt != i-1 || x2.Aux != s { break } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { + _ = x2.Args[2] + if p != x2.Args[0] { break } - v.reset(OpAMD64MOVBstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) - return true - } - // match: (MOVBstore [i0] {s0} l:(LEAQ8 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVBstore [i0+i1] {s0} (LEAQ8 [0] {s1} x y) val mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ8 { + x2_1 := x2.Args[1] + if x2_1.Op != OpAMD64SHRLconst || x2_1.AuxInt != 8 || w != x2_1.Args[0] { break } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { + x1 := x2.Args[2] + if x1.Op != OpAMD64MOVBstore || x1.AuxInt != i-2 || x1.Aux != s { break } - v.reset(OpAMD64MOVBstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) - return true - } - // match: (MOVBstore [i] {s} p1 w x0:(MOVBstore [i-1] {s} p0 (SHRWconst [8] w) mem)) - // cond: x0.Uses == 1 && same(p0, p1, 1) && clobber(x0) - // result: (MOVWstore [i-1] {s} p0 (ROLWconst [8] w) mem) - for { - i := v.AuxInt - s := v.Aux - p1 := v_0 - w := v_1 - x0 := v_2 - if x0.Op != OpAMD64MOVBstore || x0.AuxInt != i-1 || x0.Aux != s { + _ = x1.Args[2] + if p != x1.Args[0] { + break + } + x1_1 := x1.Args[1] + if x1_1.Op != OpAMD64SHRLconst || x1_1.AuxInt != 16 || w != x1_1.Args[0] { + break + } + x0 := x1.Args[2] + if x0.Op != OpAMD64MOVBstore || x0.AuxInt != i-3 || x0.Aux != s { break } mem := x0.Args[2] - p0 := x0.Args[0] + if p != x0.Args[0] { + break + } x0_1 := x0.Args[1] - if x0_1.Op != OpAMD64SHRWconst || x0_1.AuxInt != 8 || w != x0_1.Args[0] || !(x0.Uses == 1 && same(p0, p1, 1) && clobber(x0)) { + if x0_1.Op != OpAMD64SHRLconst || x0_1.AuxInt != 24 || w != x0_1.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0, x1, x2)) { break } - v.reset(OpAMD64MOVWstore) - v.AuxInt = i - 1 + v.reset(OpAMD64MOVLstore) + v.AuxInt = i - 3 v.Aux = s - v0 := b.NewValue0(x0.Pos, OpAMD64ROLWconst, w.Type) - v0.AuxInt = 8 + v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPL, w.Type) v0.AddArg(w) - v.AddArg3(p0, v0, mem) + v.AddArg3(p, v0, mem) return true } - // match: (MOVBstore [i] {s} p3 w x2:(MOVBstore [i-1] {s} p2 (SHRLconst [8] w) x1:(MOVBstore 
[i-2] {s} p1 (SHRLconst [16] w) x0:(MOVBstore [i-3] {s} p0 (SHRLconst [24] w) mem)))) - // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && same(p0, p1, 1) && same(p1, p2, 1) && same(p2, p3, 1) && clobber(x0, x1, x2) - // result: (MOVLstore [i-3] {s} p0 (BSWAPL w) mem) + // match: (MOVBstore [i] {s} p3 w x2:(MOVBstore [i] {s} p2 (SHRLconst [8] w) x1:(MOVBstore [i] {s} p1 (SHRLconst [16] w) x0:(MOVBstore [i] {s} p0 (SHRLconst [24] w) mem)))) + // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && sequentialAddresses(p0, p1, 1) && sequentialAddresses(p1, p2, 1) && sequentialAddresses(p2, p3, 1) && clobber(x0, x1, x2) + // result: (MOVLstore [i] {s} p0 (BSWAPL w) mem) for { i := v.AuxInt s := v.Aux p3 := v_0 w := v_1 x2 := v_2 - if x2.Op != OpAMD64MOVBstore || x2.AuxInt != i-1 || x2.Aux != s { + if x2.Op != OpAMD64MOVBstore || x2.AuxInt != i || x2.Aux != s { break } _ = x2.Args[2] @@ -10884,7 +10736,7 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { break } x1 := x2.Args[2] - if x1.Op != OpAMD64MOVBstore || x1.AuxInt != i-2 || x1.Aux != s { + if x1.Op != OpAMD64MOVBstore || x1.AuxInt != i || x1.Aux != s { break } _ = x1.Args[2] @@ -10894,37 +10746,39 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { break } x0 := x1.Args[2] - if x0.Op != OpAMD64MOVBstore || x0.AuxInt != i-3 || x0.Aux != s { + if x0.Op != OpAMD64MOVBstore || x0.AuxInt != i || x0.Aux != s { break } mem := x0.Args[2] p0 := x0.Args[0] x0_1 := x0.Args[1] - if x0_1.Op != OpAMD64SHRLconst || x0_1.AuxInt != 24 || w != x0_1.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && same(p0, p1, 1) && same(p1, p2, 1) && same(p2, p3, 1) && clobber(x0, x1, x2)) { + if x0_1.Op != OpAMD64SHRLconst || x0_1.AuxInt != 24 || w != x0_1.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && sequentialAddresses(p0, p1, 1) && sequentialAddresses(p1, p2, 1) && sequentialAddresses(p2, p3, 1) && clobber(x0, x1, x2)) { break } v.reset(OpAMD64MOVLstore) - v.AuxInt = i - 3 + v.AuxInt = i v.Aux = s v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPL, w.Type) v0.AddArg(w) v.AddArg3(p0, v0, mem) return true } - // match: (MOVBstore [i] {s} p7 w x6:(MOVBstore [i-1] {s} p6 (SHRQconst [8] w) x5:(MOVBstore [i-2] {s} p5 (SHRQconst [16] w) x4:(MOVBstore [i-3] {s} p4 (SHRQconst [24] w) x3:(MOVBstore [i-4] {s} p3 (SHRQconst [32] w) x2:(MOVBstore [i-5] {s} p2 (SHRQconst [40] w) x1:(MOVBstore [i-6] {s} p1 (SHRQconst [48] w) x0:(MOVBstore [i-7] {s} p0 (SHRQconst [56] w) mem)))))))) - // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && same(p0, p1, 1) && same(p1, p2, 1) && same(p2, p3, 1) && same(p3, p4, 1) && same(p4, p5, 1) && same(p5, p6, 1) && same(p6, p7, 1) && clobber(x0, x1, x2, x3, x4, x5, x6) - // result: (MOVQstore [i-7] {s} p0 (BSWAPQ w) mem) + // match: (MOVBstore [i] {s} p w x6:(MOVBstore [i-1] {s} p (SHRQconst [8] w) x5:(MOVBstore [i-2] {s} p (SHRQconst [16] w) x4:(MOVBstore [i-3] {s} p (SHRQconst [24] w) x3:(MOVBstore [i-4] {s} p (SHRQconst [32] w) x2:(MOVBstore [i-5] {s} p (SHRQconst [40] w) x1:(MOVBstore [i-6] {s} p (SHRQconst [48] w) x0:(MOVBstore [i-7] {s} p (SHRQconst [56] w) mem)))))))) + // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0, x1, x2, x3, x4, x5, x6) + // result: (MOVQstore [i-7] {s} p (BSWAPQ w) mem) for { i := v.AuxInt s := v.Aux - p7 := v_0 + p := v_0 w := v_1 x6 := v_2 if x6.Op != OpAMD64MOVBstore || x6.AuxInt != i-1 || x6.Aux != s { break } _ = x6.Args[2] - 
p6 := x6.Args[0] + if p != x6.Args[0] { + break + } x6_1 := x6.Args[1] if x6_1.Op != OpAMD64SHRQconst || x6_1.AuxInt != 8 || w != x6_1.Args[0] { break @@ -10934,7 +10788,9 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { break } _ = x5.Args[2] - p5 := x5.Args[0] + if p != x5.Args[0] { + break + } x5_1 := x5.Args[1] if x5_1.Op != OpAMD64SHRQconst || x5_1.AuxInt != 16 || w != x5_1.Args[0] { break @@ -10944,7 +10800,9 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { break } _ = x4.Args[2] - p4 := x4.Args[0] + if p != x4.Args[0] { + break + } x4_1 := x4.Args[1] if x4_1.Op != OpAMD64SHRQconst || x4_1.AuxInt != 24 || w != x4_1.Args[0] { break @@ -10954,7 +10812,9 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { break } _ = x3.Args[2] - p3 := x3.Args[0] + if p != x3.Args[0] { + break + } x3_1 := x3.Args[1] if x3_1.Op != OpAMD64SHRQconst || x3_1.AuxInt != 32 || w != x3_1.Args[0] { break @@ -10964,7 +10824,9 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { break } _ = x2.Args[2] - p2 := x2.Args[0] + if p != x2.Args[0] { + break + } x2_1 := x2.Args[1] if x2_1.Op != OpAMD64SHRQconst || x2_1.AuxInt != 40 || w != x2_1.Args[0] { break @@ -10974,7 +10836,9 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { break } _ = x1.Args[2] - p1 := x1.Args[0] + if p != x1.Args[0] { + break + } x1_1 := x1.Args[1] if x1_1.Op != OpAMD64SHRQconst || x1_1.AuxInt != 48 || w != x1_1.Args[0] { break @@ -10984,9 +10848,11 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { break } mem := x0.Args[2] - p0 := x0.Args[0] + if p != x0.Args[0] { + break + } x0_1 := x0.Args[1] - if x0_1.Op != OpAMD64SHRQconst || x0_1.AuxInt != 56 || w != x0_1.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && same(p0, p1, 1) && same(p1, p2, 1) && same(p2, p3, 1) && same(p3, p4, 1) && same(p4, p5, 1) && same(p5, p6, 1) && same(p6, p7, 1) && clobber(x0, x1, x2, x3, x4, x5, x6)) { + if x0_1.Op != OpAMD64SHRQconst || x0_1.AuxInt != 56 || w != x0_1.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0, x1, x2, x3, x4, x5, x6)) { break } v.reset(OpAMD64MOVQstore) @@ -10994,103 +10860,326 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { v.Aux = s v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPQ, w.Type) v0.AddArg(w) - v.AddArg3(p0, v0, mem) + v.AddArg3(p, v0, mem) return true } - // match: (MOVBstore [i] {s} p1 (SHRWconst [8] w) x:(MOVBstore [i-1] {s} p0 w mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) - // result: (MOVWstore [i-1] {s} p0 w mem) + // match: (MOVBstore [i] {s} p7 w x6:(MOVBstore [i] {s} p6 (SHRQconst [8] w) x5:(MOVBstore [i] {s} p5 (SHRQconst [16] w) x4:(MOVBstore [i] {s} p4 (SHRQconst [24] w) x3:(MOVBstore [i] {s} p3 (SHRQconst [32] w) x2:(MOVBstore [i] {s} p2 (SHRQconst [40] w) x1:(MOVBstore [i] {s} p1 (SHRQconst [48] w) x0:(MOVBstore [i] {s} p0 (SHRQconst [56] w) mem)))))))) + // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && sequentialAddresses(p0, p1, 1) && sequentialAddresses(p1, p2, 1) && sequentialAddresses(p2, p3, 1) && sequentialAddresses(p3, p4, 1) && sequentialAddresses(p4, p5, 1) && sequentialAddresses(p5, p6, 1) && sequentialAddresses(p6, p7, 1) && clobber(x0, x1, x2, x3, x4, x5, x6) + // result: (MOVQstore [i] {s} p0 (BSWAPQ w) mem) for { i := v.AuxInt s := v.Aux - p1 := v_0 - if v_1.Op != OpAMD64SHRWconst || v_1.AuxInt != 8 
{ + p7 := v_0 + w := v_1 + x6 := v_2 + if x6.Op != OpAMD64MOVBstore || x6.AuxInt != i || x6.Aux != s { break } - w := v_1.Args[0] - x := v_2 - if x.Op != OpAMD64MOVBstore || x.AuxInt != i-1 || x.Aux != s { + _ = x6.Args[2] + p6 := x6.Args[0] + x6_1 := x6.Args[1] + if x6_1.Op != OpAMD64SHRQconst || x6_1.AuxInt != 8 || w != x6_1.Args[0] { break } - mem := x.Args[2] - p0 := x.Args[0] - if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + x5 := x6.Args[2] + if x5.Op != OpAMD64MOVBstore || x5.AuxInt != i || x5.Aux != s { break } - v.reset(OpAMD64MOVWstore) - v.AuxInt = i - 1 - v.Aux = s - v.AddArg3(p0, w, mem) - return true - } - // match: (MOVBstore [i] {s} p1 (SHRLconst [8] w) x:(MOVBstore [i-1] {s} p0 w mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) - // result: (MOVWstore [i-1] {s} p0 w mem) - for { - i := v.AuxInt - s := v.Aux - p1 := v_0 - if v_1.Op != OpAMD64SHRLconst || v_1.AuxInt != 8 { + _ = x5.Args[2] + p5 := x5.Args[0] + x5_1 := x5.Args[1] + if x5_1.Op != OpAMD64SHRQconst || x5_1.AuxInt != 16 || w != x5_1.Args[0] { break } - w := v_1.Args[0] - x := v_2 - if x.Op != OpAMD64MOVBstore || x.AuxInt != i-1 || x.Aux != s { + x4 := x5.Args[2] + if x4.Op != OpAMD64MOVBstore || x4.AuxInt != i || x4.Aux != s { break } - mem := x.Args[2] - p0 := x.Args[0] - if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + _ = x4.Args[2] + p4 := x4.Args[0] + x4_1 := x4.Args[1] + if x4_1.Op != OpAMD64SHRQconst || x4_1.AuxInt != 24 || w != x4_1.Args[0] { break } - v.reset(OpAMD64MOVWstore) - v.AuxInt = i - 1 - v.Aux = s - v.AddArg3(p0, w, mem) - return true - } - // match: (MOVBstore [i] {s} p1 (SHRQconst [8] w) x:(MOVBstore [i-1] {s} p0 w mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) - // result: (MOVWstore [i-1] {s} p0 w mem) - for { - i := v.AuxInt - s := v.Aux - p1 := v_0 - if v_1.Op != OpAMD64SHRQconst || v_1.AuxInt != 8 { + x3 := x4.Args[2] + if x3.Op != OpAMD64MOVBstore || x3.AuxInt != i || x3.Aux != s { break } - w := v_1.Args[0] - x := v_2 - if x.Op != OpAMD64MOVBstore || x.AuxInt != i-1 || x.Aux != s { + _ = x3.Args[2] + p3 := x3.Args[0] + x3_1 := x3.Args[1] + if x3_1.Op != OpAMD64SHRQconst || x3_1.AuxInt != 32 || w != x3_1.Args[0] { break } - mem := x.Args[2] - p0 := x.Args[0] - if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + x2 := x3.Args[2] + if x2.Op != OpAMD64MOVBstore || x2.AuxInt != i || x2.Aux != s { break } - v.reset(OpAMD64MOVWstore) - v.AuxInt = i - 1 - v.Aux = s - v.AddArg3(p0, w, mem) - return true - } - // match: (MOVBstore [i] {s} p1 w x:(MOVBstore [i+1] {s} p0 (SHRWconst [8] w) mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) - // result: (MOVWstore [i] {s} p0 w mem) - for { - i := v.AuxInt - s := v.Aux - p1 := v_0 - w := v_1 - x := v_2 - if x.Op != OpAMD64MOVBstore || x.AuxInt != i+1 || x.Aux != s { - break + _ = x2.Args[2] + p2 := x2.Args[0] + x2_1 := x2.Args[1] + if x2_1.Op != OpAMD64SHRQconst || x2_1.AuxInt != 40 || w != x2_1.Args[0] { + break + } + x1 := x2.Args[2] + if x1.Op != OpAMD64MOVBstore || x1.AuxInt != i || x1.Aux != s { + break + } + _ = x1.Args[2] + p1 := x1.Args[0] + x1_1 := x1.Args[1] + if x1_1.Op != OpAMD64SHRQconst || x1_1.AuxInt != 48 || w != x1_1.Args[0] { + break + } + x0 := x1.Args[2] + if x0.Op != OpAMD64MOVBstore || x0.AuxInt != i || x0.Aux != s { + break + } + mem := x0.Args[2] + p0 := x0.Args[0] + x0_1 := x0.Args[1] + if x0_1.Op != OpAMD64SHRQconst || x0_1.AuxInt != 56 || w != x0_1.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses 
== 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && sequentialAddresses(p0, p1, 1) && sequentialAddresses(p1, p2, 1) && sequentialAddresses(p2, p3, 1) && sequentialAddresses(p3, p4, 1) && sequentialAddresses(p4, p5, 1) && sequentialAddresses(p5, p6, 1) && sequentialAddresses(p6, p7, 1) && clobber(x0, x1, x2, x3, x4, x5, x6)) { + break + } + v.reset(OpAMD64MOVQstore) + v.AuxInt = i + v.Aux = s + v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPQ, w.Type) + v0.AddArg(w) + v.AddArg3(p0, v0, mem) + return true + } + // match: (MOVBstore [i] {s} p (SHRWconst [8] w) x:(MOVBstore [i-1] {s} p w mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstore [i-1] {s} p w mem) + for { + i := v.AuxInt + s := v.Aux + p := v_0 + if v_1.Op != OpAMD64SHRWconst || v_1.AuxInt != 8 { + break + } + w := v_1.Args[0] + x := v_2 + if x.Op != OpAMD64MOVBstore || x.AuxInt != i-1 || x.Aux != s { + break } mem := x.Args[2] - p0 := x.Args[0] + if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVWstore) + v.AuxInt = i - 1 + v.Aux = s + v.AddArg3(p, w, mem) + return true + } + // match: (MOVBstore [i] {s} p (SHRLconst [8] w) x:(MOVBstore [i-1] {s} p w mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstore [i-1] {s} p w mem) + for { + i := v.AuxInt + s := v.Aux + p := v_0 + if v_1.Op != OpAMD64SHRLconst || v_1.AuxInt != 8 { + break + } + w := v_1.Args[0] + x := v_2 + if x.Op != OpAMD64MOVBstore || x.AuxInt != i-1 || x.Aux != s { + break + } + mem := x.Args[2] + if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVWstore) + v.AuxInt = i - 1 + v.Aux = s + v.AddArg3(p, w, mem) + return true + } + // match: (MOVBstore [i] {s} p (SHRQconst [8] w) x:(MOVBstore [i-1] {s} p w mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstore [i-1] {s} p w mem) + for { + i := v.AuxInt + s := v.Aux + p := v_0 + if v_1.Op != OpAMD64SHRQconst || v_1.AuxInt != 8 { + break + } + w := v_1.Args[0] + x := v_2 + if x.Op != OpAMD64MOVBstore || x.AuxInt != i-1 || x.Aux != s { + break + } + mem := x.Args[2] + if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVWstore) + v.AuxInt = i - 1 + v.Aux = s + v.AddArg3(p, w, mem) + return true + } + // match: (MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHRWconst [8] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstore [i] {s} p w mem) + for { + i := v.AuxInt + s := v.Aux + p := v_0 + w := v_1 + x := v_2 + if x.Op != OpAMD64MOVBstore || x.AuxInt != i+1 || x.Aux != s { + break + } + mem := x.Args[2] + if p != x.Args[0] { + break + } + x_1 := x.Args[1] + if x_1.Op != OpAMD64SHRWconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVWstore) + v.AuxInt = i + v.Aux = s + v.AddArg3(p, w, mem) + return true + } + // match: (MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHRLconst [8] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstore [i] {s} p w mem) + for { + i := v.AuxInt + s := v.Aux + p := v_0 + w := v_1 + x := v_2 + if x.Op != OpAMD64MOVBstore || x.AuxInt != i+1 || x.Aux != s { + break + } + mem := x.Args[2] + if p != x.Args[0] { + break + } + x_1 := x.Args[1] + if x_1.Op != OpAMD64SHRLconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVWstore) + v.AuxInt = i + v.Aux = s + v.AddArg3(p, w, mem) + return true + } + // match: (MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHRQconst [8] w) 
mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstore [i] {s} p w mem) + for { + i := v.AuxInt + s := v.Aux + p := v_0 + w := v_1 + x := v_2 + if x.Op != OpAMD64MOVBstore || x.AuxInt != i+1 || x.Aux != s { + break + } + mem := x.Args[2] + if p != x.Args[0] { + break + } x_1 := x.Args[1] - if x_1.Op != OpAMD64SHRWconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + if x_1.Op != OpAMD64SHRQconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVWstore) + v.AuxInt = i + v.Aux = s + v.AddArg3(p, w, mem) + return true + } + // match: (MOVBstore [i] {s} p (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRLconst [j-8] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstore [i-1] {s} p w0 mem) + for { + i := v.AuxInt + s := v.Aux + p := v_0 + if v_1.Op != OpAMD64SHRLconst { + break + } + j := v_1.AuxInt + w := v_1.Args[0] + x := v_2 + if x.Op != OpAMD64MOVBstore || x.AuxInt != i-1 || x.Aux != s { + break + } + mem := x.Args[2] + if p != x.Args[0] { + break + } + w0 := x.Args[1] + if w0.Op != OpAMD64SHRLconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVWstore) + v.AuxInt = i - 1 + v.Aux = s + v.AddArg3(p, w0, mem) + return true + } + // match: (MOVBstore [i] {s} p (SHRQconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRQconst [j-8] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstore [i-1] {s} p w0 mem) + for { + i := v.AuxInt + s := v.Aux + p := v_0 + if v_1.Op != OpAMD64SHRQconst { + break + } + j := v_1.AuxInt + w := v_1.Args[0] + x := v_2 + if x.Op != OpAMD64MOVBstore || x.AuxInt != i-1 || x.Aux != s { + break + } + mem := x.Args[2] + if p != x.Args[0] { + break + } + w0 := x.Args[1] + if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVWstore) + v.AuxInt = i - 1 + v.Aux = s + v.AddArg3(p, w0, mem) + return true + } + // match: (MOVBstore [i] {s} p1 (SHRWconst [8] w) x:(MOVBstore [i] {s} p0 w mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x) + // result: (MOVWstore [i] {s} p0 w mem) + for { + i := v.AuxInt + s := v.Aux + p1 := v_0 + if v_1.Op != OpAMD64SHRWconst || v_1.AuxInt != 8 { + break + } + w := v_1.Args[0] + x := v_2 + if x.Op != OpAMD64MOVBstore || x.AuxInt != i || x.Aux != s { + break + } + mem := x.Args[2] + p0 := x.Args[0] + if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) { break } v.reset(OpAMD64MOVWstore) @@ -11099,22 +11188,24 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { v.AddArg3(p0, w, mem) return true } - // match: (MOVBstore [i] {s} p1 w x:(MOVBstore [i+1] {s} p0 (SHRLconst [8] w) mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) + // match: (MOVBstore [i] {s} p1 (SHRLconst [8] w) x:(MOVBstore [i] {s} p0 w mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x) // result: (MOVWstore [i] {s} p0 w mem) for { i := v.AuxInt s := v.Aux p1 := v_0 - w := v_1 + if v_1.Op != OpAMD64SHRLconst || v_1.AuxInt != 8 { + break + } + w := v_1.Args[0] x := v_2 - if x.Op != OpAMD64MOVBstore || x.AuxInt != i+1 || x.Aux != s { + if x.Op != OpAMD64MOVBstore || x.AuxInt != i || x.Aux != s { break } mem := x.Args[2] p0 := x.Args[0] - x_1 := x.Args[1] - if x_1.Op != OpAMD64SHRLconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + if w != x.Args[1] || !(x.Uses == 1 && 
sequentialAddresses(p0, p1, 1) && clobber(x)) { break } v.reset(OpAMD64MOVWstore) @@ -11123,22 +11214,48 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { v.AddArg3(p0, w, mem) return true } - // match: (MOVBstore [i] {s} p1 w x:(MOVBstore [i+1] {s} p0 (SHRQconst [8] w) mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) + // match: (MOVBstore [i] {s} p1 (SHRQconst [8] w) x:(MOVBstore [i] {s} p0 w mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x) // result: (MOVWstore [i] {s} p0 w mem) for { i := v.AuxInt s := v.Aux p1 := v_0 - w := v_1 + if v_1.Op != OpAMD64SHRQconst || v_1.AuxInt != 8 { + break + } + w := v_1.Args[0] x := v_2 - if x.Op != OpAMD64MOVBstore || x.AuxInt != i+1 || x.Aux != s { + if x.Op != OpAMD64MOVBstore || x.AuxInt != i || x.Aux != s { break } mem := x.Args[2] p0 := x.Args[0] + if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) { + break + } + v.reset(OpAMD64MOVWstore) + v.AuxInt = i + v.Aux = s + v.AddArg3(p0, w, mem) + return true + } + // match: (MOVBstore [i] {s} p0 w x:(MOVBstore [i] {s} p1 (SHRWconst [8] w) mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x) + // result: (MOVWstore [i] {s} p0 w mem) + for { + i := v.AuxInt + s := v.Aux + p0 := v_0 + w := v_1 + x := v_2 + if x.Op != OpAMD64MOVBstore || x.AuxInt != i || x.Aux != s { + break + } + mem := x.Args[2] + p1 := x.Args[0] x_1 := x.Args[1] - if x_1.Op != OpAMD64SHRQconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + if x_1.Op != OpAMD64SHRWconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) { break } v.reset(OpAMD64MOVWstore) @@ -11147,9 +11264,57 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { v.AddArg3(p0, w, mem) return true } - // match: (MOVBstore [i] {s} p1 (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p0 w0:(SHRLconst [j-8] w) mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) - // result: (MOVWstore [i-1] {s} p0 w0 mem) + // match: (MOVBstore [i] {s} p0 w x:(MOVBstore [i] {s} p1 (SHRLconst [8] w) mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x) + // result: (MOVWstore [i] {s} p0 w mem) + for { + i := v.AuxInt + s := v.Aux + p0 := v_0 + w := v_1 + x := v_2 + if x.Op != OpAMD64MOVBstore || x.AuxInt != i || x.Aux != s { + break + } + mem := x.Args[2] + p1 := x.Args[0] + x_1 := x.Args[1] + if x_1.Op != OpAMD64SHRLconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) { + break + } + v.reset(OpAMD64MOVWstore) + v.AuxInt = i + v.Aux = s + v.AddArg3(p0, w, mem) + return true + } + // match: (MOVBstore [i] {s} p0 w x:(MOVBstore [i] {s} p1 (SHRQconst [8] w) mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x) + // result: (MOVWstore [i] {s} p0 w mem) + for { + i := v.AuxInt + s := v.Aux + p0 := v_0 + w := v_1 + x := v_2 + if x.Op != OpAMD64MOVBstore || x.AuxInt != i || x.Aux != s { + break + } + mem := x.Args[2] + p1 := x.Args[0] + x_1 := x.Args[1] + if x_1.Op != OpAMD64SHRQconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) { + break + } + v.reset(OpAMD64MOVWstore) + v.AuxInt = i + v.Aux = s + v.AddArg3(p0, w, mem) + return true + } + // match: (MOVBstore [i] {s} p1 (SHRLconst [j] w) x:(MOVBstore [i] {s} p0 w0:(SHRLconst [j-8] w) mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x) + // result: 
(MOVWstore [i] {s} p0 w0 mem) for { i := v.AuxInt s := v.Aux @@ -11160,24 +11325,24 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { j := v_1.AuxInt w := v_1.Args[0] x := v_2 - if x.Op != OpAMD64MOVBstore || x.AuxInt != i-1 || x.Aux != s { + if x.Op != OpAMD64MOVBstore || x.AuxInt != i || x.Aux != s { break } mem := x.Args[2] p0 := x.Args[0] w0 := x.Args[1] - if w0.Op != OpAMD64SHRLconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + if w0.Op != OpAMD64SHRLconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) { break } v.reset(OpAMD64MOVWstore) - v.AuxInt = i - 1 + v.AuxInt = i v.Aux = s v.AddArg3(p0, w0, mem) return true } - // match: (MOVBstore [i] {s} p1 (SHRQconst [j] w) x:(MOVBstore [i-1] {s} p0 w0:(SHRQconst [j-8] w) mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) - // result: (MOVWstore [i-1] {s} p0 w0 mem) + // match: (MOVBstore [i] {s} p1 (SHRQconst [j] w) x:(MOVBstore [i] {s} p0 w0:(SHRQconst [j-8] w) mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x) + // result: (MOVWstore [i] {s} p0 w0 mem) for { i := v.AuxInt s := v.Aux @@ -11188,17 +11353,17 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { j := v_1.AuxInt w := v_1.Args[0] x := v_2 - if x.Op != OpAMD64MOVBstore || x.AuxInt != i-1 || x.Aux != s { + if x.Op != OpAMD64MOVBstore || x.AuxInt != i || x.Aux != s { break } mem := x.Args[2] p0 := x.Args[0] w0 := x.Args[1] - if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) { break } v.reset(OpAMD64MOVWstore) - v.AuxInt = i - 1 + v.AuxInt = i v.Aux = s v.AddArg3(p0, w0, mem) return true @@ -11337,13 +11502,13 @@ func rewriteValueAMD64_OpAMD64MOVBstoreconst(v *Value) bool { v.AddArg2(ptr, mem) return true } - // match: (MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x) - // result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem) + // match: (MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x) + // result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem) for { c := v.AuxInt s := v.Aux - p1 := v_0 + p := v_0 x := v_1 if x.Op != OpAMD64MOVBstoreconst { break @@ -11353,23 +11518,22 @@ func rewriteValueAMD64_OpAMD64MOVBstoreconst(v *Value) bool { break } mem := x.Args[1] - p0 := x.Args[0] - if !(x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) { + if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) { break } v.reset(OpAMD64MOVWstoreconst) v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xff|ValAndOff(c).Val()<<8, ValAndOff(a).Off()) v.Aux = s - v.AddArg2(p0, mem) + v.AddArg2(p, mem) return true } - // match: (MOVBstoreconst [a] {s} p1 x:(MOVBstoreconst [c] {s} p0 mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x) - // result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem) + // match: 
(MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x) + // result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem) for { a := v.AuxInt s := v.Aux - p1 := v_0 + p := v_0 x := v_1 if x.Op != OpAMD64MOVBstoreconst { break @@ -11379,14 +11543,13 @@ func rewriteValueAMD64_OpAMD64MOVBstoreconst(v *Value) bool { break } mem := x.Args[1] - p0 := x.Args[0] - if !(x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) { + if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) { break } v.reset(OpAMD64MOVWstoreconst) v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xff|ValAndOff(c).Val()<<8, ValAndOff(a).Off()) v.Aux = s - v.AddArg2(p0, mem) + v.AddArg2(p, mem) return true } // match: (MOVBstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) @@ -11860,118 +12023,6 @@ func rewriteValueAMD64_OpAMD64MOVLload(v *Value) bool { v.AddArg2(base, mem) return true } - // match: (MOVLload [i0] {s0} l:(LEAQ1 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVLload [i0+i1] {s0} (LEAQ1 [0] {s1} x y) mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ1 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVLload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } - // match: (MOVLload [i0] {s0} l:(LEAQ2 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVLload [i0+i1] {s0} (LEAQ2 [0] {s1} x y) mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ2 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVLload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } - // match: (MOVLload [i0] {s0} l:(LEAQ4 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVLload [i0+i1] {s0} (LEAQ4 [0] {s1} x y) mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ4 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVLload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } - // match: (MOVLload [i0] {s0} l:(LEAQ8 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVLload [i0+i1] {s0} (LEAQ8 [0] {s1} x y) mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ8 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVLload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } // match: (MOVLload [off1] {sym1} (LEAL [off2] {sym2} base) mem) // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2) // result: (MOVLload [off1+off2] 
{mergeSym(sym1,sym2)} base mem) @@ -12174,125 +12225,64 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool { v.AddArg3(base, val, mem) return true } - // match: (MOVLstore [i0] {s0} l:(LEAQ1 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVLstore [i0+i1] {s0} (LEAQ1 [0] {s1} x y) val mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ1 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVLstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) - return true - } - // match: (MOVLstore [i0] {s0} l:(LEAQ2 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVLstore [i0+i1] {s0} (LEAQ2 [0] {s1} x y) val mem) + // match: (MOVLstore [i] {s} p (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p w mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVQstore [i-4] {s} p w mem) for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ2 { + i := v.AuxInt + s := v.Aux + p := v_0 + if v_1.Op != OpAMD64SHRQconst || v_1.AuxInt != 32 { break } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { + w := v_1.Args[0] + x := v_2 + if x.Op != OpAMD64MOVLstore || x.AuxInt != i-4 || x.Aux != s { break } - v.reset(OpAMD64MOVLstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) + mem := x.Args[2] + if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVQstore) + v.AuxInt = i - 4 + v.Aux = s + v.AddArg3(p, w, mem) return true } - // match: (MOVLstore [i0] {s0} l:(LEAQ4 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVLstore [i0+i1] {s0} (LEAQ4 [0] {s1} x y) val mem) + // match: (MOVLstore [i] {s} p (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p w0:(SHRQconst [j-32] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVQstore [i-4] {s} p w0 mem) for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ4 { + i := v.AuxInt + s := v.Aux + p := v_0 + if v_1.Op != OpAMD64SHRQconst { break } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { + j := v_1.AuxInt + w := v_1.Args[0] + x := v_2 + if x.Op != OpAMD64MOVLstore || x.AuxInt != i-4 || x.Aux != s { break } - v.reset(OpAMD64MOVLstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) - return true - } - // match: (MOVLstore [i0] {s0} l:(LEAQ8 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVLstore [i0+i1] {s0} (LEAQ8 [0] {s1} x y) val mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ8 { + mem := x.Args[2] + if p != x.Args[0] { break } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { + w0 := x.Args[1] + if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-32 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) { break } - v.reset(OpAMD64MOVLstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - 
v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) + v.reset(OpAMD64MOVQstore) + v.AuxInt = i - 4 + v.Aux = s + v.AddArg3(p, w0, mem) return true } - // match: (MOVLstore [i] {s} p1 (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p0 w mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) - // result: (MOVQstore [i-4] {s} p0 w mem) + // match: (MOVLstore [i] {s} p1 (SHRQconst [32] w) x:(MOVLstore [i] {s} p0 w mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 4) && clobber(x) + // result: (MOVQstore [i] {s} p0 w mem) for { i := v.AuxInt s := v.Aux @@ -12302,23 +12292,23 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool { } w := v_1.Args[0] x := v_2 - if x.Op != OpAMD64MOVLstore || x.AuxInt != i-4 || x.Aux != s { + if x.Op != OpAMD64MOVLstore || x.AuxInt != i || x.Aux != s { break } mem := x.Args[2] p0 := x.Args[0] - if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 4) && clobber(x)) { break } v.reset(OpAMD64MOVQstore) - v.AuxInt = i - 4 + v.AuxInt = i v.Aux = s v.AddArg3(p0, w, mem) return true } - // match: (MOVLstore [i] {s} p1 (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p0 w0:(SHRQconst [j-32] w) mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) - // result: (MOVQstore [i-4] {s} p0 w0 mem) + // match: (MOVLstore [i] {s} p1 (SHRQconst [j] w) x:(MOVLstore [i] {s} p0 w0:(SHRQconst [j-32] w) mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 4) && clobber(x) + // result: (MOVQstore [i] {s} p0 w0 mem) for { i := v.AuxInt s := v.Aux @@ -12329,17 +12319,17 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool { j := v_1.AuxInt w := v_1.Args[0] x := v_2 - if x.Op != OpAMD64MOVLstore || x.AuxInt != i-4 || x.Aux != s { + if x.Op != OpAMD64MOVLstore || x.AuxInt != i || x.Aux != s { break } mem := x.Args[2] p0 := x.Args[0] w0 := x.Args[1] - if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-32 || w != w0.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-32 || w != w0.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 4) && clobber(x)) { break } v.reset(OpAMD64MOVQstore) - v.AuxInt = i - 4 + v.AuxInt = i v.Aux = s v.AddArg3(p0, w0, mem) return true @@ -13018,13 +13008,13 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool { v.AddArg2(ptr, mem) return true } - // match: (MOVLstoreconst [c] {s} p1 x:(MOVLstoreconst [a] {s} p0 mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x) - // result: (MOVQstore [ValAndOff(a).Off()] {s} p0 (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) + // match: (MOVLstoreconst [c] {s} p x:(MOVLstoreconst [a] {s} p mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x) + // result: (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) for { c := v.AuxInt s := v.Aux - p1 := v_0 + p := v_0 x := v_1 if x.Op != OpAMD64MOVLstoreconst { break @@ -13034,8 +13024,7 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool { break } mem := x.Args[1] - p0 := x.Args[0] - if !(x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) { + if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) { break } v.reset(OpAMD64MOVQstore) @@ -13043,16 +13032,16 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool { v.Aux = s v0 := 
b.NewValue0(x.Pos, OpAMD64MOVQconst, typ.UInt64) v0.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32 - v.AddArg3(p0, v0, mem) + v.AddArg3(p, v0, mem) return true } - // match: (MOVLstoreconst [a] {s} p1 x:(MOVLstoreconst [c] {s} p0 mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x) - // result: (MOVQstore [ValAndOff(a).Off()] {s} p0 (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) + // match: (MOVLstoreconst [a] {s} p x:(MOVLstoreconst [c] {s} p mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x) + // result: (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) for { a := v.AuxInt s := v.Aux - p1 := v_0 + p := v_0 x := v_1 if x.Op != OpAMD64MOVLstoreconst { break @@ -13062,8 +13051,7 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool { break } mem := x.Args[1] - p0 := x.Args[0] - if !(x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) { + if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) { break } v.reset(OpAMD64MOVQstore) @@ -13071,7 +13059,7 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool { v.Aux = s v0 := b.NewValue0(x.Pos, OpAMD64MOVQconst, typ.UInt64) v0.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32 - v.AddArg3(p0, v0, mem) + v.AddArg3(p, v0, mem) return true } // match: (MOVLstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) @@ -13423,118 +13411,6 @@ func rewriteValueAMD64_OpAMD64MOVQload(v *Value) bool { v.AddArg2(base, mem) return true } - // match: (MOVQload [i0] {s0} l:(LEAQ1 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVQload [i0+i1] {s0} (LEAQ1 [0] {s1} x y) mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ1 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVQload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } - // match: (MOVQload [i0] {s0} l:(LEAQ2 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVQload [i0+i1] {s0} (LEAQ2 [0] {s1} x y) mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ2 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVQload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } - // match: (MOVQload [i0] {s0} l:(LEAQ4 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVQload [i0+i1] {s0} (LEAQ4 [0] {s1} x y) mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ4 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVQload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } - // match: (MOVQload [i0] {s0} l:(LEAQ8 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVQload [i0+i1] {s0} 
(LEAQ8 [0] {s1} x y) mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ8 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVQload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } // match: (MOVQload [off1] {sym1} (LEAL [off2] {sym2} base) mem) // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2) // result: (MOVQload [off1+off2] {mergeSym(sym1,sym2)} base mem) @@ -13614,7 +13490,6 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block // match: (MOVQstore [off1] {sym} (ADDQconst [off2] ptr) val mem) // cond: is32Bit(off1+off2) // result: (MOVQstore [off1+off2] {sym} ptr val mem) @@ -13681,122 +13556,6 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool { v.AddArg3(base, val, mem) return true } - // match: (MOVQstore [i0] {s0} l:(LEAQ1 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVQstore [i0+i1] {s0} (LEAQ1 [0] {s1} x y) val mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ1 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVQstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) - return true - } - // match: (MOVQstore [i0] {s0} l:(LEAQ2 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVQstore [i0+i1] {s0} (LEAQ2 [0] {s1} x y) val mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ2 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVQstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) - return true - } - // match: (MOVQstore [i0] {s0} l:(LEAQ4 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVQstore [i0+i1] {s0} (LEAQ4 [0] {s1} x y) val mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ4 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVQstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) - return true - } - // match: (MOVQstore [i0] {s0} l:(LEAQ8 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVQstore [i0+i1] {s0} (LEAQ8 [0] {s1} x y) val mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ8 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVQstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) - return true - } // match: (MOVQstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) // cond: canMergeSym(sym1, 
sym2) && is32Bit(off1+off2) // result: (MOVQstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) @@ -14430,13 +14189,13 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst(v *Value) bool { v.AddArg2(ptr, mem) return true } - // match: (MOVQstoreconst [c] {s} p1 x:(MOVQstoreconst [c2] {s} p0 mem)) - // cond: config.useSSE && x.Uses == 1 && same(p0, p1, 1) && ValAndOff(c2).Off() + 8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x) - // result: (MOVOstore [ValAndOff(c2).Off()] {s} p0 (MOVOconst [0]) mem) + // match: (MOVQstoreconst [c] {s} p x:(MOVQstoreconst [c2] {s} p mem)) + // cond: config.useSSE && x.Uses == 1 && ValAndOff(c2).Off() + 8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x) + // result: (MOVOstore [ValAndOff(c2).Off()] {s} p (MOVOconst [0]) mem) for { c := v.AuxInt s := v.Aux - p1 := v_0 + p := v_0 x := v_1 if x.Op != OpAMD64MOVQstoreconst { break @@ -14446,8 +14205,7 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst(v *Value) bool { break } mem := x.Args[1] - p0 := x.Args[0] - if !(config.useSSE && x.Uses == 1 && same(p0, p1, 1) && ValAndOff(c2).Off()+8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x)) { + if p != x.Args[0] || !(config.useSSE && x.Uses == 1 && ValAndOff(c2).Off()+8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x)) { break } v.reset(OpAMD64MOVOstore) @@ -14455,7 +14213,7 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst(v *Value) bool { v.Aux = s v0 := b.NewValue0(x.Pos, OpAMD64MOVOconst, types.TypeInt128) v0.AuxInt = 0 - v.AddArg3(p0, v0, mem) + v.AddArg3(p, v0, mem) return true } // match: (MOVQstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) @@ -15119,118 +14877,6 @@ func rewriteValueAMD64_OpAMD64MOVWload(v *Value) bool { v.AddArg2(base, mem) return true } - // match: (MOVWload [i0] {s0} l:(LEAQ1 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVWload [i0+i1] {s0} (LEAQ1 [0] {s1} x y) mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ1 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVWload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } - // match: (MOVWload [i0] {s0} l:(LEAQ2 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVWload [i0+i1] {s0} (LEAQ2 [0] {s1} x y) mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ2 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVWload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } - // match: (MOVWload [i0] {s0} l:(LEAQ4 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVWload [i0+i1] {s0} (LEAQ4 [0] {s1} x y) mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ4 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVWload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - 
v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } - // match: (MOVWload [i0] {s0} l:(LEAQ8 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVWload [i0+i1] {s0} (LEAQ8 [0] {s1} x y) mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ8 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVWload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } // match: (MOVWload [off1] {sym1} (LEAL [off2] {sym2} base) mem) // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2) // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem) @@ -15416,125 +15062,119 @@ func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool { v.AddArg3(base, val, mem) return true } - // match: (MOVWstore [i0] {s0} l:(LEAQ1 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVWstore [i0+i1] {s0} (LEAQ1 [0] {s1} x y) val mem) + // match: (MOVWstore [i] {s} p (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p w mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVLstore [i-2] {s} p w mem) + for { + i := v.AuxInt + s := v.Aux + p := v_0 + if v_1.Op != OpAMD64SHRLconst || v_1.AuxInt != 16 { + break + } + w := v_1.Args[0] + x := v_2 + if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s { + break + } + mem := x.Args[2] + if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVLstore) + v.AuxInt = i - 2 + v.Aux = s + v.AddArg3(p, w, mem) + return true + } + // match: (MOVWstore [i] {s} p (SHRQconst [16] w) x:(MOVWstore [i-2] {s} p w mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVLstore [i-2] {s} p w mem) + for { + i := v.AuxInt + s := v.Aux + p := v_0 + if v_1.Op != OpAMD64SHRQconst || v_1.AuxInt != 16 { + break + } + w := v_1.Args[0] + x := v_2 + if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s { + break + } + mem := x.Args[2] + if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVLstore) + v.AuxInt = i - 2 + v.Aux = s + v.AddArg3(p, w, mem) + return true + } + // match: (MOVWstore [i] {s} p (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRLconst [j-16] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVLstore [i-2] {s} p w0 mem) for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ1 { + i := v.AuxInt + s := v.Aux + p := v_0 + if v_1.Op != OpAMD64SHRLconst { break } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { + j := v_1.AuxInt + w := v_1.Args[0] + x := v_2 + if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s { break } - v.reset(OpAMD64MOVWstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) - return true - } - // match: (MOVWstore [i0] {s0} l:(LEAQ2 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVWstore [i0+i1] {s0} (LEAQ2 [0] {s1} x y) val mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ2 { + mem := x.Args[2] + if p != x.Args[0] { break } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { + w0 := x.Args[1] + if w0.Op != 
OpAMD64SHRLconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) { break } - v.reset(OpAMD64MOVWstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) + v.reset(OpAMD64MOVLstore) + v.AuxInt = i - 2 + v.Aux = s + v.AddArg3(p, w0, mem) return true } - // match: (MOVWstore [i0] {s0} l:(LEAQ4 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVWstore [i0+i1] {s0} (LEAQ4 [0] {s1} x y) val mem) + // match: (MOVWstore [i] {s} p (SHRQconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRQconst [j-16] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVLstore [i-2] {s} p w0 mem) for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ4 { + i := v.AuxInt + s := v.Aux + p := v_0 + if v_1.Op != OpAMD64SHRQconst { break } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { + j := v_1.AuxInt + w := v_1.Args[0] + x := v_2 + if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s { break } - v.reset(OpAMD64MOVWstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) - return true - } - // match: (MOVWstore [i0] {s0} l:(LEAQ8 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVWstore [i0+i1] {s0} (LEAQ8 [0] {s1} x y) val mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ8 { + mem := x.Args[2] + if p != x.Args[0] { break } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { + w0 := x.Args[1] + if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) { break } - v.reset(OpAMD64MOVWstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) + v.reset(OpAMD64MOVLstore) + v.AuxInt = i - 2 + v.Aux = s + v.AddArg3(p, w0, mem) return true } - // match: (MOVWstore [i] {s} p1 (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p0 w mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) - // result: (MOVLstore [i-2] {s} p0 w mem) + // match: (MOVWstore [i] {s} p1 (SHRLconst [16] w) x:(MOVWstore [i] {s} p0 w mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x) + // result: (MOVLstore [i] {s} p0 w mem) for { i := v.AuxInt s := v.Aux @@ -15544,23 +15184,23 @@ func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool { } w := v_1.Args[0] x := v_2 - if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s { + if x.Op != OpAMD64MOVWstore || x.AuxInt != i || x.Aux != s { break } mem := x.Args[2] p0 := x.Args[0] - if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)) { break } v.reset(OpAMD64MOVLstore) - v.AuxInt = i - 2 + v.AuxInt = i v.Aux = s v.AddArg3(p0, w, mem) return true } - // match: (MOVWstore [i] {s} p1 (SHRQconst [16] w) x:(MOVWstore [i-2] {s} p0 w mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) - // result: (MOVLstore [i-2] {s} p0 w mem) + // match: (MOVWstore [i] {s} p1 (SHRQconst [16] w) x:(MOVWstore [i] {s} p0 w mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x) + // result: (MOVLstore [i] {s} p0 w mem) for { i := 
v.AuxInt s := v.Aux @@ -15570,23 +15210,23 @@ func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool { } w := v_1.Args[0] x := v_2 - if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s { + if x.Op != OpAMD64MOVWstore || x.AuxInt != i || x.Aux != s { break } mem := x.Args[2] p0 := x.Args[0] - if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)) { break } v.reset(OpAMD64MOVLstore) - v.AuxInt = i - 2 + v.AuxInt = i v.Aux = s v.AddArg3(p0, w, mem) return true } - // match: (MOVWstore [i] {s} p1 (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p0 w0:(SHRLconst [j-16] w) mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) - // result: (MOVLstore [i-2] {s} p0 w0 mem) + // match: (MOVWstore [i] {s} p1 (SHRLconst [j] w) x:(MOVWstore [i] {s} p0 w0:(SHRLconst [j-16] w) mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x) + // result: (MOVLstore [i] {s} p0 w0 mem) for { i := v.AuxInt s := v.Aux @@ -15597,24 +15237,24 @@ func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool { j := v_1.AuxInt w := v_1.Args[0] x := v_2 - if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s { + if x.Op != OpAMD64MOVWstore || x.AuxInt != i || x.Aux != s { break } mem := x.Args[2] p0 := x.Args[0] w0 := x.Args[1] - if w0.Op != OpAMD64SHRLconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + if w0.Op != OpAMD64SHRLconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)) { break } v.reset(OpAMD64MOVLstore) - v.AuxInt = i - 2 + v.AuxInt = i v.Aux = s v.AddArg3(p0, w0, mem) return true } - // match: (MOVWstore [i] {s} p1 (SHRQconst [j] w) x:(MOVWstore [i-2] {s} p0 w0:(SHRQconst [j-16] w) mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) - // result: (MOVLstore [i-2] {s} p0 w0 mem) + // match: (MOVWstore [i] {s} p1 (SHRQconst [j] w) x:(MOVWstore [i] {s} p0 w0:(SHRQconst [j-16] w) mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x) + // result: (MOVLstore [i] {s} p0 w0 mem) for { i := v.AuxInt s := v.Aux @@ -15625,17 +15265,17 @@ func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool { j := v_1.AuxInt w := v_1.Args[0] x := v_2 - if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s { + if x.Op != OpAMD64MOVWstore || x.AuxInt != i || x.Aux != s { break } mem := x.Args[2] p0 := x.Args[0] w0 := x.Args[1] - if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)) { break } v.reset(OpAMD64MOVLstore) - v.AuxInt = i - 2 + v.AuxInt = i v.Aux = s v.AddArg3(p0, w0, mem) return true @@ -15774,13 +15414,13 @@ func rewriteValueAMD64_OpAMD64MOVWstoreconst(v *Value) bool { v.AddArg2(ptr, mem) return true } - // match: (MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x) - // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem) + // match: (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x) + // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, 
ValAndOff(a).Off())] {s} p mem) for { c := v.AuxInt s := v.Aux - p1 := v_0 + p := v_0 x := v_1 if x.Op != OpAMD64MOVWstoreconst { break @@ -15790,23 +15430,22 @@ func rewriteValueAMD64_OpAMD64MOVWstoreconst(v *Value) bool { break } mem := x.Args[1] - p0 := x.Args[0] - if !(x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) { + if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) { break } v.reset(OpAMD64MOVLstoreconst) v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off()) v.Aux = s - v.AddArg2(p0, mem) + v.AddArg2(p, mem) return true } - // match: (MOVWstoreconst [a] {s} p1 x:(MOVWstoreconst [c] {s} p0 mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x) - // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem) + // match: (MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x) + // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem) for { a := v.AuxInt s := v.Aux - p1 := v_0 + p := v_0 x := v_1 if x.Op != OpAMD64MOVWstoreconst { break @@ -15816,14 +15455,13 @@ func rewriteValueAMD64_OpAMD64MOVWstoreconst(v *Value) bool { break } mem := x.Args[1] - p0 := x.Args[0] - if !(x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) { + if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) { break } v.reset(OpAMD64MOVLstoreconst) v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off()) v.Aux = s - v.AddArg2(p0, mem) + v.AddArg2(p, mem) return true } // match: (MOVWstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) @@ -17887,9 +17525,9 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { v.copyOf(x) return true } - // match: (ORL x0:(MOVBload [i0] {s} p0 mem) sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p1 mem))) - // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p0 mem) + // match: (ORL x0:(MOVBload [i0] {s} p mem) sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem))) + // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x0 := v_0 @@ -17899,7 +17537,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { i0 := x0.AuxInt s := x0.Aux mem := x0.Args[1] - p0 := x0.Args[0] + p := x0.Args[0] sh := v_1 if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 8 { continue @@ -17913,8 +17551,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { continue } _ = x1.Args[1] - p1 := x1.Args[0] - if mem != x1.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { continue } b = mergePoint(b, x0, x1) @@ -17922,14 +17559,50 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { v.copyOf(v0) v0.AuxInt = i0 v0.Aux = s + 
v0.AddArg2(p, mem) + return true + } + break + } + // match: (ORL x0:(MOVBload [i] {s} p0 mem) sh:(SHLLconst [8] x1:(MOVBload [i] {s} p1 mem))) + // cond: x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (MOVWload [i] {s} p0 mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x0 := v_0 + if x0.Op != OpAMD64MOVBload { + continue + } + i := x0.AuxInt + s := x0.Aux + mem := x0.Args[1] + p0 := x0.Args[0] + sh := v_1 + if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 8 { + continue + } + x1 := sh.Args[0] + if x1.Op != OpAMD64MOVBload || x1.AuxInt != i || x1.Aux != s { + continue + } + _ = x1.Args[1] + p1 := x1.Args[0] + if mem != x1.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + continue + } + b = mergePoint(b, x0, x1) + v0 := b.NewValue0(x1.Pos, OpAMD64MOVWload, typ.UInt16) + v.copyOf(v0) + v0.AuxInt = i + v0.Aux = s v0.AddArg2(p0, mem) return true } break } - // match: (ORL x0:(MOVWload [i0] {s} p0 mem) sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p1 mem))) - // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p0 mem) + // match: (ORL x0:(MOVWload [i0] {s} p mem) sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p mem))) + // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x0 := v_0 @@ -17939,7 +17612,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { i0 := x0.AuxInt s := x0.Aux mem := x0.Args[1] - p0 := x0.Args[0] + p := x0.Args[0] sh := v_1 if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 16 { continue @@ -17953,8 +17626,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { continue } _ = x1.Args[1] - p1 := x1.Args[0] - if mem != x1.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { continue } b = mergePoint(b, x0, x1) @@ -17962,14 +17634,50 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { v.copyOf(v0) v0.AuxInt = i0 v0.Aux = s + v0.AddArg2(p, mem) + return true + } + break + } + // match: (ORL x0:(MOVWload [i] {s} p0 mem) sh:(SHLLconst [16] x1:(MOVWload [i] {s} p1 mem))) + // cond: x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (MOVLload [i] {s} p0 mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x0 := v_0 + if x0.Op != OpAMD64MOVWload { + continue + } + i := x0.AuxInt + s := x0.Aux + mem := x0.Args[1] + p0 := x0.Args[0] + sh := v_1 + if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 16 { + continue + } + x1 := sh.Args[0] + if x1.Op != OpAMD64MOVWload || x1.AuxInt != i || x1.Aux != s { + continue + } + _ = x1.Args[1] + p1 := x1.Args[0] + if mem != x1.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + continue + } + b = mergePoint(b, x0, 
x1) + v0 := b.NewValue0(x1.Pos, OpAMD64MOVLload, typ.UInt32) + v.copyOf(v0) + v0.AuxInt = i + v0.Aux = s v0.AddArg2(p0, mem) return true } break } - // match: (ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p0 mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p1 mem)) y)) - // cond: i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) - // result: @mergePoint(b,x0,x1,y) (ORL (SHLLconst [j0] (MOVWload [i0] {s} p0 mem)) y) + // match: (ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)) y)) + // cond: i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) + // result: @mergePoint(b,x0,x1,y) (ORL (SHLLconst [j0] (MOVWload [i0] {s} p mem)) y) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { s1 := v_0 @@ -17984,7 +17692,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { i1 := x1.AuxInt s := x1.Aux mem := x1.Args[1] - p0 := x1.Args[0] + p := x1.Args[0] or := v_1 if or.Op != OpAMD64ORL { continue @@ -18007,12 +17715,11 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { continue } _ = x0.Args[1] - p1 := x0.Args[0] - if mem != x0.Args[1] { + if p != x0.Args[0] || mem != x0.Args[1] { continue } y := or_1 - if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { + if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { continue } b = mergePoint(b, x0, x1, y) @@ -18023,6 +17730,66 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { v2 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16) v2.AuxInt = i0 v2.Aux = s + v2.AddArg2(p, mem) + v1.AddArg(v2) + v0.AddArg2(v1, y) + return true + } + } + break + } + // match: (ORL s1:(SHLLconst [j1] x1:(MOVBload [i] {s} p1 mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBload [i] {s} p0 mem)) y)) + // cond: j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) + // result: @mergePoint(b,x0,x1,y) (ORL (SHLLconst [j0] (MOVWload [i] {s} p0 mem)) y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + s1 := v_0 + if s1.Op != OpAMD64SHLLconst { + continue + } + j1 := s1.AuxInt + x1 := s1.Args[0] + if x1.Op != OpAMD64MOVBload { + continue + } + i := x1.AuxInt + s := x1.Aux + mem := x1.Args[1] + p1 := x1.Args[0] + or := v_1 + if or.Op != OpAMD64ORL { + continue + } + _ = or.Args[1] + or_0 := or.Args[0] + or_1 := or.Args[1] + for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 { + s0 := or_0 + if s0.Op != OpAMD64SHLLconst { + continue + } + j0 := s0.AuxInt + x0 := s0.Args[0] + if x0.Op != OpAMD64MOVBload || x0.AuxInt != i || x0.Aux != s { + continue + } + _ = x0.Args[1] + p0 := x0.Args[0] + if mem != x0.Args[1] { + continue + } + y := or_1 + if !(j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { + continue + 
} + b = mergePoint(b, x0, x1, y) + v0 := b.NewValue0(x0.Pos, OpAMD64ORL, v.Type) + v.copyOf(v0) + v1 := b.NewValue0(x0.Pos, OpAMD64SHLLconst, v.Type) + v1.AuxInt = j0 + v2 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16) + v2.AuxInt = i + v2.Aux = s v2.AddArg2(p0, mem) v1.AddArg(v2) v0.AddArg2(v1, y) @@ -18031,9 +17798,9 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { } break } - // match: (ORL x1:(MOVBload [i1] {s} p0 mem) sh:(SHLLconst [8] x0:(MOVBload [i0] {s} p1 mem))) - // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - // result: @mergePoint(b,x0,x1) (ROLWconst [8] (MOVWload [i0] {s} p0 mem)) + // match: (ORL x1:(MOVBload [i1] {s} p mem) sh:(SHLLconst [8] x0:(MOVBload [i0] {s} p mem))) + // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (ROLWconst [8] (MOVWload [i0] {s} p mem)) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x1 := v_0 @@ -18043,7 +17810,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { i1 := x1.AuxInt s := x1.Aux mem := x1.Args[1] - p0 := x1.Args[0] + p := x1.Args[0] sh := v_1 if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 8 { continue @@ -18057,8 +17824,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { continue } _ = x0.Args[1] - p1 := x0.Args[0] - if mem != x0.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { continue } b = mergePoint(b, x0, x1) @@ -18068,15 +17834,54 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { v1 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16) v1.AuxInt = i0 v1.Aux = s + v1.AddArg2(p, mem) + v0.AddArg(v1) + return true + } + break + } + // match: (ORL x1:(MOVBload [i] {s} p1 mem) sh:(SHLLconst [8] x0:(MOVBload [i] {s} p0 mem))) + // cond: x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (ROLWconst [8] (MOVWload [i] {s} p0 mem)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x1 := v_0 + if x1.Op != OpAMD64MOVBload { + continue + } + i := x1.AuxInt + s := x1.Aux + mem := x1.Args[1] + p1 := x1.Args[0] + sh := v_1 + if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 8 { + continue + } + x0 := sh.Args[0] + if x0.Op != OpAMD64MOVBload || x0.AuxInt != i || x0.Aux != s { + continue + } + _ = x0.Args[1] + p0 := x0.Args[0] + if mem != x0.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + continue + } + b = mergePoint(b, x0, x1) + v0 := b.NewValue0(x0.Pos, OpAMD64ROLWconst, v.Type) + v.copyOf(v0) + v0.AuxInt = 8 + v1 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16) + v1.AuxInt = i + v1.Aux = s v1.AddArg2(p0, mem) v0.AddArg(v1) return true } break } - // match: (ORL r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p0 mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p1 mem)))) - // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh) - // result: @mergePoint(b,x0,x1) (BSWAPL (MOVLload [i0] {s} p0 
mem)) + // match: (ORL r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem)))) + // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh) + // result: @mergePoint(b,x0,x1) (BSWAPL (MOVLload [i0] {s} p mem)) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { r1 := v_0 @@ -18090,7 +17895,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { i1 := x1.AuxInt s := x1.Aux mem := x1.Args[1] - p0 := x1.Args[0] + p := x1.Args[0] sh := v_1 if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 16 { continue @@ -18108,15 +17913,60 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { continue } _ = x0.Args[1] - p1 := x0.Args[0] - if mem != x0.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) { + if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) { + continue + } + b = mergePoint(b, x0, x1) + v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPL, v.Type) + v.copyOf(v0) + v1 := b.NewValue0(x0.Pos, OpAMD64MOVLload, typ.UInt32) + v1.AuxInt = i0 + v1.Aux = s + v1.AddArg2(p, mem) + v0.AddArg(v1) + return true + } + break + } + // match: (ORL r1:(ROLWconst [8] x1:(MOVWload [i] {s} p1 mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i] {s} p0 mem)))) + // cond: x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh) + // result: @mergePoint(b,x0,x1) (BSWAPL (MOVLload [i] {s} p0 mem)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + r1 := v_0 + if r1.Op != OpAMD64ROLWconst || r1.AuxInt != 8 { + continue + } + x1 := r1.Args[0] + if x1.Op != OpAMD64MOVWload { + continue + } + i := x1.AuxInt + s := x1.Aux + mem := x1.Args[1] + p1 := x1.Args[0] + sh := v_1 + if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 16 { + continue + } + r0 := sh.Args[0] + if r0.Op != OpAMD64ROLWconst || r0.AuxInt != 8 { + continue + } + x0 := r0.Args[0] + if x0.Op != OpAMD64MOVWload || x0.AuxInt != i || x0.Aux != s { + continue + } + _ = x0.Args[1] + p0 := x0.Args[0] + if mem != x0.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) { continue } b = mergePoint(b, x0, x1) v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPL, v.Type) v.copyOf(v0) v1 := b.NewValue0(x0.Pos, OpAMD64MOVLload, typ.UInt32) - v1.AuxInt = i0 + v1.AuxInt = i v1.Aux = s v1.AddArg2(p0, mem) v0.AddArg(v1) @@ -18124,9 +17974,9 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { } break } - // match: (ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p0 mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p1 mem)) y)) - // cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) - // result: @mergePoint(b,x0,x1,y) (ORL (SHLLconst [j1] (ROLWconst [8] (MOVWload [i0] {s} p0 mem))) y) + // match: (ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) y)) + // cond: i1 == i0+1 && 
j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) + // result: @mergePoint(b,x0,x1,y) (ORL (SHLLconst [j1] (ROLWconst [8] (MOVWload [i0] {s} p mem))) y) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { s0 := v_0 @@ -18141,7 +17991,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { i0 := x0.AuxInt s := x0.Aux mem := x0.Args[1] - p0 := x0.Args[0] + p := x0.Args[0] or := v_1 if or.Op != OpAMD64ORL { continue @@ -18164,12 +18014,74 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { continue } _ = x1.Args[1] + if p != x1.Args[0] || mem != x1.Args[1] { + continue + } + y := or_1 + if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { + continue + } + b = mergePoint(b, x0, x1, y) + v0 := b.NewValue0(x1.Pos, OpAMD64ORL, v.Type) + v.copyOf(v0) + v1 := b.NewValue0(x1.Pos, OpAMD64SHLLconst, v.Type) + v1.AuxInt = j1 + v2 := b.NewValue0(x1.Pos, OpAMD64ROLWconst, typ.UInt16) + v2.AuxInt = 8 + v3 := b.NewValue0(x1.Pos, OpAMD64MOVWload, typ.UInt16) + v3.AuxInt = i0 + v3.Aux = s + v3.AddArg2(p, mem) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(v1, y) + return true + } + } + break + } + // match: (ORL s0:(SHLLconst [j0] x0:(MOVBload [i] {s} p0 mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBload [i] {s} p1 mem)) y)) + // cond: j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) + // result: @mergePoint(b,x0,x1,y) (ORL (SHLLconst [j1] (ROLWconst [8] (MOVWload [i] {s} p0 mem))) y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + s0 := v_0 + if s0.Op != OpAMD64SHLLconst { + continue + } + j0 := s0.AuxInt + x0 := s0.Args[0] + if x0.Op != OpAMD64MOVBload { + continue + } + i := x0.AuxInt + s := x0.Aux + mem := x0.Args[1] + p0 := x0.Args[0] + or := v_1 + if or.Op != OpAMD64ORL { + continue + } + _ = or.Args[1] + or_0 := or.Args[0] + or_1 := or.Args[1] + for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 { + s1 := or_0 + if s1.Op != OpAMD64SHLLconst { + continue + } + j1 := s1.AuxInt + x1 := s1.Args[0] + if x1.Op != OpAMD64MOVBload || x1.AuxInt != i || x1.Aux != s { + continue + } + _ = x1.Args[1] p1 := x1.Args[0] if mem != x1.Args[1] { continue } y := or_1 - if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { + if !(j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { continue } b = mergePoint(b, x0, x1, y) @@ -18180,7 +18092,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { v2 := b.NewValue0(x1.Pos, OpAMD64ROLWconst, typ.UInt16) v2.AuxInt = 8 v3 := b.NewValue0(x1.Pos, OpAMD64MOVWload, typ.UInt16) - v3.AuxInt = i0 + v3.AuxInt = i v3.Aux = s v3.AddArg2(p0, mem) v2.AddArg(v3) @@ -18804,9 +18716,9 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { v.copyOf(x) return true } - // match: (ORQ x0:(MOVBload [i0] {s} p0 mem) sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p1 mem))) - // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, 
p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p0 mem) + // match: (ORQ x0:(MOVBload [i0] {s} p mem) sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p mem))) + // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x0 := v_0 @@ -18816,7 +18728,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { i0 := x0.AuxInt s := x0.Aux mem := x0.Args[1] - p0 := x0.Args[0] + p := x0.Args[0] sh := v_1 if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 8 { continue @@ -18830,8 +18742,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { continue } _ = x1.Args[1] - p1 := x1.Args[0] - if mem != x1.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { continue } b = mergePoint(b, x0, x1) @@ -18839,14 +18750,50 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { v.copyOf(v0) v0.AuxInt = i0 v0.Aux = s + v0.AddArg2(p, mem) + return true + } + break + } + // match: (ORQ x0:(MOVBload [i] {s} p0 mem) sh:(SHLQconst [8] x1:(MOVBload [i] {s} p1 mem))) + // cond: x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (MOVWload [i] {s} p0 mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x0 := v_0 + if x0.Op != OpAMD64MOVBload { + continue + } + i := x0.AuxInt + s := x0.Aux + mem := x0.Args[1] + p0 := x0.Args[0] + sh := v_1 + if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 8 { + continue + } + x1 := sh.Args[0] + if x1.Op != OpAMD64MOVBload || x1.AuxInt != i || x1.Aux != s { + continue + } + _ = x1.Args[1] + p1 := x1.Args[0] + if mem != x1.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + continue + } + b = mergePoint(b, x0, x1) + v0 := b.NewValue0(x1.Pos, OpAMD64MOVWload, typ.UInt16) + v.copyOf(v0) + v0.AuxInt = i + v0.Aux = s v0.AddArg2(p0, mem) return true } break } - // match: (ORQ x0:(MOVWload [i0] {s} p0 mem) sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p1 mem))) - // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p0 mem) + // match: (ORQ x0:(MOVWload [i0] {s} p mem) sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p mem))) + // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x0 := v_0 @@ -18856,7 +18803,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { i0 := x0.AuxInt s := x0.Aux mem := x0.Args[1] - p0 := x0.Args[0] + p := x0.Args[0] sh := v_1 if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 16 { continue @@ -18870,8 +18817,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { continue } _ = x1.Args[1] - p1 := x1.Args[0] - if mem != x1.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && 
clobber(x0, x1, sh)) { + if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { continue } b = mergePoint(b, x0, x1) @@ -18879,14 +18825,50 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { v.copyOf(v0) v0.AuxInt = i0 v0.Aux = s + v0.AddArg2(p, mem) + return true + } + break + } + // match: (ORQ x0:(MOVWload [i] {s} p0 mem) sh:(SHLQconst [16] x1:(MOVWload [i] {s} p1 mem))) + // cond: x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (MOVLload [i] {s} p0 mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x0 := v_0 + if x0.Op != OpAMD64MOVWload { + continue + } + i := x0.AuxInt + s := x0.Aux + mem := x0.Args[1] + p0 := x0.Args[0] + sh := v_1 + if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 16 { + continue + } + x1 := sh.Args[0] + if x1.Op != OpAMD64MOVWload || x1.AuxInt != i || x1.Aux != s { + continue + } + _ = x1.Args[1] + p1 := x1.Args[0] + if mem != x1.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + continue + } + b = mergePoint(b, x0, x1) + v0 := b.NewValue0(x1.Pos, OpAMD64MOVLload, typ.UInt32) + v.copyOf(v0) + v0.AuxInt = i + v0.Aux = s v0.AddArg2(p0, mem) return true } break } - // match: (ORQ x0:(MOVLload [i0] {s} p0 mem) sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p1 mem))) - // cond: i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - // result: @mergePoint(b,x0,x1) (MOVQload [i0] {s} p0 mem) + // match: (ORQ x0:(MOVLload [i0] {s} p mem) sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p mem))) + // cond: i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (MOVQload [i0] {s} p mem) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x0 := v_0 @@ -18896,7 +18878,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { i0 := x0.AuxInt s := x0.Aux mem := x0.Args[1] - p0 := x0.Args[0] + p := x0.Args[0] sh := v_1 if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 32 { continue @@ -18910,23 +18892,181 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { continue } _ = x1.Args[1] + if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + continue + } + b = mergePoint(b, x0, x1) + v0 := b.NewValue0(x1.Pos, OpAMD64MOVQload, typ.UInt64) + v.copyOf(v0) + v0.AuxInt = i0 + v0.Aux = s + v0.AddArg2(p, mem) + return true + } + break + } + // match: (ORQ x0:(MOVLload [i] {s} p0 mem) sh:(SHLQconst [32] x1:(MOVLload [i] {s} p1 mem))) + // cond: x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 4) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (MOVQload [i] {s} p0 mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x0 := v_0 + if x0.Op != OpAMD64MOVLload { + continue + } + i := x0.AuxInt + s := x0.Aux + mem := x0.Args[1] + p0 := x0.Args[0] + sh := v_1 + if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 32 { + continue + } + x1 := sh.Args[0] + if x1.Op != OpAMD64MOVLload || x1.AuxInt != i || x1.Aux != s { + continue + } + _ = x1.Args[1] + p1 := x1.Args[0] + if mem != x1.Args[1] || !(x0.Uses == 1 && 
x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 4) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + continue + } + b = mergePoint(b, x0, x1) + v0 := b.NewValue0(x1.Pos, OpAMD64MOVQload, typ.UInt64) + v.copyOf(v0) + v0.AuxInt = i + v0.Aux = s + v0.AddArg2(p0, mem) + return true + } + break + } + // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) y)) + // cond: i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) + // result: @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j0] (MOVWload [i0] {s} p mem)) y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + s1 := v_0 + if s1.Op != OpAMD64SHLQconst { + continue + } + j1 := s1.AuxInt + x1 := s1.Args[0] + if x1.Op != OpAMD64MOVBload { + continue + } + i1 := x1.AuxInt + s := x1.Aux + mem := x1.Args[1] + p := x1.Args[0] + or := v_1 + if or.Op != OpAMD64ORQ { + continue + } + _ = or.Args[1] + or_0 := or.Args[0] + or_1 := or.Args[1] + for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 { + s0 := or_0 + if s0.Op != OpAMD64SHLQconst { + continue + } + j0 := s0.AuxInt + x0 := s0.Args[0] + if x0.Op != OpAMD64MOVBload { + continue + } + i0 := x0.AuxInt + if x0.Aux != s { + continue + } + _ = x0.Args[1] + if p != x0.Args[0] || mem != x0.Args[1] { + continue + } + y := or_1 + if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { + continue + } + b = mergePoint(b, x0, x1, y) + v0 := b.NewValue0(x0.Pos, OpAMD64ORQ, v.Type) + v.copyOf(v0) + v1 := b.NewValue0(x0.Pos, OpAMD64SHLQconst, v.Type) + v1.AuxInt = j0 + v2 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16) + v2.AuxInt = i0 + v2.Aux = s + v2.AddArg2(p, mem) + v1.AddArg(v2) + v0.AddArg2(v1, y) + return true + } + } + break + } + // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBload [i] {s} p1 mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBload [i] {s} p0 mem)) y)) + // cond: j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) + // result: @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j0] (MOVWload [i] {s} p0 mem)) y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + s1 := v_0 + if s1.Op != OpAMD64SHLQconst { + continue + } + j1 := s1.AuxInt + x1 := s1.Args[0] + if x1.Op != OpAMD64MOVBload { + continue + } + i := x1.AuxInt + s := x1.Aux + mem := x1.Args[1] p1 := x1.Args[0] - if mem != x1.Args[1] || !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + or := v_1 + if or.Op != OpAMD64ORQ { continue } - b = mergePoint(b, x0, x1) - v0 := b.NewValue0(x1.Pos, OpAMD64MOVQload, typ.UInt64) - v.copyOf(v0) - v0.AuxInt = i0 - v0.Aux = s - v0.AddArg2(p0, mem) - return true + _ = or.Args[1] + or_0 := or.Args[0] + or_1 := or.Args[1] + for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 { + s0 := or_0 + if s0.Op != OpAMD64SHLQconst { + continue + } + j0 := s0.AuxInt + x0 := s0.Args[0] + if x0.Op != OpAMD64MOVBload || x0.AuxInt != i || x0.Aux != s { + continue + } + _ = x0.Args[1] + p0 := x0.Args[0] + if mem != x0.Args[1] { + continue + } + y := or_1 + if !(j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && 
x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { + continue + } + b = mergePoint(b, x0, x1, y) + v0 := b.NewValue0(x0.Pos, OpAMD64ORQ, v.Type) + v.copyOf(v0) + v1 := b.NewValue0(x0.Pos, OpAMD64SHLQconst, v.Type) + v1.AuxInt = j0 + v2 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16) + v2.AuxInt = i + v2.Aux = s + v2.AddArg2(p0, mem) + v1.AddArg(v2) + v0.AddArg2(v1, y) + return true + } } break } - // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p0 mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p1 mem)) y)) - // cond: i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) - // result: @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j0] (MOVWload [i0] {s} p0 mem)) y) + // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem)) y)) + // cond: i1 == i0+2 && j1 == j0+16 && j0 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) + // result: @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j0] (MOVLload [i0] {s} p mem)) y) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { s1 := v_0 @@ -18935,13 +19075,13 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { } j1 := s1.AuxInt x1 := s1.Args[0] - if x1.Op != OpAMD64MOVBload { + if x1.Op != OpAMD64MOVWload { continue } i1 := x1.AuxInt s := x1.Aux mem := x1.Args[1] - p0 := x1.Args[0] + p := x1.Args[0] or := v_1 if or.Op != OpAMD64ORQ { continue @@ -18956,7 +19096,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { } j0 := s0.AuxInt x0 := s0.Args[0] - if x0.Op != OpAMD64MOVBload { + if x0.Op != OpAMD64MOVWload { continue } i0 := x0.AuxInt @@ -18964,12 +19104,11 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { continue } _ = x0.Args[1] - p1 := x0.Args[0] - if mem != x0.Args[1] { + if p != x0.Args[0] || mem != x0.Args[1] { continue } y := or_1 - if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { + if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { continue } b = mergePoint(b, x0, x1, y) @@ -18977,10 +19116,10 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { v.copyOf(v0) v1 := b.NewValue0(x0.Pos, OpAMD64SHLQconst, v.Type) v1.AuxInt = j0 - v2 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16) + v2 := b.NewValue0(x0.Pos, OpAMD64MOVLload, typ.UInt32) v2.AuxInt = i0 v2.Aux = s - v2.AddArg2(p0, mem) + v2.AddArg2(p, mem) v1.AddArg(v2) v0.AddArg2(v1, y) return true @@ -18988,9 +19127,9 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { } break } - // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p0 mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p1 mem)) y)) - // cond: i1 == i0+2 && j1 == j0+16 && j0 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) - // result: @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j0] (MOVLload [i0] {s} p0 mem)) y) + // match: (ORQ s1:(SHLQconst [j1] 
x1:(MOVWload [i] {s} p1 mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWload [i] {s} p0 mem)) y)) + // cond: j1 == j0+16 && j0 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) + // result: @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j0] (MOVLload [i] {s} p0 mem)) y) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { s1 := v_0 @@ -19002,10 +19141,10 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { if x1.Op != OpAMD64MOVWload { continue } - i1 := x1.AuxInt + i := x1.AuxInt s := x1.Aux mem := x1.Args[1] - p0 := x1.Args[0] + p1 := x1.Args[0] or := v_1 if or.Op != OpAMD64ORQ { continue @@ -19020,20 +19159,16 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { } j0 := s0.AuxInt x0 := s0.Args[0] - if x0.Op != OpAMD64MOVWload { - continue - } - i0 := x0.AuxInt - if x0.Aux != s { + if x0.Op != OpAMD64MOVWload || x0.AuxInt != i || x0.Aux != s { continue } _ = x0.Args[1] - p1 := x0.Args[0] + p0 := x0.Args[0] if mem != x0.Args[1] { continue } y := or_1 - if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { + if !(j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { continue } b = mergePoint(b, x0, x1, y) @@ -19042,7 +19177,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { v1 := b.NewValue0(x0.Pos, OpAMD64SHLQconst, v.Type) v1.AuxInt = j0 v2 := b.NewValue0(x0.Pos, OpAMD64MOVLload, typ.UInt32) - v2.AuxInt = i0 + v2.AuxInt = i v2.Aux = s v2.AddArg2(p0, mem) v1.AddArg(v2) @@ -19052,9 +19187,9 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { } break } - // match: (ORQ x1:(MOVBload [i1] {s} p0 mem) sh:(SHLQconst [8] x0:(MOVBload [i0] {s} p1 mem))) - // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - // result: @mergePoint(b,x0,x1) (ROLWconst [8] (MOVWload [i0] {s} p0 mem)) + // match: (ORQ x1:(MOVBload [i1] {s} p mem) sh:(SHLQconst [8] x0:(MOVBload [i0] {s} p mem))) + // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (ROLWconst [8] (MOVWload [i0] {s} p mem)) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x1 := v_0 @@ -19064,7 +19199,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { i1 := x1.AuxInt s := x1.Aux mem := x1.Args[1] - p0 := x1.Args[0] + p := x1.Args[0] sh := v_1 if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 8 { continue @@ -19078,8 +19213,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { continue } _ = x0.Args[1] - p1 := x0.Args[0] - if mem != x0.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { continue } b = mergePoint(b, x0, x1) @@ -19089,15 +19223,54 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { v1 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16) v1.AuxInt = i0 v1.Aux = s + v1.AddArg2(p, mem) + v0.AddArg(v1) + return true + } + break + } + 
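
(Aside, illustrative only and not part of the patch: every merged-load rule in this file now comes in two variants. The first keeps a single base pointer p and encodes adjacency in the AuxInt offsets, e.g. i1 == i0+1 for byte loads; the second keeps equal offsets and instead requires sequentialAddresses(p0, p1, 1) to prove that the two base pointers themselves are one byte apart. The Go-level shapes both variants recognize are sketched below; leMerge and beMerge are hypothetical names of mine, not anything in the compiler:

	package main

	import (
		"fmt"
		"math/bits"
	)

	// leMerge is the shape the plain ORL/ORQ merge rules match: two
	// adjacent byte loads OR-ed together with a shift, i.e. a
	// little-endian 16-bit read. The rewrite emits a single MOVWload.
	func leMerge(b []byte, i int) uint16 {
		return uint16(b[i]) | uint16(b[i+1])<<8
	}

	// beMerge is the byte-reversed shape the ROLWconst [8] rules match;
	// it becomes one MOVWload followed by a 16-bit rotate by 8, which on
	// a two-byte value is exactly a byte swap.
	func beMerge(b []byte, i int) uint16 {
		return uint16(b[i])<<8 | uint16(b[i+1])
	}

	func main() {
		b := []byte{0x34, 0x12}
		fmt.Printf("%#x\n", leMerge(b, 0)) // 0x1234: one 16-bit load
		fmt.Printf("%#x\n", beMerge(b, 0)) // 0x3412: load plus ROLW $8
		// ROLW $8 on a uint16 is a byte swap:
		fmt.Println(beMerge(b, 0) == bits.ReverseBytes16(leMerge(b, 0))) // true
	}

)
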
// match: (ORQ x1:(MOVBload [i] {s} p1 mem) sh:(SHLQconst [8] x0:(MOVBload [i] {s} p0 mem))) + // cond: x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (ROLWconst [8] (MOVWload [i] {s} p0 mem)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x1 := v_0 + if x1.Op != OpAMD64MOVBload { + continue + } + i := x1.AuxInt + s := x1.Aux + mem := x1.Args[1] + p1 := x1.Args[0] + sh := v_1 + if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 8 { + continue + } + x0 := sh.Args[0] + if x0.Op != OpAMD64MOVBload || x0.AuxInt != i || x0.Aux != s { + continue + } + _ = x0.Args[1] + p0 := x0.Args[0] + if mem != x0.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + continue + } + b = mergePoint(b, x0, x1) + v0 := b.NewValue0(x0.Pos, OpAMD64ROLWconst, v.Type) + v.copyOf(v0) + v0.AuxInt = 8 + v1 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16) + v1.AuxInt = i + v1.Aux = s v1.AddArg2(p0, mem) v0.AddArg(v1) return true } break } - // match: (ORQ r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p0 mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p1 mem)))) - // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh) - // result: @mergePoint(b,x0,x1) (BSWAPL (MOVLload [i0] {s} p0 mem)) + // match: (ORQ r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem)))) + // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh) + // result: @mergePoint(b,x0,x1) (BSWAPL (MOVLload [i0] {s} p mem)) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { r1 := v_0 @@ -19111,7 +19284,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { i1 := x1.AuxInt s := x1.Aux mem := x1.Args[1] - p0 := x1.Args[0] + p := x1.Args[0] sh := v_1 if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 16 { continue @@ -19129,8 +19302,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { continue } _ = x0.Args[1] - p1 := x0.Args[0] - if mem != x0.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) { + if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) { continue } b = mergePoint(b, x0, x1) @@ -19139,15 +19311,61 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { v1 := b.NewValue0(x0.Pos, OpAMD64MOVLload, typ.UInt32) v1.AuxInt = i0 v1.Aux = s + v1.AddArg2(p, mem) + v0.AddArg(v1) + return true + } + break + } + // match: (ORQ r1:(ROLWconst [8] x1:(MOVWload [i] {s} p1 mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i] {s} p0 mem)))) + // cond: x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh) + // result: @mergePoint(b,x0,x1) (BSWAPL (MOVLload [i] {s} p0 mem)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + r1 := v_0 + if r1.Op != OpAMD64ROLWconst || r1.AuxInt != 8 { + continue + } + x1 := r1.Args[0] + 
if x1.Op != OpAMD64MOVWload { + continue + } + i := x1.AuxInt + s := x1.Aux + mem := x1.Args[1] + p1 := x1.Args[0] + sh := v_1 + if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 16 { + continue + } + r0 := sh.Args[0] + if r0.Op != OpAMD64ROLWconst || r0.AuxInt != 8 { + continue + } + x0 := r0.Args[0] + if x0.Op != OpAMD64MOVWload || x0.AuxInt != i || x0.Aux != s { + continue + } + _ = x0.Args[1] + p0 := x0.Args[0] + if mem != x0.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) { + continue + } + b = mergePoint(b, x0, x1) + v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPL, v.Type) + v.copyOf(v0) + v1 := b.NewValue0(x0.Pos, OpAMD64MOVLload, typ.UInt32) + v1.AuxInt = i + v1.Aux = s v1.AddArg2(p0, mem) v0.AddArg(v1) return true } break } - // match: (ORQ r1:(BSWAPL x1:(MOVLload [i1] {s} p0 mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p1 mem)))) - // cond: i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh) - // result: @mergePoint(b,x0,x1) (BSWAPQ (MOVQload [i0] {s} p0 mem)) + // match: (ORQ r1:(BSWAPL x1:(MOVLload [i1] {s} p mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p mem)))) + // cond: i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh) + // result: @mergePoint(b,x0,x1) (BSWAPQ (MOVQload [i0] {s} p mem)) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { r1 := v_0 @@ -19161,7 +19379,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { i1 := x1.AuxInt s := x1.Aux mem := x1.Args[1] - p0 := x1.Args[0] + p := x1.Args[0] sh := v_1 if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 32 { continue @@ -19179,8 +19397,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { continue } _ = x0.Args[1] - p1 := x0.Args[0] - if mem != x0.Args[1] || !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) { + if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) { continue } b = mergePoint(b, x0, x1) @@ -19189,15 +19406,61 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { v1 := b.NewValue0(x0.Pos, OpAMD64MOVQload, typ.UInt64) v1.AuxInt = i0 v1.Aux = s + v1.AddArg2(p, mem) + v0.AddArg(v1) + return true + } + break + } + // match: (ORQ r1:(BSWAPL x1:(MOVLload [i] {s} p1 mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i] {s} p0 mem)))) + // cond: x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 4) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh) + // result: @mergePoint(b,x0,x1) (BSWAPQ (MOVQload [i] {s} p0 mem)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + r1 := v_0 + if r1.Op != OpAMD64BSWAPL { + continue + } + x1 := r1.Args[0] + if x1.Op != OpAMD64MOVLload { + continue + } + i := x1.AuxInt + s := x1.Aux + mem := x1.Args[1] + p1 := x1.Args[0] + sh := v_1 + if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 32 { + continue + } + r0 := sh.Args[0] + if r0.Op != OpAMD64BSWAPL { + continue + } + x0 := r0.Args[0] + if x0.Op != OpAMD64MOVLload || x0.AuxInt != i || x0.Aux != s { + 
continue + } + _ = x0.Args[1] + p0 := x0.Args[0] + if mem != x0.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 4) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) { + continue + } + b = mergePoint(b, x0, x1) + v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPQ, v.Type) + v.copyOf(v0) + v1 := b.NewValue0(x0.Pos, OpAMD64MOVQload, typ.UInt64) + v1.AuxInt = i + v1.Aux = s v1.AddArg2(p0, mem) v0.AddArg(v1) return true } break } - // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p0 mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p1 mem)) y)) - // cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) - // result: @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j1] (ROLWconst [8] (MOVWload [i0] {s} p0 mem))) y) + // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)) y)) + // cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) + // result: @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j1] (ROLWconst [8] (MOVWload [i0] {s} p mem))) y) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { s0 := v_0 @@ -19212,7 +19475,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { i0 := x0.AuxInt s := x0.Aux mem := x0.Args[1] - p0 := x0.Args[0] + p := x0.Args[0] or := v_1 if or.Op != OpAMD64ORQ { continue @@ -19235,12 +19498,74 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { continue } _ = x1.Args[1] + if p != x1.Args[0] || mem != x1.Args[1] { + continue + } + y := or_1 + if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { + continue + } + b = mergePoint(b, x0, x1, y) + v0 := b.NewValue0(x1.Pos, OpAMD64ORQ, v.Type) + v.copyOf(v0) + v1 := b.NewValue0(x1.Pos, OpAMD64SHLQconst, v.Type) + v1.AuxInt = j1 + v2 := b.NewValue0(x1.Pos, OpAMD64ROLWconst, typ.UInt16) + v2.AuxInt = 8 + v3 := b.NewValue0(x1.Pos, OpAMD64MOVWload, typ.UInt16) + v3.AuxInt = i0 + v3.Aux = s + v3.AddArg2(p, mem) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(v1, y) + return true + } + } + break + } + // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBload [i] {s} p0 mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBload [i] {s} p1 mem)) y)) + // cond: j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) + // result: @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j1] (ROLWconst [8] (MOVWload [i] {s} p0 mem))) y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + s0 := v_0 + if s0.Op != OpAMD64SHLQconst { + continue + } + j0 := s0.AuxInt + x0 := s0.Args[0] + if x0.Op != OpAMD64MOVBload { + continue + } + i := x0.AuxInt + s := x0.Aux + mem := x0.Args[1] + p0 := x0.Args[0] + or := v_1 + if or.Op != OpAMD64ORQ { + continue + } + _ = or.Args[1] + or_0 := or.Args[0] + or_1 := or.Args[1] + for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 { + s1 := or_0 + if s1.Op != OpAMD64SHLQconst { + continue + } + j1 := s1.AuxInt + x1 := s1.Args[0] + if x1.Op != OpAMD64MOVBload || x1.AuxInt != i || x1.Aux != s { + continue + } + 
_ = x1.Args[1] p1 := x1.Args[0] if mem != x1.Args[1] { continue } y := or_1 - if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { + if !(j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { continue } b = mergePoint(b, x0, x1, y) @@ -19251,7 +19576,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { v2 := b.NewValue0(x1.Pos, OpAMD64ROLWconst, typ.UInt16) v2.AuxInt = 8 v3 := b.NewValue0(x1.Pos, OpAMD64MOVWload, typ.UInt16) - v3.AuxInt = i0 + v3.AuxInt = i v3.Aux = s v3.AddArg2(p0, mem) v2.AddArg(v3) @@ -19262,9 +19587,9 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { } break } - // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p0 mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p1 mem))) y)) - // cond: i1 == i0+2 && j1 == j0-16 && j1 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, r0, r1, s0, s1, or) - // result: @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j1] (BSWAPL (MOVLload [i0] {s} p0 mem))) y) + // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))) y)) + // cond: i1 == i0+2 && j1 == j0-16 && j1 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, r0, r1, s0, s1, or) + // result: @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j1] (BSWAPL (MOVLload [i0] {s} p mem))) y) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { s0 := v_0 @@ -19283,7 +19608,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { i0 := x0.AuxInt s := x0.Aux mem := x0.Args[1] - p0 := x0.Args[0] + p := x0.Args[0] or := v_1 if or.Op != OpAMD64ORQ { continue @@ -19310,12 +19635,81 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { continue } _ = x1.Args[1] + if p != x1.Args[0] || mem != x1.Args[1] { + continue + } + y := or_1 + if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, r0, r1, s0, s1, or)) { + continue + } + b = mergePoint(b, x0, x1, y) + v0 := b.NewValue0(x1.Pos, OpAMD64ORQ, v.Type) + v.copyOf(v0) + v1 := b.NewValue0(x1.Pos, OpAMD64SHLQconst, v.Type) + v1.AuxInt = j1 + v2 := b.NewValue0(x1.Pos, OpAMD64BSWAPL, typ.UInt32) + v3 := b.NewValue0(x1.Pos, OpAMD64MOVLload, typ.UInt32) + v3.AuxInt = i0 + v3.Aux = s + v3.AddArg2(p, mem) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(v1, y) + return true + } + } + break + } + // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i] {s} p0 mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i] {s} p1 mem))) y)) + // cond: j1 == j0-16 && j1 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, r0, r1, s0, s1, or) + // result: @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j1] (BSWAPL (MOVLload [i] {s} p0 mem))) 
y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + s0 := v_0 + if s0.Op != OpAMD64SHLQconst { + continue + } + j0 := s0.AuxInt + r0 := s0.Args[0] + if r0.Op != OpAMD64ROLWconst || r0.AuxInt != 8 { + continue + } + x0 := r0.Args[0] + if x0.Op != OpAMD64MOVWload { + continue + } + i := x0.AuxInt + s := x0.Aux + mem := x0.Args[1] + p0 := x0.Args[0] + or := v_1 + if or.Op != OpAMD64ORQ { + continue + } + _ = or.Args[1] + or_0 := or.Args[0] + or_1 := or.Args[1] + for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 { + s1 := or_0 + if s1.Op != OpAMD64SHLQconst { + continue + } + j1 := s1.AuxInt + r1 := s1.Args[0] + if r1.Op != OpAMD64ROLWconst || r1.AuxInt != 8 { + continue + } + x1 := r1.Args[0] + if x1.Op != OpAMD64MOVWload || x1.AuxInt != i || x1.Aux != s { + continue + } + _ = x1.Args[1] p1 := x1.Args[0] if mem != x1.Args[1] { continue } y := or_1 - if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, r0, r1, s0, s1, or)) { + if !(j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, r0, r1, s0, s1, or)) { continue } b = mergePoint(b, x0, x1, y) @@ -19325,7 +19719,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { v1.AuxInt = j1 v2 := b.NewValue0(x1.Pos, OpAMD64BSWAPL, typ.UInt32) v3 := b.NewValue0(x1.Pos, OpAMD64MOVLload, typ.UInt32) - v3.AuxInt = i0 + v3.AuxInt = i v3.Aux = s v3.AddArg2(p0, mem) v2.AddArg(v3) diff --git a/test/codegen/memcombine.go b/test/codegen/memcombine.go index e2d703cb0c..6ad9514557 100644 --- a/test/codegen/memcombine.go +++ b/test/codegen/memcombine.go @@ -160,14 +160,14 @@ func load_le_byte8_uint64_inv(s []byte) uint64 { func load_be_byte2_uint16(s []byte) uint16 { // arm64:`MOVHU\t\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB` - // amd64:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR` + // amd64:`MOVWLZX\s\([A-Z]+\)`,`ROLW`,-`MOVB`,-`OR` // ppc64le:`MOVHBR\t\(R[0-9]+\)`,-`MOVBZ` return uint16(s[0])<<8 | uint16(s[1]) } func load_be_byte2_uint16_inv(s []byte) uint16 { // arm64:`MOVHU\t\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB` - // amd64:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR` + // amd64:`MOVWLZX\s\([A-Z]+\)`,`ROLW`,-`MOVB`,-`OR` // ppc64le:`MOVHBR\t\(R[0-9]+\)`,-`MOVBZ` return uint16(s[1]) | uint16(s[0])<<8 } @@ -179,7 +179,7 @@ func load_be_byte4_uint32(s []byte) uint32 { func load_be_byte4_uint32_inv(s []byte) uint32 { // arm64:`MOVWU\t\(R[0-9]+\)`,`REVW`,-`ORR`,-`REV16W`,-`MOV[BH]` - // amd64:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR` + // amd64:`MOVL\s\([A-Z]+\)`,`BSWAPL`,-`MOV[BW]`,-`OR` return uint32(s[3]) | uint32(s[2])<<8 | uint32(s[1])<<16 | uint32(s[0])<<24 } @@ -191,7 +191,7 @@ func load_be_byte8_uint64(s []byte) uint64 { func load_be_byte8_uint64_inv(s []byte) uint64 { // arm64:`MOVD\t\(R[0-9]+\)`,`REV`,-`ORR`,-`REVW`,-`REV16W`,-`MOV[BHW]` - // amd64:`MOVQ\s\([A-Z]+\),\s[A-Z]+`,-`MOV[BWL]\t[^$]`,-`OR` + // amd64:`MOVQ\s\([A-Z]+\),\s[A-Z]+`,`BSWAPQ`,-`MOV[BWL]\t[^$]`,-`OR` // ppc64le:`MOVDBR\t\(R[0-9]+\)`,-`MOV[BHW]Z` return uint64(s[7]) | uint64(s[6])<<8 | uint64(s[5])<<16 | uint64(s[4])<<24 | uint64(s[3])<<32 | uint64(s[2])<<40 | uint64(s[1])<<48 | uint64(s[0])<<56 } -- cgit v1.2.3-54-g00ecf From 3ee782b11da3fb0313603ad0e3be8ab6755802a9 Mon Sep 17 00:00:00 2001 From: "Bryan C. 
Mills" Date: Fri, 27 Mar 2020 15:04:09 -0400 Subject: os/signal: rework test timeouts and concurrency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use a uniform function (named “quiesce”) to wait for possible signals in a way that gives the kernel many opportunities to deliver them. Simplify channel usage and concurrency in stress tests. Use (*testing.T).Deadline instead of parsing the deadline in TestMain. In TestStop, sleep forever in a loop if we expect the test to die from a signal. That should reduce the flakiness of TestNohup, since TestStop will no longer spuriously pass when run as a subprocess of TestNohup. Since independent signals should not interfere, run the different signals in TestStop in parallel when testing in short mode. Since TestNohup runs TestStop as a subprocess, and TestStop needs to wait many times for signals to quiesce, run its test subprocesses concurrently and in short mode — reducing the latency of that test by more than a factor of 2. The above two changes reduce the running time of TestNohup on my workstation to ~345ms, making it possible to run much larger counts of the test in the same amount of wall time. If the test remains flaky after this CL, we can spend all or part of that latency improvement on a longer settle time. Updates #33174 Change-Id: I09206f213d8c1888b50bf974f965221a5d482419 Reviewed-on: https://go-review.googlesource.com/c/go/+/226138 Run-TryBot: Bryan C. Mills TryBot-Result: Gobot Gobot Reviewed-by: Ian Lance Taylor --- src/os/signal/signal_test.go | 359 +++++++++++++++++++++++++------------------ 1 file changed, 212 insertions(+), 147 deletions(-) diff --git a/src/os/signal/signal_test.go b/src/os/signal/signal_test.go index a4489ffd3e..a85d7606c8 100644 --- a/src/os/signal/signal_test.go +++ b/src/os/signal/signal_test.go @@ -22,21 +22,10 @@ import ( "time" ) -var testDeadline time.Time - -func TestMain(m *testing.M) { - flag.Parse() - - // TODO(golang.org/issue/28135): Remove this setup and use t.Deadline instead. - timeoutFlag := flag.Lookup("test.timeout") - if timeoutFlag != nil { - if d := timeoutFlag.Value.(flag.Getter).Get().(time.Duration); d != 0 { - testDeadline = time.Now().Add(d) - } - } - - os.Exit(m.Run()) -} +// settleTime is an upper bound on how long we expect signals to take to be +// delivered. Lower values make the test faster, but also flakier — especially +// on heavily loaded systems. +const settleTime = 100 * time.Millisecond func waitSig(t *testing.T, c <-chan os.Signal, sig os.Signal) { waitSig1(t, c, sig, false) @@ -48,27 +37,45 @@ func waitSigAll(t *testing.T, c <-chan os.Signal, sig os.Signal) { func waitSig1(t *testing.T, c <-chan os.Signal, sig os.Signal, all bool) { // Sleep multiple times to give the kernel more tries to // deliver the signal. - for i := 0; i < 10; i++ { + start := time.Now() + timer := time.NewTimer(settleTime / 10) + defer timer.Stop() + // If the caller notified for all signals on c, filter out SIGURG, + // which is used for runtime preemption and can come at unpredictable times. + // General user code should filter out all unexpected signals instead of just + // SIGURG, but since os/signal is tightly coupled to the runtime it seems + // appropriate to be stricter here. + for time.Since(start) < settleTime { select { case s := <-c: - // If the caller notified for all signals on - // c, filter out SIGURG, which is used for - // runtime preemption and can come at - // unpredictable times. 
- if all && s == syscall.SIGURG { - continue + if s == sig { + return } - if s != sig { + if !all || s != syscall.SIGURG { t.Fatalf("signal was %v, want %v", s, sig) } - return - - case <-time.After(100 * time.Millisecond): + case <-timer.C: + timer.Reset(settleTime / 10) } } t.Fatalf("timeout waiting for %v", sig) } +// quiesce waits until we can be reasonably confident that all pending signals +// have been delivered by the OS. +func quiesce() { + // The kernel will deliver a signal as a thread returns + // from a syscall. If the only active thread is sleeping, + // and the system is busy, the kernel may not get around + // to waking up a thread to catch the signal. + // We try splitting up the sleep to give the kernel + // many chances to deliver the signal. + start := time.Now() + for time.Since(start) < settleTime { + time.Sleep(settleTime / 10) + } +} + // Test that basic signal handling works. func TestSignal(t *testing.T) { // Ask for SIGHUP @@ -112,49 +119,42 @@ func TestStress(t *testing.T) { dur = 100 * time.Millisecond } defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(4)) - done := make(chan bool) - finished := make(chan bool) - go func() { - sig := make(chan os.Signal, 1) - Notify(sig, syscall.SIGUSR1) - defer Stop(sig) - Loop: - for { - select { - case <-sig: - case <-done: - break Loop - } - } - finished <- true - }() + + sig := make(chan os.Signal, 1) + Notify(sig, syscall.SIGUSR1) + go func() { - Loop: + stop := time.After(dur) for { select { - case <-done: - break Loop + case <-stop: + // Allow enough time for all signals to be delivered before we stop + // listening for them. + quiesce() + Stop(sig) + // According to its documentation, “[w]hen Stop returns, it in + // guaranteed that c will receive no more signals.” So we can safely + // close sig here: if there is a send-after-close race here, that is a + // bug in Stop and we would like to detect it. + close(sig) + return + default: syscall.Kill(syscall.Getpid(), syscall.SIGUSR1) runtime.Gosched() } } - finished <- true }() - time.Sleep(dur) - close(done) - <-finished - <-finished - // When run with 'go test -cpu=1,2,4' SIGUSR1 from this test can slip - // into subsequent TestSignal() causing failure. - // Sleep for a while to reduce the possibility of the failure. - time.Sleep(10 * time.Millisecond) + + for range sig { + // Receive signals until the sender closes sig. + } } func testCancel(t *testing.T, ignore bool) { // Send SIGWINCH. By default this signal should be ignored. syscall.Kill(syscall.Getpid(), syscall.SIGWINCH) - time.Sleep(100 * time.Millisecond) + quiesce() // Ask to be notified on c1 when a SIGWINCH is received. c1 := make(chan os.Signal, 1) @@ -202,17 +202,19 @@ func testCancel(t *testing.T, ignore bool) { syscall.Kill(syscall.Getpid(), syscall.SIGHUP) } + quiesce() + select { case s := <-c1: t.Fatalf("unexpected signal %v", s) - case <-time.After(100 * time.Millisecond): + default: // nothing to read - good } select { case s := <-c2: t.Fatalf("unexpected signal %v", s) - case <-time.After(100 * time.Millisecond): + default: // nothing to read - good } @@ -289,7 +291,10 @@ func TestDetectNohup(t *testing.T) { } } -var sendUncaughtSighup = flag.Int("send_uncaught_sighup", 0, "send uncaught SIGHUP during TestStop") +var ( + sendUncaughtSighup = flag.Int("send_uncaught_sighup", 0, "send uncaught SIGHUP during TestStop") + dieFromSighup = flag.Bool("die_from_sighup", false, "wait to die from uncaught SIGHUP") +) // Test that Stop cancels the channel's registrations. 
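// Throughout this file, fixed sleeps are replaced by the same two-step
// discipline: send (or stop sending) signals, then quiesce() before asserting
// on the channel. A minimal sketch of a negative assertion under that
// discipline (an illustrative fragment, not the test code itself):
//
//	Stop(c)                             // no further registrations for sig
//	syscall.Kill(syscall.Getpid(), sig) // signal should now go unnoticed
//	quiesce()                           // give the kernel time to deliver
//	select {
//	case s := <-c:
//		t.Fatalf("unexpected signal %v", s)
//	default:
//		// nothing to read - good
//	}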
func TestStop(t *testing.T) { @@ -300,54 +305,61 @@ func TestStop(t *testing.T) { } for _, sig := range sigs { - // Send the signal. - // If it's SIGWINCH, we should not see it. - // If it's SIGHUP, maybe we'll die. Let the flag tell us what to do. - if sig == syscall.SIGWINCH || (sig == syscall.SIGHUP && *sendUncaughtSighup == 1) { - syscall.Kill(syscall.Getpid(), sig) - } - - // The kernel will deliver a signal as a thread returns - // from a syscall. If the only active thread is sleeping, - // and the system is busy, the kernel may not get around - // to waking up a thread to catch the signal. - // We try splitting up the sleep to give the kernel - // another chance to deliver the signal. - time.Sleep(50 * time.Millisecond) - time.Sleep(50 * time.Millisecond) - - // Ask for signal - c := make(chan os.Signal, 1) - Notify(c, sig) - defer Stop(c) - - // Send this process that signal - syscall.Kill(syscall.Getpid(), sig) - waitSig(t, c, sig) + sig := sig + t.Run(fmt.Sprint(sig), func(t *testing.T) { + // When calling Notify with a specific signal, + // independent signals should not interfere with each other, + // and we end up needing to wait for signals to quiesce a lot. + // Test the three different signals concurrently. + t.Parallel() + + // Send the signal. + // If it's SIGWINCH or SIGUSR1 we should not see it. + // If it's SIGHUP, maybe we'll die. Let the flag tell us what to do. + switch sig { + case syscall.SIGHUP: + if *sendUncaughtSighup == 1 { + syscall.Kill(syscall.Getpid(), sig) + for *dieFromSighup { + quiesce() + } + } + default: + syscall.Kill(syscall.Getpid(), sig) + } + quiesce() - Stop(c) - time.Sleep(50 * time.Millisecond) - select { - case s := <-c: - t.Fatalf("unexpected signal %v", s) - case <-time.After(50 * time.Millisecond): - // nothing to read - good - } + // Ask for signal + c := make(chan os.Signal, 1) + Notify(c, sig) - // Send the signal. - // If it's SIGWINCH, we should not see it. - // If it's SIGHUP, maybe we'll die. Let the flag tell us what to do. - if sig != syscall.SIGHUP || *sendUncaughtSighup == 2 { + // Send this process that signal syscall.Kill(syscall.Getpid(), sig) - } + waitSig(t, c, sig) + + // Stop watching for the signal and send it again. + // If it's SIGHUP, maybe we'll die. Let the flag tell us what to do. + Stop(c) + switch sig { + case syscall.SIGHUP: + if *sendUncaughtSighup == 2 { + syscall.Kill(syscall.Getpid(), sig) + for *dieFromSighup { + quiesce() + } + } + default: + syscall.Kill(syscall.Getpid(), sig) + } - time.Sleep(50 * time.Millisecond) - select { - case s := <-c: - t.Fatalf("unexpected signal %v", s) - case <-time.After(50 * time.Millisecond): - // nothing to read - good - } + quiesce() + select { + case s := <-c: + t.Fatalf("unexpected signal %v", s) + default: + // nothing to read - good + } + }) } } @@ -371,12 +383,38 @@ func TestNohup(t *testing.T) { // // Both should fail without nohup and succeed with nohup. + var subTimeout time.Duration + + var wg sync.WaitGroup + wg.Add(2) + if deadline, ok := t.Deadline(); ok { + subTimeout = time.Until(deadline) + subTimeout -= subTimeout / 10 // Leave 10% headroom for propagating output. 
+ } for i := 1; i <= 2; i++ { - out, err := exec.Command(os.Args[0], "-test.run=TestStop", "-send_uncaught_sighup="+strconv.Itoa(i)).CombinedOutput() - if err == nil { - t.Fatalf("ran test with -send_uncaught_sighup=%d and it succeeded: expected failure.\nOutput:\n%s", i, out) - } + i := i + go t.Run(fmt.Sprintf("uncaught-%d", i), func(t *testing.T) { + defer wg.Done() + + args := []string{ + "-test.v", + "-test.run=TestStop", + "-send_uncaught_sighup=" + strconv.Itoa(i), + "-die_from_sighup", + } + if subTimeout != 0 { + args = append(args, fmt.Sprintf("-test.timeout=%v", subTimeout)) + } + out, err := exec.Command(os.Args[0], args...).CombinedOutput() + + if err == nil { + t.Errorf("ran test with -send_uncaught_sighup=%d and it succeeded: expected failure.\nOutput:\n%s", i, out) + } else { + t.Logf("test with -send_uncaught_sighup=%d failed as expected.\nError: %v\nOutput:\n%s", i, err, out) + } + }) } + wg.Wait() Stop(c) @@ -387,21 +425,46 @@ func TestNohup(t *testing.T) { } // Again, this time with nohup, assuming we can find it. - _, err := os.Stat("/usr/bin/nohup") + _, err := exec.LookPath("nohup") if err != nil { t.Skip("cannot find nohup; skipping second half of test") } + wg.Add(2) + if deadline, ok := t.Deadline(); ok { + subTimeout = time.Until(deadline) + subTimeout -= subTimeout / 10 // Leave 10% headroom for propagating output. + } for i := 1; i <= 2; i++ { - os.Remove("nohup.out") - out, err := exec.Command("/usr/bin/nohup", os.Args[0], "-test.run=TestStop", "-send_uncaught_sighup="+strconv.Itoa(i)).CombinedOutput() + i := i + go t.Run(fmt.Sprintf("nohup-%d", i), func(t *testing.T) { + defer wg.Done() - data, _ := ioutil.ReadFile("nohup.out") - os.Remove("nohup.out") - if err != nil { - t.Fatalf("ran test with -send_uncaught_sighup=%d under nohup and it failed: expected success.\nError: %v\nOutput:\n%s%s", i, err, out, data) - } + // POSIX specifies that nohup writes to a file named nohup.out if standard + // output is a terminal. However, for an exec.Command, standard output is + // not a terminal — so we don't need to read or remove that file (and, + // indeed, cannot even create it if the current user is unable to write to + // GOROOT/src, such as when GOROOT is installed and owned by root). + + args := []string{ + os.Args[0], + "-test.v", + "-test.run=TestStop", + "-send_uncaught_sighup=" + strconv.Itoa(i), + } + if subTimeout != 0 { + args = append(args, fmt.Sprintf("-test.timeout=%v", subTimeout)) + } + out, err := exec.Command("nohup", args...).CombinedOutput() + + if err != nil { + t.Errorf("ran test with -send_uncaught_sighup=%d under nohup and it failed: expected success.\nError: %v\nOutput:\n%s", i, err, out) + } else { + t.Logf("ran test with -send_uncaught_sighup=%d under nohup.\nOutput:\n%s", i, out) + } + }) } + wg.Wait() } // Test that SIGCONT works (issue 8953). @@ -416,7 +479,7 @@ func TestSIGCONT(t *testing.T) { // Test race between stopping and receiving a signal (issue 14571). 
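// These subprocess-based tests (TestNohup above, TestAtomicStop below) share
// one more idiom worth calling out: deriving the child's -test.timeout from
// the parent's own deadline. A minimal sketch (an illustrative fragment;
// variable names are assumptions):
//
//	if deadline, ok := t.Deadline(); ok {
//		subTimeout := time.Until(deadline)
//		subTimeout -= subTimeout / 10 // leave headroom to propagate output
//		args = append(args, fmt.Sprintf("-test.timeout=%v", subTimeout))
//	}
//
// Propagating the deadline keeps a hung subprocess from silently consuming
// the entire test budget and losing its output.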
func TestAtomicStop(t *testing.T) { if os.Getenv("GO_TEST_ATOMIC_STOP") != "" { - atomicStopTestProgram() + atomicStopTestProgram(t) t.Fatal("atomicStopTestProgram returned") } @@ -438,8 +501,8 @@ func TestAtomicStop(t *testing.T) { const execs = 10 for i := 0; i < execs; i++ { timeout := "0" - if !testDeadline.IsZero() { - timeout = testDeadline.Sub(time.Now()).String() + if deadline, ok := t.Deadline(); ok { + timeout = time.Until(deadline).String() } cmd := exec.Command(os.Args[0], "-test.run=TestAtomicStop", "-test.timeout="+timeout) cmd.Env = append(os.Environ(), "GO_TEST_ATOMIC_STOP=1") @@ -478,7 +541,7 @@ func TestAtomicStop(t *testing.T) { // atomicStopTestProgram is run in a subprocess by TestAtomicStop. // It tries to trigger a signal delivery race. This function should // either catch a signal or die from it. -func atomicStopTestProgram() { +func atomicStopTestProgram(t *testing.T) { // This test won't work if SIGINT is ignored here. if Ignored(syscall.SIGINT) { fmt.Println("SIGINT is ignored") @@ -488,10 +551,10 @@ func atomicStopTestProgram() { const tries = 10 timeout := 2 * time.Second - if !testDeadline.IsZero() { + if deadline, ok := t.Deadline(); ok { // Give each try an equal slice of the deadline, with one slice to spare for // cleanup. - timeout = testDeadline.Sub(time.Now()) / (tries + 1) + timeout = time.Until(deadline) / (tries + 1) } pid := syscall.Getpid() @@ -541,43 +604,45 @@ func TestTime(t *testing.T) { dur = 100 * time.Millisecond } defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(4)) - done := make(chan bool) - finished := make(chan bool) - go func() { - sig := make(chan os.Signal, 1) - Notify(sig, syscall.SIGUSR1) - defer Stop(sig) - Loop: - for { - select { - case <-sig: - case <-done: - break Loop - } - } - finished <- true - }() + + sig := make(chan os.Signal, 1) + Notify(sig, syscall.SIGUSR1) + + stop := make(chan struct{}) go func() { - Loop: for { select { - case <-done: - break Loop + case <-stop: + // Allow enough time for all signals to be delivered before we stop + // listening for them. + quiesce() + Stop(sig) + // According to its documentation, “[w]hen Stop returns, it in + // guaranteed that c will receive no more signals.” So we can safely + // close sig here: if there is a send-after-close race, that is a bug in + // Stop and we would like to detect it. + close(sig) + return + default: syscall.Kill(syscall.Getpid(), syscall.SIGUSR1) runtime.Gosched() } } - finished <- true }() + + done := make(chan struct{}) + go func() { + for range sig { + // Receive signals until the sender closes sig. + } + close(done) + }() + t0 := time.Now() for t1 := t0; t1.Sub(t0) < dur; t1 = time.Now() { } // hammering on getting time - close(done) - <-finished - <-finished - // When run with 'go test -cpu=1,2,4' SIGUSR1 from this test can slip - // into subsequent TestSignal() causing failure. - // Sleep for a while to reduce the possibility of the failure. - time.Sleep(10 * time.Millisecond) + + close(stop) + <-done } -- cgit v1.2.3-54-g00ecf From a265c2c448497fcee1633d2e2b912da52ea22d3c Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Mon, 30 Mar 2020 00:38:09 +0200 Subject: cmd/cgo, misc/cgo: only cache anonymous struct typedefs with parent name CL 181857 broke the translation of certain C types using cmd/cgo -godefs because it stores each typedef, array and qualified type with their parent type name in the translation cache. Fix this by only considering the parent type for typedefs of anonymous structs which is the only case where types might become ambiguous. 
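To make the distinction concrete, the resulting cache-key rule amounts to the
following (a hypothetical helper for illustration, not the actual cmd/cgo
code):

	// typedefKey returns a translation-cache key for a typedef. Only a
	// typedef of an anonymous struct is ambiguous without context, so
	// only that case is qualified by the parent type's name.
	func typedefKey(typedefName, structName, parent string) string {
		if structName == "" { // anonymous struct
			return parent + ">" + typedefName
		}
		return typedefName
	}

A named struct such as struct_tt already yields a stable key on its own,
which is why unconditionally qualifying every type by its parent broke the
-godefs translations reported in issues 37479 and 37621.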
Updates #31891 Fixes #37479 Fixes #37621 Change-Id: I301a749ec89585789cb0d213593bb8b7341beb88 Reviewed-on: https://go-review.googlesource.com/c/go/+/226341 Run-TryBot: Tobias Klauser TryBot-Result: Gobot Gobot Reviewed-by: Ian Lance Taylor --- misc/cgo/testgodefs/testdata/issue37479.go | 33 ++++++++++++++++++++++++++++++ misc/cgo/testgodefs/testdata/issue37621.go | 23 +++++++++++++++++++++ misc/cgo/testgodefs/testdata/main.go | 8 ++++++++ misc/cgo/testgodefs/testgodefs_test.go | 2 ++ src/cmd/cgo/gcc.go | 19 ++++++++++++++--- 5 files changed, 82 insertions(+), 3 deletions(-) create mode 100644 misc/cgo/testgodefs/testdata/issue37479.go create mode 100644 misc/cgo/testgodefs/testdata/issue37621.go diff --git a/misc/cgo/testgodefs/testdata/issue37479.go b/misc/cgo/testgodefs/testdata/issue37479.go new file mode 100644 index 0000000000..a210eb5bc5 --- /dev/null +++ b/misc/cgo/testgodefs/testdata/issue37479.go @@ -0,0 +1,33 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// +// +build ignore + +package main + +/* +typedef struct A A; + +typedef struct { + struct A *next; + struct A **prev; +} N; + +struct A +{ + N n; +}; + +typedef struct B +{ + A* a; +} B; +*/ +import "C" + +type N C.N + +type A C.A + +type B C.B diff --git a/misc/cgo/testgodefs/testdata/issue37621.go b/misc/cgo/testgodefs/testdata/issue37621.go new file mode 100644 index 0000000000..d5ace3f6d6 --- /dev/null +++ b/misc/cgo/testgodefs/testdata/issue37621.go @@ -0,0 +1,23 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// +// +build ignore + +package main + +/* +struct tt { + long long a; + long long b; +}; + +struct s { + struct tt ts[3]; +}; +*/ +import "C" + +type TT C.struct_tt + +type S C.struct_s diff --git a/misc/cgo/testgodefs/testdata/main.go b/misc/cgo/testgodefs/testdata/main.go index 1ce0fd0d1e..ef45b95e65 100644 --- a/misc/cgo/testgodefs/testdata/main.go +++ b/misc/cgo/testgodefs/testdata/main.go @@ -11,5 +11,13 @@ var v2 = v1.L // Test that P, Q, and R all point to byte. var v3 = Issue8478{P: (*byte)(nil), Q: (**byte)(nil), R: (***byte)(nil)} +// Test that N, A and B are fully defined +var v4 = N{} +var v5 = A{} +var v6 = B{} + +// Test that S is fully defined +var v7 = S{} + func main() { } diff --git a/misc/cgo/testgodefs/testgodefs_test.go b/misc/cgo/testgodefs/testgodefs_test.go index c02c3ff0ac..438d23d65c 100644 --- a/misc/cgo/testgodefs/testgodefs_test.go +++ b/misc/cgo/testgodefs/testgodefs_test.go @@ -21,6 +21,8 @@ var filePrefixes = []string{ "anonunion", "issue8478", "fieldtypedef", + "issue37479", + "issue37621", } func TestGoDefs(t *testing.T) { diff --git a/src/cmd/cgo/gcc.go b/src/cmd/cgo/gcc.go index fcab494ea0..c4128e9502 100644 --- a/src/cmd/cgo/gcc.go +++ b/src/cmd/cgo/gcc.go @@ -2243,7 +2243,7 @@ func (c *typeConv) loadType(dtype dwarf.Type, pos token.Pos, parent string) *Typ // Translate to zero-length array instead. 
count = 0 } - sub := c.loadType(dt.Type, pos, key) + sub := c.Type(dt.Type, pos) t.Align = sub.Align t.Go = &ast.ArrayType{ Len: c.intExpr(count), @@ -2388,7 +2388,7 @@ func (c *typeConv) loadType(dtype dwarf.Type, pos token.Pos, parent string) *Typ c.ptrs[key] = append(c.ptrs[key], t) case *dwarf.QualType: - t1 := c.loadType(dt.Type, pos, key) + t1 := c.Type(dt.Type, pos) t.Size = t1.Size t.Align = t1.Align t.Go = t1.Go @@ -2472,7 +2472,13 @@ func (c *typeConv) loadType(dtype dwarf.Type, pos token.Pos, parent string) *Typ } name := c.Ident("_Ctype_" + dt.Name) goIdent[name.Name] = name - sub := c.loadType(dt.Type, pos, key) + akey := "" + if c.anonymousStructTypedef(dt) { + // only load type recursively for typedefs of anonymous + // structs, see issues 37479 and 37621. + akey = key + } + sub := c.loadType(dt.Type, pos, akey) if c.badPointerTypedef(dt) { // Treat this typedef as a uintptr. s := *sub @@ -2993,6 +2999,13 @@ func fieldPrefix(fld []*ast.Field) string { return prefix } +// anonymousStructTypedef reports whether dt is a C typedef for an anonymous +// struct. +func (c *typeConv) anonymousStructTypedef(dt *dwarf.TypedefType) bool { + st, ok := dt.Type.(*dwarf.StructType) + return ok && st.StructName == "" +} + // badPointerTypedef reports whether t is a C typedef that should not be considered a pointer in Go. // A typedef is bad if C code sometimes stores non-pointers in this type. // TODO: Currently our best solution is to find these manually and list them as -- cgit v1.2.3-54-g00ecf From 14ad23d1f599199f89ba32cc4bc20049277ce9c1 Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Sun, 29 Mar 2020 14:21:12 +0200 Subject: cmd/compile: avoid zero extensions after 32-bit shifts zeroUpper32Bits wasn't checking for shift-extension ops. This would not check shifts that were marking as bounded by prove (normally, shifts are wrapped in a sequence that ends with an ANDL, and zeroUpper32Bits would see the ANDL). This produces no changes on generated output right now, but will be important once CL196679 lands because many shifts will be marked as bounded, and lower will stop generating the masking code sequence around them. Change-Id: Iaea94acc5b60bb9a5021c9fb7e4a1e2e5244435e Reviewed-on: https://go-review.googlesource.com/c/go/+/226338 Reviewed-by: Keith Randall --- src/cmd/compile/internal/ssa/rewrite.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go index 878b15eeee..51dba5eb71 100644 --- a/src/cmd/compile/internal/ssa/rewrite.go +++ b/src/cmd/compile/internal/ssa/rewrite.go @@ -990,7 +990,9 @@ func zeroUpper32Bits(x *Value, depth int) bool { OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL, OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst, OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst, - OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL: + OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL, + OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst, + OpAMD64SHLL, OpAMD64SHLLconst: return true case OpArg: return x.Type.Width == 4 -- cgit v1.2.3-54-g00ecf From 16237b22a85b2b20bb7915d3c64a7a2e071623ea Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Mon, 30 Mar 2020 15:11:52 -0400 Subject: cmd/objdump: test disassembly on ARM and ARM64 We support disassembly on both ARM and ARM64. Tests are only enabled on one or the other. This CL enables both. 
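The assertions themselves reduce to a containment check over the objdump
listing; a minimal sketch of that idea (an assumed helper, not the test's
actual code):

	// missing reports which expected substrings are absent from the
	// disassembly output.
	func missing(out string, need []string) []string {
		var m []string
		for _, s := range need {
			if !strings.Contains(out, s) {
				m = append(m, s)
			}
		}
		return m
	}

Each GOARCH contributes its own table of expected strings (x86Need, armNeed,
arm64Need, and a shared table for GNU syntax), so enabling both ARM and ARM64
is mostly a matter of filling in those tables.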
Change-Id: If89d78b975c241c2b14f72b714dcdc771b4b382c Reviewed-on: https://go-review.googlesource.com/c/go/+/226459 Run-TryBot: Cherry Zhang TryBot-Result: Gobot Gobot Reviewed-by: David Chase --- src/cmd/objdump/objdump_test.go | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/cmd/objdump/objdump_test.go b/src/cmd/objdump/objdump_test.go index 5030ec65d2..c974d6707b 100644 --- a/src/cmd/objdump/objdump_test.go +++ b/src/cmd/objdump/objdump_test.go @@ -58,7 +58,7 @@ func buildObjdump() error { return nil } -var x86Need = []string{ +var x86Need = []string{ // for both 386 and AMD64 "JMP main.main(SB)", "CALL main.Println(SB)", "RET", @@ -82,7 +82,13 @@ var armNeed = []string{ "RET", } -var arm64GnuNeed = []string{ +var arm64Need = []string{ + "JMP main.main(SB)", + "CALL main.Println(SB)", + "RET", +} + +var armGnuNeed = []string{ // for both ARM and AMR64 "ldr", "bl", "cmp", @@ -153,6 +159,8 @@ func testDisasm(t *testing.T, printCode bool, printGnuAsm bool, flags ...string) need = append(need, x86Need...) case "arm": need = append(need, armNeed...) + case "arm64": + need = append(need, arm64Need...) case "ppc64", "ppc64le": need = append(need, ppcNeed...) } @@ -163,8 +171,8 @@ func testDisasm(t *testing.T, printCode bool, printGnuAsm bool, flags ...string) need = append(need, amd64GnuNeed...) case "386": need = append(need, i386GnuNeed...) - case "arm64": - need = append(need, arm64GnuNeed...) + case "arm", "arm64": + need = append(need, armGnuNeed...) case "ppc64", "ppc64le": need = append(need, ppcGnuNeed...) } @@ -234,8 +242,6 @@ func TestDisasmGnuAsm(t *testing.T) { switch runtime.GOARCH { case "mips", "mipsle", "mips64", "mips64le", "riscv64", "s390x": t.Skipf("skipping on %s, issue 19160", runtime.GOARCH) - case "arm": - t.Skipf("skipping gnuAsm test on %s", runtime.GOARCH) } testDisasm(t, false, true) } -- cgit v1.2.3-54-g00ecf From fde6868ac3f3f049247084f2c76efec3555a2395 Mon Sep 17 00:00:00 2001 From: "Bryan C. Mills" Date: Mon, 30 Mar 2020 15:01:33 -0400 Subject: os/signal: in TestStop, skip the final "unexpected signal" check for SIGUSR1 on Android In CL 226138, I updated TestStop to have more uniform behavior for its signals. However, that test seems to always fail for SIGUSR1 on the Android ARM builders. I'm not sure what's special about Android for this particular case, but let's skip the test to unbreak the builders while I investigate. For #38165 Updates #33174 Change-Id: I35a70346cd9757a92acd505a020bf95e6871405c Reviewed-on: https://go-review.googlesource.com/c/go/+/226458 Run-TryBot: Bryan C. 
Mills TryBot-Result: Gobot Gobot Reviewed-by: Ian Lance Taylor --- src/os/signal/signal_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/os/signal/signal_test.go b/src/os/signal/signal_test.go index a85d7606c8..bec5c1599e 100644 --- a/src/os/signal/signal_test.go +++ b/src/os/signal/signal_test.go @@ -355,6 +355,9 @@ func TestStop(t *testing.T) { quiesce() select { case s := <-c: + if sig == syscall.SIGUSR1 && s == syscall.SIGUSR1 && runtime.GOOS == "android" { + testenv.SkipFlaky(t, 38165) + } t.Fatalf("unexpected signal %v", s) default: // nothing to read - good -- cgit v1.2.3-54-g00ecf From 71d477469c5529b56779cdb3bc235d0a87fe9877 Mon Sep 17 00:00:00 2001 From: fanzha02 Date: Thu, 19 Dec 2019 08:15:06 +0000 Subject: cmd/asm: align an instruction or a function's address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recently, the gVisor project needs an instruction's address with 128 bytes alignment and a function's start address with 2K bytes alignment to fit the architecture requirement for interrupt table. This patch allows aligning the address of an instruction to be aligned to a specific value (2^n and not higher than 2048) and the address of a function to be 2048 bytes. The main changes include: 1. Adds ALIGN2048 flag to align a function's address with 2048 bytes. e.g. "TEXT ·Add(SB),NOSPLIT|ALIGN2048" indicates that the address of Add function should be aligned to 2048 bytes. 2. Adds a new element in the FuncInfo structure defined in cmd/internal/obj/link.go file to record the alignment information. 3. Adds a new element in the Func structure defined in cmd/internal/goobj/read.go file to read the alignment information. 4. Because go introduces a new object file format, also add a new element in the FuncInfo structure defined in cmd/internal/goobj2/funcinfo.go to record the alignment information. 5. Adds the assembler support to align an intruction's offset with a specific value (2^n and not higher than 2048). e.g. "PCALIGN $256" indicates that the next instruction should be aligned to 256 bytes. This CL also adds a test. 
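Concretely, the two knobs look like this in an assembly file (the function
name is hypothetical; the flag and directive are the ones this patch adds):

	#include "textflag.h"

	// Function entry aligned to a 2048-byte boundary (arm64 only).
	TEXT ·vector(SB),NOSPLIT|ALIGN2048,$0-0
		MOVD	$1, R0
		PCALIGN	$256	// next instruction starts on a 256-byte boundary
		MOVD	$2, R1
		RET

The padding inserted for PCALIGN is -pc & (n-1) bytes of NOPs, so an
instruction at pc=4 needs 4 bytes of padding to reach an 8-byte boundary and
12 bytes to reach a 16-byte one.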
Change-Id: I31cfa6fb5bc35dee2c44bf65913e90cddfcb492a Reviewed-on: https://go-review.googlesource.com/c/go/+/212767 Reviewed-by: Keith Randall Run-TryBot: Keith Randall TryBot-Result: Gobot Gobot --- src/cmd/internal/goobj/read.go | 2 + src/cmd/internal/goobj/readnew.go | 1 + src/cmd/internal/goobj2/funcinfo.go | 3 + src/cmd/internal/obj/arm64/asm7.go | 33 ++++------ src/cmd/internal/obj/arm64/asm_test.go | 27 ++++++++- src/cmd/internal/obj/link.go | 1 + src/cmd/internal/obj/objfile.go | 1 + src/cmd/internal/obj/objfile2.go | 1 + src/cmd/internal/obj/plist.go | 7 +++ src/cmd/internal/obj/textflag.go | 5 ++ src/cmd/link/internal/ld/data.go | 4 ++ src/cmd/link/internal/loader/loader.go | 1 + src/cmd/link/internal/objfile/objfile.go | 1 + src/cmd/link/link_test.go | 101 +++++++++++++++++++++++++++++++ src/runtime/textflag.h | 5 ++ 15 files changed, 168 insertions(+), 25 deletions(-) diff --git a/src/cmd/internal/goobj/read.go b/src/cmd/internal/goobj/read.go index e61e95dcc8..48537d2b1c 100644 --- a/src/cmd/internal/goobj/read.go +++ b/src/cmd/internal/goobj/read.go @@ -95,6 +95,7 @@ type Var struct { type Func struct { Args int64 // size in bytes of argument frame: inputs and outputs Frame int64 // size in bytes of local variable frame + Align uint32 // alignment requirement in bytes for the address of the function Leaf bool // function omits save of link register (ARM) NoSplit bool // function omits stack split prologue TopFrame bool // function is the top of the call stack @@ -590,6 +591,7 @@ func (r *objReader) parseObject(prefix []byte) error { s.Func = f f.Args = r.readInt() f.Frame = r.readInt() + f.Align = uint32(r.readInt()) flags := r.readInt() f.Leaf = flags&(1<<0) != 0 f.TopFrame = flags&(1<<4) != 0 diff --git a/src/cmd/internal/goobj/readnew.go b/src/cmd/internal/goobj/readnew.go index 3f9d0d1db6..1acf18a594 100644 --- a/src/cmd/internal/goobj/readnew.go +++ b/src/cmd/internal/goobj/readnew.go @@ -149,6 +149,7 @@ func (r *objReader) readNew() { f := &Func{ Args: int64(info.Args), Frame: int64(info.Locals), + Align: info.Align, NoSplit: info.NoSplit != 0, Leaf: osym.Leaf(), TopFrame: osym.TopFrame(), diff --git a/src/cmd/internal/goobj2/funcinfo.go b/src/cmd/internal/goobj2/funcinfo.go index 8620931970..946415b246 100644 --- a/src/cmd/internal/goobj2/funcinfo.go +++ b/src/cmd/internal/goobj2/funcinfo.go @@ -18,6 +18,7 @@ type FuncInfo struct { Args uint32 Locals uint32 + Align uint32 Pcsp uint32 Pcfile uint32 @@ -42,6 +43,7 @@ func (a *FuncInfo) Write(w *bytes.Buffer) { writeUint32(a.Args) writeUint32(a.Locals) + writeUint32(a.Align) writeUint32(a.Pcsp) writeUint32(a.Pcfile) @@ -79,6 +81,7 @@ func (a *FuncInfo) Read(b []byte) { a.Args = readUint32() a.Locals = readUint32() + a.Align = readUint32() a.Pcsp = readUint32() a.Pcfile = readUint32() diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index e8b092a2a8..dbe816e735 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -886,25 +886,10 @@ const OP_NOOP = 0xd503201f // align code to a certain length by padding bytes. 
func pcAlignPadLength(pc int64, alignedValue int64, ctxt *obj.Link) int { - switch alignedValue { - case 8: - if pc%8 == 4 { - return 4 - } - case 16: - switch pc % 16 { - case 4: - return 12 - case 8: - return 8 - case 12: - return 4 - } - default: - ctxt.Diag("Unexpected alignment: %d for PCALIGN directive\n", alignedValue) + if !((alignedValue&(alignedValue-1) == 0) && 8 <= alignedValue && alignedValue <= 2048) { + ctxt.Diag("alignment value of an instruction must be a power of two and in the range [8, 2048], got %d\n", alignedValue) } - - return 0 + return int(-pc & (alignedValue - 1)) } func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { @@ -940,8 +925,12 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { if m == 0 { switch p.As { case obj.APCALIGN: - a := p.From.Offset - m = pcAlignPadLength(pc, a, ctxt) + alignedValue := p.From.Offset + m = pcAlignPadLength(pc, alignedValue, ctxt) + // Update the current text symbol ailgnment value. + if int32(alignedValue) > cursym.Func.Align { + cursym.Func.Align = int32(alignedValue) + } break case obj.ANOP, obj.AFUNCDATA, obj.APCDATA: continue @@ -1017,8 +1006,8 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { if m == 0 { switch p.As { case obj.APCALIGN: - a := p.From.Offset - m = pcAlignPadLength(pc, a, ctxt) + alignedValue := p.From.Offset + m = pcAlignPadLength(pc, alignedValue, ctxt) break case obj.ANOP, obj.AFUNCDATA, obj.APCDATA: continue diff --git a/src/cmd/internal/obj/arm64/asm_test.go b/src/cmd/internal/obj/arm64/asm_test.go index 1691828739..9efdb0217f 100644 --- a/src/cmd/internal/obj/arm64/asm_test.go +++ b/src/cmd/internal/obj/arm64/asm_test.go @@ -18,7 +18,9 @@ import ( // TestLarge generates a very large file to verify that large // program builds successfully, in particular, too-far -// conditional branches are fixed. +// conditional branches are fixed, and also verify that the +// instruction's pc can be correctly aligned even when branches +// need to be fixed. 
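// Note why TestLarge exercises branch fixing and alignment together: fixing
// a too-far branch changes instruction sizes, which shifts every later pc, so
// the PCALIGN padding (-pc & (n-1)) must be recomputed on each sizing pass of
// span7; that is why pcAlignPadLength is called from both loops above.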
func TestLarge(t *testing.T) { if testing.Short() { t.Skip("Skip in short mode") @@ -41,10 +43,27 @@ func TestLarge(t *testing.T) { t.Fatalf("can't write output: %v\n", err) } - // build generated file - cmd := exec.Command(testenv.GoToolPath(t), "tool", "asm", "-o", filepath.Join(dir, "x.o"), tmpfile) + pattern := `0x0080\s00128\s\(.*\)\tMOVD\t\$3,\sR3` + + // assemble generated file + cmd := exec.Command(testenv.GoToolPath(t), "tool", "asm", "-S", "-o", filepath.Join(dir, "test.o"), tmpfile) cmd.Env = append(os.Environ(), "GOARCH=arm64", "GOOS=linux") out, err := cmd.CombinedOutput() + if err != nil { + t.Errorf("Assemble failed: %v, output: %s", err, out) + } + matched, err := regexp.MatchString(pattern, string(out)) + if err != nil { + t.Fatal(err) + } + if !matched { + t.Errorf("The alignment is not correct: %t, output:%s\n", matched, out) + } + + // build generated file + cmd = exec.Command(testenv.GoToolPath(t), "tool", "asm", "-o", filepath.Join(dir, "x.o"), tmpfile) + cmd.Env = append(os.Environ(), "GOARCH=arm64", "GOOS=linux") + out, err = cmd.CombinedOutput() if err != nil { t.Errorf("Build failed: %v, output: %s", err, out) } @@ -56,6 +75,8 @@ func gen(buf *bytes.Buffer) { fmt.Fprintln(buf, "TBZ $5, R0, label") fmt.Fprintln(buf, "CBZ R0, label") fmt.Fprintln(buf, "BEQ label") + fmt.Fprintln(buf, "PCALIGN $128") + fmt.Fprintln(buf, "MOVD $3, R3") for i := 0; i < 1<<19; i++ { fmt.Fprintln(buf, "MOVD R0, R1") } diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index d1cc536a8c..0879c611ba 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -398,6 +398,7 @@ type LSym struct { type FuncInfo struct { Args int32 Locals int32 + Align int32 Text *Prog Autot map[*LSym]struct{} Pcln Pcln diff --git a/src/cmd/internal/obj/objfile.go b/src/cmd/internal/obj/objfile.go index 7fd97f7363..46e8a551ad 100644 --- a/src/cmd/internal/obj/objfile.go +++ b/src/cmd/internal/obj/objfile.go @@ -346,6 +346,7 @@ func (w *objWriter) writeSym(s *LSym) { w.writeInt(int64(s.Func.Args)) w.writeInt(int64(s.Func.Locals)) + w.writeInt(int64(s.Func.Align)) w.writeBool(s.NoSplit()) flags = int64(0) if s.Leaf() { diff --git a/src/cmd/internal/obj/objfile2.go b/src/cmd/internal/obj/objfile2.go index 69019e033d..626df56bd4 100644 --- a/src/cmd/internal/obj/objfile2.go +++ b/src/cmd/internal/obj/objfile2.go @@ -374,6 +374,7 @@ func genFuncInfoSyms(ctxt *Link) { NoSplit: nosplit, Args: uint32(s.Func.Args), Locals: uint32(s.Func.Locals), + Align: uint32(s.Func.Align), } pc := &s.Func.Pcln o.Pcsp = pcdataoff diff --git a/src/cmd/internal/obj/plist.go b/src/cmd/internal/obj/plist.go index 7579dd0390..be19221a13 100644 --- a/src/cmd/internal/obj/plist.go +++ b/src/cmd/internal/obj/plist.go @@ -133,6 +133,13 @@ func (ctxt *Link) InitTextSym(s *LSym, flag int) { s.Set(AttrNeedCtxt, flag&NEEDCTXT != 0) s.Set(AttrNoFrame, flag&NOFRAME != 0) s.Set(AttrTopFrame, flag&TOPFRAME != 0) + if flag&ALIGN2048 != 0 { + if objabi.GOARCH != "arm64" { + ctxt.Diag("ALIGN2048 flag only works on ARM64 at present.") + } + s.Func.Align = 2048 + } + s.Type = objabi.STEXT ctxt.Text = append(ctxt.Text, s) diff --git a/src/cmd/internal/obj/textflag.go b/src/cmd/internal/obj/textflag.go index d2cec734b1..3681a3b67b 100644 --- a/src/cmd/internal/obj/textflag.go +++ b/src/cmd/internal/obj/textflag.go @@ -51,4 +51,9 @@ const ( // Function is the top of the call stack. Call stack unwinders should stop // at this function. 
TOPFRAME = 2048 + + // ALIGN2048 means that the address of the function must be aligned to a + // 2048 bytes boundary. + // Only works on arm64 at present. + ALIGN2048 = 4096 ) diff --git a/src/cmd/link/internal/ld/data.go b/src/cmd/link/internal/ld/data.go index 7ca01c8c25..31613e5cef 100644 --- a/src/cmd/link/internal/ld/data.go +++ b/src/cmd/link/internal/ld/data.go @@ -2119,6 +2119,10 @@ func assignAddress(ctxt *Link, sect *sym.Section, n int, s *sym.Symbol, va uint6 funcsize = uint64(s.Size) } + if sect.Align < s.Align { + sect.Align = s.Align + } + // On ppc64x a text section should not be larger than 2^26 bytes due to the size of // call target offset field in the bl instruction. Splitting into smaller text // sections smaller than this limit allows the GNU linker to modify the long calls diff --git a/src/cmd/link/internal/loader/loader.go b/src/cmd/link/internal/loader/loader.go index 0adc395fef..bd9c6b4fe9 100644 --- a/src/cmd/link/internal/loader/loader.go +++ b/src/cmd/link/internal/loader/loader.go @@ -1227,6 +1227,7 @@ func loadObjFull(l *Loader, r *oReader) { info.Pcdata = append(info.Pcdata, info.PcdataEnd) // for the ease of knowing where it ends pc.Args = int32(info.Args) pc.Locals = int32(info.Locals) + s.Align = int32(info.Align) npc := len(info.Pcdata) - 1 // -1 as we appended one above pc.Pcdata = pcDataBatch[:npc:npc] diff --git a/src/cmd/link/internal/objfile/objfile.go b/src/cmd/link/internal/objfile/objfile.go index a15d3c3e07..295acb2d29 100644 --- a/src/cmd/link/internal/objfile/objfile.go +++ b/src/cmd/link/internal/objfile/objfile.go @@ -312,6 +312,7 @@ overwrite: pc.Args = r.readInt32() pc.Locals = r.readInt32() + s.Align = r.readInt32() if r.readUint8() != 0 { s.Attr |= sym.AttrNoSplit } diff --git a/src/cmd/link/link_test.go b/src/cmd/link/link_test.go index 4f792bd1f1..025e882106 100644 --- a/src/cmd/link/link_test.go +++ b/src/cmd/link/link_test.go @@ -3,6 +3,7 @@ package main import ( "bufio" "bytes" + "cmd/internal/objabi" "debug/macho" "internal/testenv" "io/ioutil" @@ -447,3 +448,103 @@ func TestStrictDup(t *testing.T) { t.Errorf("unexpected output:\n%s", out) } } + +const testFuncAlignSrc = ` +package main +import ( + "fmt" + "reflect" +) +func alignFunc() +func alignPc() + +func main() { + addr1 := reflect.ValueOf(alignFunc).Pointer() + addr2 := reflect.ValueOf(alignPc).Pointer() + switch { + case (addr1 % 2048) != 0 && (addr2 % 512) != 0: + fmt.Printf("expected 2048 bytes alignment, got %v; expected 512 bytes alignment, got %v\n", addr1, addr2) + case (addr2 % 512) != 0: + fmt.Printf("expected 512 bytes alignment, got %v\n", addr2) + case (addr1 % 2048) != 0: + fmt.Printf("expected 2048 bytes alignment, got %v\n", addr1) + default: + fmt.Printf("PASS") + } +} +` + +const testFuncAlignAsmSrc = ` +#include "textflag.h" +TEXT ·alignFunc(SB),NOSPLIT|ALIGN2048, $0-0 + MOVD $1, R0 + MOVD $2, R1 + RET + +TEXT ·alignPc(SB),NOSPLIT, $0-0 + MOVD $2, R0 + PCALIGN $512 + MOVD $3, R1 + RET +` + +// TestFuncAlign verifies that the address of a function can be aligned +// with a specfic value on arm64. 
+func TestFuncAlign(t *testing.T) { + if objabi.GOARCH != "arm64" { + t.Skipf("Skipping FuncAlign test on %s", objabi.GOARCH) + } + testenv.MustHaveGoBuild(t) + + tmpdir, err := ioutil.TempDir("", "TestFuncAlign") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(tmpdir) + + src := filepath.Join(tmpdir, "falign.go") + err = ioutil.WriteFile(src, []byte(testFuncAlignSrc), 0666) + if err != nil { + t.Fatal(err) + } + src = filepath.Join(tmpdir, "falign.s") + err = ioutil.WriteFile(src, []byte(testFuncAlignAsmSrc), 0666) + if err != nil { + t.Fatal(err) + } + + // Build and run with old object file format. + cmd := exec.Command(testenv.GoToolPath(t), "build", "-o", "falign") + cmd.Env = append(os.Environ(), "GOARCH=arm64", "GOOS=linux") + cmd.Dir = tmpdir + out, err := cmd.CombinedOutput() + if err != nil { + t.Errorf("build failed: %v", err) + } + cmd = exec.Command(tmpdir + "/falign") + out, err = cmd.CombinedOutput() + if err != nil { + t.Errorf("failed to run with err %v, output: %s", err, out) + } + if string(out) != "PASS" { + t.Errorf("unexpected output: %s\n", out) + } + + // Build and run with new object file format. + cmd = exec.Command(testenv.GoToolPath(t), "build", "-o", "falign", "-gcflags=all=-newobj", "-asmflags=all=-newobj", "-ldflags=-newobj") + cmd.Env = append(os.Environ(), "GOARCH=arm64", "GOOS=linux") + cmd.Dir = tmpdir + out, err = cmd.CombinedOutput() + if err != nil { + t.Errorf("build with newobj failed: %v", err) + } + cmd = exec.Command(tmpdir + "/falign") + out, err = cmd.CombinedOutput() + if err != nil { + t.Errorf("failed to run with -newobj, err: %v, output: %s", err, out) + } + if string(out) != "PASS" { + t.Errorf("unexpected output with -newobj: %s\n", out) + } + +} diff --git a/src/runtime/textflag.h b/src/runtime/textflag.h index daca36d948..bbbef6357a 100644 --- a/src/runtime/textflag.h +++ b/src/runtime/textflag.h @@ -35,3 +35,8 @@ // Function is the top of the call stack. Call stack unwinders should stop // at this function. #define TOPFRAME 2048 +// ALIGN2048 means that the address of the function must be aligned to a +// 2048 bytes boundary. +// Only works on arm64 at present. +#define ALIGN2048 4096 + -- cgit v1.2.3-54-g00ecf From d98023ebb5c2db9a445699b690f2cf6fd77f4b7e Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Mon, 30 Mar 2020 18:25:47 -0700 Subject: runtime, internal/poll: name error codes Use explicit names for the error code returned by pollReset and pollWait, rather than just 0, 1, 2, 3. Change-Id: I0ab12cae57693deab7cca9cdd2fadd597e23a956 Reviewed-on: https://go-review.googlesource.com/c/go/+/226537 Run-TryBot: Ian Lance Taylor TryBot-Result: Gobot Gobot Reviewed-by: Emmanuel Odeke --- src/internal/poll/fd_poll_runtime.go | 17 +++++++++---- src/runtime/netpoll.go | 46 ++++++++++++++++++++++++------------ 2 files changed, 44 insertions(+), 19 deletions(-) diff --git a/src/internal/poll/fd_poll_runtime.go b/src/internal/poll/fd_poll_runtime.go index d32f4a0ddd..fd73166ac3 100644 --- a/src/internal/poll/fd_poll_runtime.go +++ b/src/internal/poll/fd_poll_runtime.go @@ -107,15 +107,24 @@ func (pd *pollDesc) pollable() bool { return pd.runtimeCtx != 0 } +// Error values returned by runtime_pollReset and runtime_pollWait. +// These must match the values in runtime/netpoll.go. 
+const ( + pollNoError = 0 + pollErrClosing = 1 + pollErrTimeout = 2 + pollErrNotPollable = 3 +) + func convertErr(res int, isFile bool) error { switch res { - case 0: + case pollNoError: return nil - case 1: + case pollErrClosing: return errClosing(isFile) - case 2: + case pollErrTimeout: return ErrTimeout - case 3: + case pollErrNotPollable: return ErrNotPollable } println("unreachable: ", res) diff --git a/src/runtime/netpoll.go b/src/runtime/netpoll.go index 918c361c2e..a332045342 100644 --- a/src/runtime/netpoll.go +++ b/src/runtime/netpoll.go @@ -33,6 +33,15 @@ import ( // func netpollIsPollDescriptor(fd uintptr) bool // Reports whether fd is a file descriptor used by the poller. +// Error codes returned by runtime_pollReset and runtime_pollWait. +// These must match the values in internal/poll/fd_poll_runtime.go. +const ( + pollNoError = 0 // no error + pollErrClosing = 1 // descriptor is closed + pollErrTimeout = 2 // I/O timeout + pollErrNotPollable = 3 // general error polling descriptor +) + // pollDesc contains 2 binary semaphores, rg and wg, to park reader and writer // goroutines respectively. The semaphore can be in the following states: // pdReady - io readiness notification is pending; @@ -176,40 +185,47 @@ func (c *pollCache) free(pd *pollDesc) { unlock(&c.lock) } +// poll_runtime_pollReset, which is internal/poll.runtime_pollReset, +// prepares a descriptor for polling in mode, which is 'r' or 'w'. +// This returns an error code; the codes are defined above. //go:linkname poll_runtime_pollReset internal/poll.runtime_pollReset func poll_runtime_pollReset(pd *pollDesc, mode int) int { - err := netpollcheckerr(pd, int32(mode)) - if err != 0 { - return err + errcode := netpollcheckerr(pd, int32(mode)) + if errcode != pollNoError { + return errcode } if mode == 'r' { pd.rg = 0 } else if mode == 'w' { pd.wg = 0 } - return 0 + return pollNoError } +// poll_runtime_pollWait, which is internal/poll.runtime_pollWait, +// waits for a descriptor to be ready for reading or writing, +// according to mode, which is 'r' or 'w'. +// This returns an error code; the codes are defined above. //go:linkname poll_runtime_pollWait internal/poll.runtime_pollWait func poll_runtime_pollWait(pd *pollDesc, mode int) int { - err := netpollcheckerr(pd, int32(mode)) - if err != 0 { - return err + errcode := netpollcheckerr(pd, int32(mode)) + if errcode != pollNoError { + return errcode } // As for now only Solaris, illumos, and AIX use level-triggered IO. if GOOS == "solaris" || GOOS == "illumos" || GOOS == "aix" { netpollarm(pd, mode) } for !netpollblock(pd, int32(mode), false) { - err = netpollcheckerr(pd, int32(mode)) - if err != 0 { - return err + errcode = netpollcheckerr(pd, int32(mode)) + if errcode != pollNoError { + return errcode } // Can happen if timeout has fired and unblocked us, // but before we had a chance to run, timeout has been reset. // Pretend it has not happened and retry. } - return 0 + return pollNoError } //go:linkname poll_runtime_pollWaitCanceled internal/poll.runtime_pollWaitCanceled @@ -359,18 +375,18 @@ func netpollready(toRun *gList, pd *pollDesc, mode int32) { func netpollcheckerr(pd *pollDesc, mode int32) int { if pd.closing { - return 1 // ErrFileClosing or ErrNetClosing + return pollErrClosing } if (mode == 'r' && pd.rd < 0) || (mode == 'w' && pd.wd < 0) { - return 2 // ErrTimeout + return pollErrTimeout } // Report an event scanning error only on a read event. 
// An error on a write event will be captured in a subsequent // write call that is able to report a more specific error. if mode == 'r' && pd.everr { - return 3 // ErrNotPollable + return pollErrNotPollable } - return 0 + return pollNoError } func netpollblockcommit(gp *g, gpp unsafe.Pointer) bool { -- cgit v1.2.3-54-g00ecf From a3d8c210ad7d6dea9996200fc1596c310b9775b5 Mon Sep 17 00:00:00 2001 From: Joel Sing Date: Tue, 31 Mar 2020 01:57:52 +1100 Subject: cmd/asm,cmd/internal/obj/riscv: provide branch pseudo-instructions Implement various branch pseudo-instructions for riscv64. These make it easier to read/write assembly and will also make it easier for the compiler to generate optimised code. Change-Id: Ic31a7748c0e1495522ebecf34b440842b8d12c04 Reviewed-on: https://go-review.googlesource.com/c/go/+/226397 Run-TryBot: Cherry Zhang Reviewed-by: Cherry Zhang TryBot-Result: Gobot Gobot --- src/cmd/asm/internal/arch/arch.go | 3 +- src/cmd/asm/internal/asm/testdata/riscvenc.s | 13 +++ src/cmd/internal/obj/riscv/anames.go | 10 ++ src/cmd/internal/obj/riscv/asm_test.go | 18 ++++ src/cmd/internal/obj/riscv/cpu.go | 10 ++ src/cmd/internal/obj/riscv/obj.go | 64 ++++++++++-- .../obj/riscv/testdata/testbranch/branch_test.go | 94 +++++++++++++++++ .../obj/riscv/testdata/testbranch/branch_test.s | 111 +++++++++++++++++++++ 8 files changed, 311 insertions(+), 12 deletions(-) create mode 100644 src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.go create mode 100644 src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.s diff --git a/src/cmd/asm/internal/arch/arch.go b/src/cmd/asm/internal/arch/arch.go index d9ba6670e8..2e5d0ff991 100644 --- a/src/cmd/asm/internal/arch/arch.go +++ b/src/cmd/asm/internal/arch/arch.go @@ -88,7 +88,8 @@ func jumpX86(word string) bool { func jumpRISCV(word string) bool { switch word { - case "BEQ", "BNE", "BLT", "BGE", "BLTU", "BGEU", "CALL", "JAL", "JALR", "JMP": + case "BEQ", "BEQZ", "BGE", "BGEU", "BGEZ", "BGT", "BGTU", "BGTZ", "BLE", "BLEU", "BLEZ", + "BLT", "BLTU", "BLTZ", "BNE", "BNEZ", "CALL", "JAL", "JALR", "JMP": return true } return false diff --git a/src/cmd/asm/internal/asm/testdata/riscvenc.s b/src/cmd/asm/internal/asm/testdata/riscvenc.s index 74bc43d727..8d301f2dd5 100644 --- a/src/cmd/asm/internal/asm/testdata/riscvenc.s +++ b/src/cmd/asm/internal/asm/testdata/riscvenc.s @@ -330,6 +330,19 @@ start: CALL asmtest(SB) // 970f0000 JMP asmtest(SB) // 970f0000 + // Branch pseudo-instructions + BEQZ X5, start // BEQZ X5, 2 // e38a02c2 + BGEZ X5, start // BGEZ X5, 2 // e3d802c2 + BGT X5, X6, start // BGT X5, X6, 2 // e3c662c2 + BGTU X5, X6, start // BGTU X5, X6, 2 // e3e462c2 + BGTZ X5, start // BGTZ X5, 2 // e34250c2 + BLE X5, X6, start // BLE X5, X6, 2 // e3d062c2 + BLEU X5, X6, start // BLEU X5, X6, 2 // e3fe62c0 + BLEZ X5, start // BLEZ X5, 2 // e35c50c0 + BLTZ X5, start // BLTZ X5, 2 // e3ca02c0 + BNEZ X5, start // BNEZ X5, 2 // e39802c0 + + // Set pseudo-instructions SEQZ X15, X15 // 93b71700 SNEZ X15, X15 // b337f000 diff --git a/src/cmd/internal/obj/riscv/anames.go b/src/cmd/internal/obj/riscv/anames.go index fa236d81e5..6581bb3402 100644 --- a/src/cmd/internal/obj/riscv/anames.go +++ b/src/cmd/internal/obj/riscv/anames.go @@ -226,6 +226,16 @@ var Anames = []string{ "HFENCEGVMA", "HFENCEVVMA", "WORD", + "BEQZ", + "BGEZ", + "BGT", + "BGTU", + "BGTZ", + "BLE", + "BLEU", + "BLEZ", + "BLTZ", + "BNEZ", "FNEGD", "FNEGS", "FNED", diff --git a/src/cmd/internal/obj/riscv/asm_test.go b/src/cmd/internal/obj/riscv/asm_test.go index 849a87b706..f8f7b4f2ce 
100644 --- a/src/cmd/internal/obj/riscv/asm_test.go +++ b/src/cmd/internal/obj/riscv/asm_test.go @@ -12,6 +12,7 @@ import ( "os" "os/exec" "path/filepath" + "runtime" "testing" ) @@ -131,3 +132,20 @@ TEXT _stub(SB),$0-0 t.Errorf("%v\n%s", err, out) } } + +func TestBranch(t *testing.T) { + if testing.Short() { + t.Skip("Skipping in short mode") + } + if runtime.GOARCH != "riscv64" { + t.Skip("Requires riscv64 to run") + } + + testenv.MustHaveGoBuild(t) + + cmd := exec.Command(testenv.GoToolPath(t), "test") + cmd.Dir = "testdata/testbranch" + if out, err := testenv.CleanCmdEnv(cmd).CombinedOutput(); err != nil { + t.Errorf("Branch test failed: %v\n%s", err, out) + } +} diff --git a/src/cmd/internal/obj/riscv/cpu.go b/src/cmd/internal/obj/riscv/cpu.go index 76457dd8d2..482f9e0b6d 100644 --- a/src/cmd/internal/obj/riscv/cpu.go +++ b/src/cmd/internal/obj/riscv/cpu.go @@ -576,6 +576,16 @@ const ( // Pseudo-instructions. These get translated by the assembler into other // instructions, based on their operands. + ABEQZ + ABGEZ + ABGT + ABGTU + ABGTZ + ABLE + ABLEU + ABLEZ + ABLTZ + ABNEZ AFNEGD AFNEGS AFNED diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go index ed5d533402..73fe8c284f 100644 --- a/src/cmd/internal/obj/riscv/obj.go +++ b/src/cmd/internal/obj/riscv/obj.go @@ -406,20 +406,40 @@ func rewriteMOV(ctxt *obj.Link, newprog obj.ProgAlloc, p *obj.Prog) { } // InvertBranch inverts the condition of a conditional branch. -func InvertBranch(i obj.As) obj.As { - switch i { +func InvertBranch(as obj.As) obj.As { + switch as { case ABEQ: return ABNE - case ABNE: - return ABEQ - case ABLT: - return ABGE + case ABEQZ: + return ABNEZ case ABGE: return ABLT - case ABLTU: - return ABGEU case ABGEU: return ABLTU + case ABGEZ: + return ABLTZ + case ABGT: + return ABLE + case ABGTU: + return ABLEU + case ABGTZ: + return ABLEZ + case ABLE: + return ABGT + case ABLEU: + return ABGTU + case ABLEZ: + return ABGTZ + case ABLT: + return ABGE + case ABLTU: + return ABGEU + case ABLTZ: + return ABGEZ + case ABNE: + return ABEQ + case ABNEZ: + return ABEQZ default: panic("InvertBranch: not a branch") } @@ -860,7 +880,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { for p := cursym.Func.Text; p != nil; p = p.Link { switch p.As { - case ABEQ, ABNE, ABLT, ABGE, ABLTU, ABGEU: + case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ: if p.To.Type != obj.TYPE_BRANCH { panic("assemble: instruction with branch-like opcode lacks destination") } @@ -917,7 +937,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // instructions will break everything--don't do it! 
for p := cursym.Func.Text; p != nil; p = p.Link { switch p.As { - case AJAL, ABEQ, ABNE, ABLT, ABLTU, ABGE, ABGEU: + case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ, AJAL: switch p.To.Type { case obj.TYPE_BRANCH: p.To.Type, p.To.Offset = obj.TYPE_CONST, p.Pcond.Pc-p.Pc @@ -1778,7 +1798,29 @@ func instructionsForProg(p *obj.Prog) []*instruction { ins.rd, ins.rs2 = uint32(p.From.Reg), obj.REG_NONE ins.imm = p.To.Offset - case ABEQ, ABNE, ABLT, ABGE, ABLTU, ABGEU: + case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ: + switch ins.as { + case ABEQZ: + ins.as, ins.rs1, ins.rs2 = ABEQ, REG_ZERO, uint32(p.From.Reg) + case ABGEZ: + ins.as, ins.rs1, ins.rs2 = ABGE, REG_ZERO, uint32(p.From.Reg) + case ABGT: + ins.as, ins.rs1, ins.rs2 = ABLT, uint32(p.Reg), uint32(p.From.Reg) + case ABGTU: + ins.as, ins.rs1, ins.rs2 = ABLTU, uint32(p.Reg), uint32(p.From.Reg) + case ABGTZ: + ins.as, ins.rs1, ins.rs2 = ABLT, uint32(p.From.Reg), REG_ZERO + case ABLE: + ins.as, ins.rs1, ins.rs2 = ABGE, uint32(p.Reg), uint32(p.From.Reg) + case ABLEU: + ins.as, ins.rs1, ins.rs2 = ABGEU, uint32(p.Reg), uint32(p.From.Reg) + case ABLEZ: + ins.as, ins.rs1, ins.rs2 = ABGE, uint32(p.From.Reg), REG_ZERO + case ABLTZ: + ins.as, ins.rs1, ins.rs2 = ABLT, REG_ZERO, uint32(p.From.Reg) + case ABNEZ: + ins.as, ins.rs1, ins.rs2 = ABNE, REG_ZERO, uint32(p.From.Reg) + } ins.imm = p.To.Offset case ALW, ALWU, ALH, ALHU, ALB, ALBU, ALD, AFLW, AFLD: diff --git a/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.go b/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.go new file mode 100644 index 0000000000..b0ab5f72aa --- /dev/null +++ b/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.go @@ -0,0 +1,94 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +// +build riscv64 + +package testbranch + +import ( + "testing" +) + +func testBEQZ(a int64) (r bool) +func testBGEZ(a int64) (r bool) +func testBGT(a, b int64) (r bool) +func testBGTU(a, b int64) (r bool) +func testBGTZ(a int64) (r bool) +func testBLE(a, b int64) (r bool) +func testBLEU(a, b int64) (r bool) +func testBLEZ(a int64) (r bool) +func testBLTZ(a int64) (r bool) +func testBNEZ(a int64) (r bool) + +func TestBranchCondition(t *testing.T) { + tests := []struct{ + ins string + a int64 + b int64 + fn func(a, b int64) bool + want bool + }{ + {"BGT", 0, 1, testBGT, true}, + {"BGT", 0, 0, testBGT, false}, + {"BGT", 0, -1, testBGT, false}, + {"BGT", -1, 0, testBGT, true}, + {"BGT", 1, 0, testBGT, false}, + {"BGTU", 0, 1, testBGTU, true}, + {"BGTU", 0, -1, testBGTU, true}, + {"BGTU", -1, 0, testBGTU, false}, + {"BGTU", 1, 0, testBGTU, false}, + {"BLE", 0, 1, testBLE, false}, + {"BLE", 0, -1, testBLE, true}, + {"BLE", 0, 0, testBLE, true}, + {"BLE", -1, 0, testBLE, false}, + {"BLE", 1, 0, testBLE, true}, + {"BLEU", 0, 1, testBLEU, false}, + {"BLEU", 0, -1, testBLEU, false}, + {"BLEU", 0, 0, testBLEU, true}, + {"BLEU", -1, 0, testBLEU, true}, + {"BLEU", 1, 0, testBLEU, true}, + } + for _, test := range tests { + t.Run(test.ins, func(t *testing.T) { + if got := test.fn(test.a, test.b); got != test.want { + t.Errorf("%v %v, %v = %v, want %v", test.ins, test.a, test.b, got, test.want) + } + }) + } +} + +func TestBranchZero(t *testing.T) { + tests := []struct{ + ins string + a int64 + fn func(a int64) bool + want bool + }{ + {"BEQZ", -1, testBEQZ, false}, + {"BEQZ", 0, testBEQZ, true}, + {"BEQZ", 1, testBEQZ, false}, + {"BGEZ", -1, testBGEZ, false}, + {"BGEZ", 0, testBGEZ, true}, + {"BGEZ", 1, testBGEZ, true}, + {"BGTZ", -1, testBGTZ, false}, + {"BGTZ", 0, testBGTZ, false}, + {"BGTZ", 1, testBGTZ, true}, + {"BLEZ", -1, testBLEZ, true}, + {"BLEZ", 0, testBLEZ, true}, + {"BLEZ", 1, testBLEZ, false}, + {"BLTZ", -1, testBLTZ, true}, + {"BLTZ", 0, testBLTZ, false}, + {"BLTZ", 1, testBLTZ, false}, + {"BNEZ", -1, testBNEZ, true}, + {"BNEZ", 0, testBNEZ, false}, + {"BNEZ", 1, testBNEZ, true}, + } + for _, test := range tests { + t.Run(test.ins, func(t *testing.T) { + if got := test.fn(test.a); got != test.want { + t.Errorf("%v %v = %v, want %v", test.ins, test.a, got, test.want) + } + }) + } +} diff --git a/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.s b/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.s new file mode 100644 index 0000000000..6cff235848 --- /dev/null +++ b/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.s @@ -0,0 +1,111 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +// +build riscv64 + +#include "textflag.h" + +// func testBEQZ(a int64) (r bool) +TEXT ·testBEQZ(SB),NOSPLIT,$0-0 + MOV a+0(FP), X5 + MOV $1, X6 + BEQZ X5, b + MOV $0, X6 +b: + MOV X6, r+8(FP) + RET + +// func testBGEZ(a int64) (r bool) +TEXT ·testBGEZ(SB),NOSPLIT,$0-0 + MOV a+0(FP), X5 + MOV $1, X6 + BGEZ X5, b + MOV $0, X6 +b: + MOV X6, r+8(FP) + RET + +// func testBGT(a, b int64) (r bool) +TEXT ·testBGT(SB),NOSPLIT,$0-0 + MOV a+0(FP), X5 + MOV b+8(FP), X6 + MOV $1, X7 + BGT X5, X6, b + MOV $0, X7 +b: + MOV X7, r+16(FP) + RET + +// func testBGTU(a, b int64) (r bool) +TEXT ·testBGTU(SB),NOSPLIT,$0-0 + MOV a+0(FP), X5 + MOV b+8(FP), X6 + MOV $1, X7 + BGTU X5, X6, b + MOV $0, X7 +b: + MOV X7, r+16(FP) + RET + +// func testBGTZ(a int64) (r bool) +TEXT ·testBGTZ(SB),NOSPLIT,$0-0 + MOV a+0(FP), X5 + MOV $1, X6 + BGTZ X5, b + MOV $0, X6 +b: + MOV X6, r+8(FP) + RET + +// func testBLE(a, b int64) (r bool) +TEXT ·testBLE(SB),NOSPLIT,$0-0 + MOV a+0(FP), X5 + MOV b+8(FP), X6 + MOV $1, X7 + BLE X5, X6, b + MOV $0, X7 +b: + MOV X7, r+16(FP) + RET + +// func testBLEU(a, b int64) (r bool) +TEXT ·testBLEU(SB),NOSPLIT,$0-0 + MOV a+0(FP), X5 + MOV b+8(FP), X6 + MOV $1, X7 + BLEU X5, X6, b + MOV $0, X7 +b: + MOV X7, r+16(FP) + RET + +// func testBLEZ(a int64) (r bool) +TEXT ·testBLEZ(SB),NOSPLIT,$0-0 + MOV a+0(FP), X5 + MOV $1, X6 + BLEZ X5, b + MOV $0, X6 +b: + MOV X6, r+8(FP) + RET + +// func testBLTZ(a int64) (r bool) +TEXT ·testBLTZ(SB),NOSPLIT,$0-0 + MOV a+0(FP), X5 + MOV $1, X6 + BLTZ X5, b + MOV $0, X6 +b: + MOV X6, r+8(FP) + RET + +// func testBNEZ(a int64) (r bool) +TEXT ·testBNEZ(SB),NOSPLIT,$0-0 + MOV a+0(FP), X5 + MOV $1, X6 + BNEZ X5, b + MOV $0, X6 +b: + MOV X6, r+8(FP) + RET -- cgit v1.2.3-54-g00ecf From 5970480c68fc7ecb6eaf3a5f90f49ae4504fa060 Mon Sep 17 00:00:00 2001 From: "Bryan C. Mills" Date: Tue, 31 Mar 2020 15:35:28 +0000 Subject: Revert "cmd/asm: align an instruction or a function's address" This reverts CL 212767. Reason for revert: new test is persistently failing on freebsd-arm64-dmgk builder. Change-Id: Ifd1227628e0e747688ddb4dc580170b2a103a89e Reviewed-on: https://go-review.googlesource.com/c/go/+/226597 Run-TryBot: Bryan C. 
Mills Reviewed-by: Cherry Zhang TryBot-Result: Gobot Gobot --- src/cmd/internal/goobj/read.go | 2 - src/cmd/internal/goobj/readnew.go | 1 - src/cmd/internal/goobj2/funcinfo.go | 3 - src/cmd/internal/obj/arm64/asm7.go | 33 ++++++---- src/cmd/internal/obj/arm64/asm_test.go | 27 +-------- src/cmd/internal/obj/link.go | 1 - src/cmd/internal/obj/objfile.go | 1 - src/cmd/internal/obj/objfile2.go | 1 - src/cmd/internal/obj/plist.go | 7 --- src/cmd/internal/obj/textflag.go | 5 -- src/cmd/link/internal/ld/data.go | 4 -- src/cmd/link/internal/loader/loader.go | 1 - src/cmd/link/internal/objfile/objfile.go | 1 - src/cmd/link/link_test.go | 101 ------------------------------- src/runtime/textflag.h | 5 -- 15 files changed, 25 insertions(+), 168 deletions(-) diff --git a/src/cmd/internal/goobj/read.go b/src/cmd/internal/goobj/read.go index 48537d2b1c..e61e95dcc8 100644 --- a/src/cmd/internal/goobj/read.go +++ b/src/cmd/internal/goobj/read.go @@ -95,7 +95,6 @@ type Var struct { type Func struct { Args int64 // size in bytes of argument frame: inputs and outputs Frame int64 // size in bytes of local variable frame - Align uint32 // alignment requirement in bytes for the address of the function Leaf bool // function omits save of link register (ARM) NoSplit bool // function omits stack split prologue TopFrame bool // function is the top of the call stack @@ -591,7 +590,6 @@ func (r *objReader) parseObject(prefix []byte) error { s.Func = f f.Args = r.readInt() f.Frame = r.readInt() - f.Align = uint32(r.readInt()) flags := r.readInt() f.Leaf = flags&(1<<0) != 0 f.TopFrame = flags&(1<<4) != 0 diff --git a/src/cmd/internal/goobj/readnew.go b/src/cmd/internal/goobj/readnew.go index 1acf18a594..3f9d0d1db6 100644 --- a/src/cmd/internal/goobj/readnew.go +++ b/src/cmd/internal/goobj/readnew.go @@ -149,7 +149,6 @@ func (r *objReader) readNew() { f := &Func{ Args: int64(info.Args), Frame: int64(info.Locals), - Align: info.Align, NoSplit: info.NoSplit != 0, Leaf: osym.Leaf(), TopFrame: osym.TopFrame(), diff --git a/src/cmd/internal/goobj2/funcinfo.go b/src/cmd/internal/goobj2/funcinfo.go index 946415b246..8620931970 100644 --- a/src/cmd/internal/goobj2/funcinfo.go +++ b/src/cmd/internal/goobj2/funcinfo.go @@ -18,7 +18,6 @@ type FuncInfo struct { Args uint32 Locals uint32 - Align uint32 Pcsp uint32 Pcfile uint32 @@ -43,7 +42,6 @@ func (a *FuncInfo) Write(w *bytes.Buffer) { writeUint32(a.Args) writeUint32(a.Locals) - writeUint32(a.Align) writeUint32(a.Pcsp) writeUint32(a.Pcfile) @@ -81,7 +79,6 @@ func (a *FuncInfo) Read(b []byte) { a.Args = readUint32() a.Locals = readUint32() - a.Align = readUint32() a.Pcsp = readUint32() a.Pcfile = readUint32() diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index dbe816e735..e8b092a2a8 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -886,10 +886,25 @@ const OP_NOOP = 0xd503201f // align code to a certain length by padding bytes. 
func pcAlignPadLength(pc int64, alignedValue int64, ctxt *obj.Link) int { - if !((alignedValue&(alignedValue-1) == 0) && 8 <= alignedValue && alignedValue <= 2048) { - ctxt.Diag("alignment value of an instruction must be a power of two and in the range [8, 2048], got %d\n", alignedValue) + switch alignedValue { + case 8: + if pc%8 == 4 { + return 4 + } + case 16: + switch pc % 16 { + case 4: + return 12 + case 8: + return 8 + case 12: + return 4 + } + default: + ctxt.Diag("Unexpected alignment: %d for PCALIGN directive\n", alignedValue) } - return int(-pc & (alignedValue - 1)) + + return 0 } func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { @@ -925,12 +940,8 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { if m == 0 { switch p.As { case obj.APCALIGN: - alignedValue := p.From.Offset - m = pcAlignPadLength(pc, alignedValue, ctxt) - // Update the current text symbol ailgnment value. - if int32(alignedValue) > cursym.Func.Align { - cursym.Func.Align = int32(alignedValue) - } + a := p.From.Offset + m = pcAlignPadLength(pc, a, ctxt) break case obj.ANOP, obj.AFUNCDATA, obj.APCDATA: continue @@ -1006,8 +1017,8 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { if m == 0 { switch p.As { case obj.APCALIGN: - alignedValue := p.From.Offset - m = pcAlignPadLength(pc, alignedValue, ctxt) + a := p.From.Offset + m = pcAlignPadLength(pc, a, ctxt) break case obj.ANOP, obj.AFUNCDATA, obj.APCDATA: continue diff --git a/src/cmd/internal/obj/arm64/asm_test.go b/src/cmd/internal/obj/arm64/asm_test.go index 9efdb0217f..1691828739 100644 --- a/src/cmd/internal/obj/arm64/asm_test.go +++ b/src/cmd/internal/obj/arm64/asm_test.go @@ -18,9 +18,7 @@ import ( // TestLarge generates a very large file to verify that large // program builds successfully, in particular, too-far -// conditional branches are fixed, and also verify that the -// instruction's pc can be correctly aligned even when branches -// need to be fixed. +// conditional branches are fixed. 
func TestLarge(t *testing.T) { if testing.Short() { t.Skip("Skip in short mode") @@ -43,27 +41,10 @@ func TestLarge(t *testing.T) { t.Fatalf("can't write output: %v\n", err) } - pattern := `0x0080\s00128\s\(.*\)\tMOVD\t\$3,\sR3` - - // assemble generated file - cmd := exec.Command(testenv.GoToolPath(t), "tool", "asm", "-S", "-o", filepath.Join(dir, "test.o"), tmpfile) - cmd.Env = append(os.Environ(), "GOARCH=arm64", "GOOS=linux") - out, err := cmd.CombinedOutput() - if err != nil { - t.Errorf("Assemble failed: %v, output: %s", err, out) - } - matched, err := regexp.MatchString(pattern, string(out)) - if err != nil { - t.Fatal(err) - } - if !matched { - t.Errorf("The alignment is not correct: %t, output:%s\n", matched, out) - } - // build generated file - cmd = exec.Command(testenv.GoToolPath(t), "tool", "asm", "-o", filepath.Join(dir, "x.o"), tmpfile) + cmd := exec.Command(testenv.GoToolPath(t), "tool", "asm", "-o", filepath.Join(dir, "x.o"), tmpfile) cmd.Env = append(os.Environ(), "GOARCH=arm64", "GOOS=linux") - out, err = cmd.CombinedOutput() + out, err := cmd.CombinedOutput() if err != nil { t.Errorf("Build failed: %v, output: %s", err, out) } @@ -75,8 +56,6 @@ func gen(buf *bytes.Buffer) { fmt.Fprintln(buf, "TBZ $5, R0, label") fmt.Fprintln(buf, "CBZ R0, label") fmt.Fprintln(buf, "BEQ label") - fmt.Fprintln(buf, "PCALIGN $128") - fmt.Fprintln(buf, "MOVD $3, R3") for i := 0; i < 1<<19; i++ { fmt.Fprintln(buf, "MOVD R0, R1") } diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index 0879c611ba..d1cc536a8c 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -398,7 +398,6 @@ type LSym struct { type FuncInfo struct { Args int32 Locals int32 - Align int32 Text *Prog Autot map[*LSym]struct{} Pcln Pcln diff --git a/src/cmd/internal/obj/objfile.go b/src/cmd/internal/obj/objfile.go index 46e8a551ad..7fd97f7363 100644 --- a/src/cmd/internal/obj/objfile.go +++ b/src/cmd/internal/obj/objfile.go @@ -346,7 +346,6 @@ func (w *objWriter) writeSym(s *LSym) { w.writeInt(int64(s.Func.Args)) w.writeInt(int64(s.Func.Locals)) - w.writeInt(int64(s.Func.Align)) w.writeBool(s.NoSplit()) flags = int64(0) if s.Leaf() { diff --git a/src/cmd/internal/obj/objfile2.go b/src/cmd/internal/obj/objfile2.go index 626df56bd4..69019e033d 100644 --- a/src/cmd/internal/obj/objfile2.go +++ b/src/cmd/internal/obj/objfile2.go @@ -374,7 +374,6 @@ func genFuncInfoSyms(ctxt *Link) { NoSplit: nosplit, Args: uint32(s.Func.Args), Locals: uint32(s.Func.Locals), - Align: uint32(s.Func.Align), } pc := &s.Func.Pcln o.Pcsp = pcdataoff diff --git a/src/cmd/internal/obj/plist.go b/src/cmd/internal/obj/plist.go index be19221a13..7579dd0390 100644 --- a/src/cmd/internal/obj/plist.go +++ b/src/cmd/internal/obj/plist.go @@ -133,13 +133,6 @@ func (ctxt *Link) InitTextSym(s *LSym, flag int) { s.Set(AttrNeedCtxt, flag&NEEDCTXT != 0) s.Set(AttrNoFrame, flag&NOFRAME != 0) s.Set(AttrTopFrame, flag&TOPFRAME != 0) - if flag&ALIGN2048 != 0 { - if objabi.GOARCH != "arm64" { - ctxt.Diag("ALIGN2048 flag only works on ARM64 at present.") - } - s.Func.Align = 2048 - } - s.Type = objabi.STEXT ctxt.Text = append(ctxt.Text, s) diff --git a/src/cmd/internal/obj/textflag.go b/src/cmd/internal/obj/textflag.go index 3681a3b67b..d2cec734b1 100644 --- a/src/cmd/internal/obj/textflag.go +++ b/src/cmd/internal/obj/textflag.go @@ -51,9 +51,4 @@ const ( // Function is the top of the call stack. Call stack unwinders should stop // at this function. 
TOPFRAME = 2048 - - // ALIGN2048 means that the address of the function must be aligned to a - // 2048 bytes boundary. - // Only works on arm64 at present. - ALIGN2048 = 4096 ) diff --git a/src/cmd/link/internal/ld/data.go b/src/cmd/link/internal/ld/data.go index 31613e5cef..7ca01c8c25 100644 --- a/src/cmd/link/internal/ld/data.go +++ b/src/cmd/link/internal/ld/data.go @@ -2119,10 +2119,6 @@ func assignAddress(ctxt *Link, sect *sym.Section, n int, s *sym.Symbol, va uint6 funcsize = uint64(s.Size) } - if sect.Align < s.Align { - sect.Align = s.Align - } - // On ppc64x a text section should not be larger than 2^26 bytes due to the size of // call target offset field in the bl instruction. Splitting into smaller text // sections smaller than this limit allows the GNU linker to modify the long calls diff --git a/src/cmd/link/internal/loader/loader.go b/src/cmd/link/internal/loader/loader.go index bd9c6b4fe9..0adc395fef 100644 --- a/src/cmd/link/internal/loader/loader.go +++ b/src/cmd/link/internal/loader/loader.go @@ -1227,7 +1227,6 @@ func loadObjFull(l *Loader, r *oReader) { info.Pcdata = append(info.Pcdata, info.PcdataEnd) // for the ease of knowing where it ends pc.Args = int32(info.Args) pc.Locals = int32(info.Locals) - s.Align = int32(info.Align) npc := len(info.Pcdata) - 1 // -1 as we appended one above pc.Pcdata = pcDataBatch[:npc:npc] diff --git a/src/cmd/link/internal/objfile/objfile.go b/src/cmd/link/internal/objfile/objfile.go index 295acb2d29..a15d3c3e07 100644 --- a/src/cmd/link/internal/objfile/objfile.go +++ b/src/cmd/link/internal/objfile/objfile.go @@ -312,7 +312,6 @@ overwrite: pc.Args = r.readInt32() pc.Locals = r.readInt32() - s.Align = r.readInt32() if r.readUint8() != 0 { s.Attr |= sym.AttrNoSplit } diff --git a/src/cmd/link/link_test.go b/src/cmd/link/link_test.go index 025e882106..4f792bd1f1 100644 --- a/src/cmd/link/link_test.go +++ b/src/cmd/link/link_test.go @@ -3,7 +3,6 @@ package main import ( "bufio" "bytes" - "cmd/internal/objabi" "debug/macho" "internal/testenv" "io/ioutil" @@ -448,103 +447,3 @@ func TestStrictDup(t *testing.T) { t.Errorf("unexpected output:\n%s", out) } } - -const testFuncAlignSrc = ` -package main -import ( - "fmt" - "reflect" -) -func alignFunc() -func alignPc() - -func main() { - addr1 := reflect.ValueOf(alignFunc).Pointer() - addr2 := reflect.ValueOf(alignPc).Pointer() - switch { - case (addr1 % 2048) != 0 && (addr2 % 512) != 0: - fmt.Printf("expected 2048 bytes alignment, got %v; expected 512 bytes alignment, got %v\n", addr1, addr2) - case (addr2 % 512) != 0: - fmt.Printf("expected 512 bytes alignment, got %v\n", addr2) - case (addr1 % 2048) != 0: - fmt.Printf("expected 2048 bytes alignment, got %v\n", addr1) - default: - fmt.Printf("PASS") - } -} -` - -const testFuncAlignAsmSrc = ` -#include "textflag.h" -TEXT ·alignFunc(SB),NOSPLIT|ALIGN2048, $0-0 - MOVD $1, R0 - MOVD $2, R1 - RET - -TEXT ·alignPc(SB),NOSPLIT, $0-0 - MOVD $2, R0 - PCALIGN $512 - MOVD $3, R1 - RET -` - -// TestFuncAlign verifies that the address of a function can be aligned -// with a specfic value on arm64. 
-func TestFuncAlign(t *testing.T) { - if objabi.GOARCH != "arm64" { - t.Skipf("Skipping FuncAlign test on %s", objabi.GOARCH) - } - testenv.MustHaveGoBuild(t) - - tmpdir, err := ioutil.TempDir("", "TestFuncAlign") - if err != nil { - t.Fatal(err) - } - defer os.RemoveAll(tmpdir) - - src := filepath.Join(tmpdir, "falign.go") - err = ioutil.WriteFile(src, []byte(testFuncAlignSrc), 0666) - if err != nil { - t.Fatal(err) - } - src = filepath.Join(tmpdir, "falign.s") - err = ioutil.WriteFile(src, []byte(testFuncAlignAsmSrc), 0666) - if err != nil { - t.Fatal(err) - } - - // Build and run with old object file format. - cmd := exec.Command(testenv.GoToolPath(t), "build", "-o", "falign") - cmd.Env = append(os.Environ(), "GOARCH=arm64", "GOOS=linux") - cmd.Dir = tmpdir - out, err := cmd.CombinedOutput() - if err != nil { - t.Errorf("build failed: %v", err) - } - cmd = exec.Command(tmpdir + "/falign") - out, err = cmd.CombinedOutput() - if err != nil { - t.Errorf("failed to run with err %v, output: %s", err, out) - } - if string(out) != "PASS" { - t.Errorf("unexpected output: %s\n", out) - } - - // Build and run with new object file format. - cmd = exec.Command(testenv.GoToolPath(t), "build", "-o", "falign", "-gcflags=all=-newobj", "-asmflags=all=-newobj", "-ldflags=-newobj") - cmd.Env = append(os.Environ(), "GOARCH=arm64", "GOOS=linux") - cmd.Dir = tmpdir - out, err = cmd.CombinedOutput() - if err != nil { - t.Errorf("build with newobj failed: %v", err) - } - cmd = exec.Command(tmpdir + "/falign") - out, err = cmd.CombinedOutput() - if err != nil { - t.Errorf("failed to run with -newobj, err: %v, output: %s", err, out) - } - if string(out) != "PASS" { - t.Errorf("unexpected output with -newobj: %s\n", out) - } - -} diff --git a/src/runtime/textflag.h b/src/runtime/textflag.h index bbbef6357a..daca36d948 100644 --- a/src/runtime/textflag.h +++ b/src/runtime/textflag.h @@ -35,8 +35,3 @@ // Function is the top of the call stack. Call stack unwinders should stop // at this function. #define TOPFRAME 2048 -// ALIGN2048 means that the address of the function must be aligned to a -// 2048 bytes boundary. -// Only works on arm64 at present. -#define ALIGN2048 4096 - -- cgit v1.2.3-54-g00ecf From 5db079d2e5f97952be288c28a3a0690a523efdce Mon Sep 17 00:00:00 2001 From: Roland Shoemaker Date: Sun, 29 Mar 2020 03:04:51 +0000 Subject: crypto/rsa: reject invalid length PKCS#1v1.5 signatures Per RFC 8017, reject signatures which are not the same length as the RSA modulus. This matches the behavior of SignPKCS1v15 which properly left pads the signatures it generates to the size of the modulus. Fixes #21896 Change-Id: I2c42a0b24cf7fff158ece604b6f0c521a856d932 GitHub-Last-Rev: 6040f7990633630a0ad157cb17e016bb7db98a27 GitHub-Pull-Request: golang/go#38140 Reviewed-on: https://go-review.googlesource.com/c/go/+/226203 Reviewed-by: Filippo Valsorda Run-TryBot: Filippo Valsorda TryBot-Result: Gobot Gobot --- src/crypto/rsa/pkcs1v15.go | 7 +++++++ src/crypto/rsa/pkcs1v15_test.go | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/src/crypto/rsa/pkcs1v15.go b/src/crypto/rsa/pkcs1v15.go index 37790acb98..499242ffc5 100644 --- a/src/crypto/rsa/pkcs1v15.go +++ b/src/crypto/rsa/pkcs1v15.go @@ -277,6 +277,13 @@ func VerifyPKCS1v15(pub *PublicKey, hash crypto.Hash, hashed []byte, sig []byte) return ErrVerification } + // RFC 8017 Section 8.2.2: If the length of the signature S is not k + // octets (where k is the length in octets of the RSA modulus n), output + // "invalid signature" and stop. 
+ if k != len(sig) { + return ErrVerification + } + c := new(big.Int).SetBytes(sig) m := encrypt(new(big.Int), pub, c) em := leftPad(m.Bytes(), k) diff --git a/src/crypto/rsa/pkcs1v15_test.go b/src/crypto/rsa/pkcs1v15_test.go index 7e62560a04..26b8c5f26f 100644 --- a/src/crypto/rsa/pkcs1v15_test.go +++ b/src/crypto/rsa/pkcs1v15_test.go @@ -9,6 +9,7 @@ import ( "crypto" "crypto/rand" "crypto/sha1" + "crypto/sha256" "encoding/base64" "encoding/hex" "io" @@ -296,3 +297,20 @@ var rsaPrivateKey = &PrivateKey{ fromBase10("94560208308847015747498523884063394671606671904944666360068158221458669711639"), }, } + +func TestShortPKCS1v15Signature(t *testing.T) { + pub := &PublicKey{ + E: 65537, + N: fromBase10("8272693557323587081220342447407965471608219912416565371060697606400726784709760494166080686904546560026343451112103559482851304715739629410219358933351333"), + } + sig, err := hex.DecodeString("193a310d0dcf64094c6e3a00c8219b80ded70535473acff72c08e1222974bb24a93a535b1dc4c59fc0e65775df7ba2007dd20e9193f4c4025a18a7070aee93") + if err != nil { + t.Fatalf("failed to decode signature: %s", err) + } + + h := sha256.Sum256([]byte("hello")) + err = VerifyPKCS1v15(pub, crypto.SHA256, h[:], sig) + if err == nil { + t.Fatal("VerifyPKCS1v15 accepted a truncated signature") + } +} -- cgit v1.2.3-54-g00ecf From 5d2ddcd3f51c1ff7aa0a84604b1d8610a17a7933 Mon Sep 17 00:00:00 2001 From: "Bryan C. Mills" Date: Mon, 30 Mar 2020 14:04:08 -0400 Subject: context: fix a flaky timeout in TestLayersTimeout In CL 223019, I reduced the short timeout in the testLayers helper to be even shorter than it was. That exposed a racy (time-dependent) select later in the function, which failed in one of the slower builders (android-386-emu). Also streamline the test to make it easier to test with a very high -count flag: - Run tests that sleep for shortDuration in parallel to reduce latency. - Use shorter durations in examples to reduce test running time. - Avoid mutating global state (in package math/rand) in testLayers. After this change (but not before it), 'go test -run=TestLayersTimeout -count=100000 context' passes on my workstation. 
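As a rough, hypothetical sketch (not code from this CL), the pattern the
last two points describe looks like this:

	package context_test

	import (
		"math/rand"
		"testing"
	)

	// testWithSeed derives a private rand.Rand from an explicit seed
	// instead of reseeding the shared global source, so the test stays
	// deterministic and is safe to mark with t.Parallel.
	func testWithSeed(t *testing.T, seed int64) {
		t.Parallel()
		r := rand.New(rand.NewSource(seed))
		if n := r.Intn(3); n < 0 || n > 2 {
			t.Fatalf("Intn(3) = %d, want a value in [0, 3)", n)
		}
	}

	func TestSeedExample(t *testing.T) { testWithSeed(t, 1) }

The diff below applies exactly this shape to testLayers.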
Fixes #38161 Change-Id: Iaf4abe7ac308b2100d8828267cda9f4f8ae4be82 Reviewed-on: https://go-review.googlesource.com/c/go/+/226457 Reviewed-by: Ian Lance Taylor --- src/context/context_test.go | 27 ++++++++++++++++++--------- src/context/example_test.go | 6 ++++-- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/context/context_test.go b/src/context/context_test.go index 98c6683335..6b392a29da 100644 --- a/src/context/context_test.go +++ b/src/context/context_test.go @@ -27,6 +27,7 @@ type testingT interface { Log(args ...interface{}) Logf(format string, args ...interface{}) Name() string + Parallel() Skip(args ...interface{}) SkipNow() Skipf(format string, args ...interface{}) @@ -284,6 +285,8 @@ func testDeadline(c Context, name string, t testingT) { } func XTestDeadline(t testingT) { + t.Parallel() + c, _ := WithDeadline(Background(), time.Now().Add(shortDuration)) if got, prefix := fmt.Sprint(c), "context.Background.WithDeadline("; !strings.HasPrefix(got, prefix) { t.Errorf("c.String() = %q want prefix %q", got, prefix) @@ -307,6 +310,8 @@ func XTestDeadline(t testingT) { } func XTestTimeout(t testingT) { + t.Parallel() + c, _ := WithTimeout(Background(), shortDuration) if got, prefix := fmt.Sprint(c), "context.Background.WithDeadline("; !strings.HasPrefix(got, prefix) { t.Errorf("c.String() = %q want prefix %q", got, prefix) @@ -417,9 +422,9 @@ func XTestAllocs(t testingT, testingShort func() bool, testingAllocsPerRun func( gccgoLimit: 3, }, { - desc: "WithTimeout(bg, 15*time.Millisecond)", + desc: "WithTimeout(bg, 1*time.Nanosecond)", f: func() { - c, _ := WithTimeout(bg, 15*time.Millisecond) + c, _ := WithTimeout(bg, 1*time.Nanosecond) <-c.Done() }, limit: 12, @@ -545,7 +550,9 @@ func XTestLayersTimeout(t testingT) { } func testLayers(t testingT, seed int64, testTimeout bool) { - rand.Seed(seed) + t.Parallel() + + r := rand.New(rand.NewSource(seed)) errorf := func(format string, a ...interface{}) { t.Errorf(fmt.Sprintf("seed=%d: %s", seed, format), a...) } @@ -560,7 +567,7 @@ func testLayers(t testingT, seed int64, testTimeout bool) { ctx = Background() ) for i := 0; i < minLayers || numTimers == 0 || len(cancels) == 0 || len(vals) == 0; i++ { - switch rand.Intn(3) { + switch r.Intn(3) { case 0: v := new(value) ctx = WithValue(ctx, v, v) @@ -587,10 +594,12 @@ func testLayers(t testingT, seed int64, testTimeout bool) { } } } - select { - case <-ctx.Done(): - errorf("ctx should not be canceled yet") - default: + if !testTimeout { + select { + case <-ctx.Done(): + errorf("ctx should not be canceled yet") + default: + } } if s, prefix := fmt.Sprint(ctx), "context.Background."; !strings.HasPrefix(s, prefix) { t.Errorf("ctx.String() = %q want prefix %q", s, prefix) @@ -608,7 +617,7 @@ func testLayers(t testingT, seed int64, testTimeout bool) { } checkValues("after timeout") } else { - cancel := cancels[rand.Intn(len(cancels))] + cancel := cancels[r.Intn(len(cancels))] cancel() select { case <-ctx.Done(): diff --git a/src/context/example_test.go b/src/context/example_test.go index b91a8acef3..72ac5d2e49 100644 --- a/src/context/example_test.go +++ b/src/context/example_test.go @@ -10,6 +10,8 @@ import ( "time" ) +const shortDuration = 1 * time.Millisecond // a reasonable duration to block in an example + // This example demonstrates the use of a cancelable context to prevent a // goroutine leak. By the end of the example function, the goroutine started // by gen will return without leaking. 
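For context, here is a self-contained sketch of the pattern the new
shortDuration constant supports (assembled for illustration; the real
examples live in example_test.go and are rewritten in the hunks below):

	package main

	import (
		"context"
		"fmt"
		"time"
	)

	const shortDuration = 1 * time.Millisecond // a reasonable duration to block in an example

	func main() {
		ctx, cancel := context.WithTimeout(context.Background(), shortDuration)
		defer cancel()

		select {
		case <-time.After(1 * time.Second):
			fmt.Println("overslept")
		case <-ctx.Done():
			fmt.Println(ctx.Err()) // prints "context deadline exceeded"
		}
	}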
@@ -55,7 +57,7 @@ func ExampleWithCancel() { // This example passes a context with an arbitrary deadline to tell a blocking // function that it should abandon its work as soon as it gets to it. func ExampleWithDeadline() { - d := time.Now().Add(50 * time.Millisecond) + d := time.Now().Add(shortDuration) ctx, cancel := context.WithDeadline(context.Background(), d) // Even though ctx will be expired, it is good practice to call its @@ -79,7 +81,7 @@ func ExampleWithDeadline() { func ExampleWithTimeout() { // Pass a context with a timeout to tell a blocking function that it // should abandon its work after the timeout elapses. - ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond) + ctx, cancel := context.WithTimeout(context.Background(), shortDuration) defer cancel() select { -- cgit v1.2.3-54-g00ecf From 2d77d3330537e11a0d9a233ba5f4facf262e9d8c Mon Sep 17 00:00:00 2001 From: "Bryan C. Mills" Date: Thu, 26 Mar 2020 21:28:37 -0400 Subject: net/http: treat a nil Body from a custom RoundTripper as an empty one Fixes #38095 Change-Id: I4f65ce01e7aed22240eee979c41535d0b8b9a8dc Reviewed-on: https://go-review.googlesource.com/c/go/+/225717 Run-TryBot: Bryan C. Mills TryBot-Result: Gobot Gobot Reviewed-by: Russ Cox --- src/net/http/client.go | 15 ++++++++++++++- src/net/http/client_test.go | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/src/net/http/client.go b/src/net/http/client.go index 638ff500a4..3860d97d8f 100644 --- a/src/net/http/client.go +++ b/src/net/http/client.go @@ -269,7 +269,20 @@ func send(ireq *Request, rt RoundTripper, deadline time.Time) (resp *Response, d return nil, didTimeout, fmt.Errorf("http: RoundTripper implementation (%T) returned a nil *Response with a nil error", rt) } if resp.Body == nil { - return nil, didTimeout, fmt.Errorf("http: RoundTripper implementation (%T) returned a *Response with a nil Body", rt) + // The documentation on the Body field says “The http Client and Transport + // guarantee that Body is always non-nil, even on responses without a body + // or responses with a zero-length body.” Unfortunately, we didn't document + // that same constraint for arbitrary RoundTripper implementations, and + // RoundTripper implementations in the wild (mostly in tests) assume that + // they can use a nil Body to mean an empty one (similar to Request.Body). + // (See https://golang.org/issue/38095.) + // + // If the ContentLength allows the Body to be empty, fill in an empty one + // here to ensure that it is non-nil. 
+ if resp.ContentLength > 0 && req.Method != "HEAD" { + return nil, didTimeout, fmt.Errorf("http: RoundTripper implementation (%T) returned a *Response with content length %d but a nil Body", rt, resp.ContentLength) + } + resp.Body = ioutil.NopCloser(strings.NewReader("")) } if !deadline.IsZero() { resp.Body = &cancelTimerBody{ diff --git a/src/net/http/client_test.go b/src/net/http/client_test.go index 2b4f53f802..80807fae7a 100644 --- a/src/net/http/client_test.go +++ b/src/net/http/client_test.go @@ -1991,3 +1991,38 @@ func testClientDoCanceledVsTimeout(t *testing.T, h2 bool) { }) } } + +type nilBodyRoundTripper struct{} + +func (nilBodyRoundTripper) RoundTrip(req *Request) (*Response, error) { + return &Response{ + StatusCode: StatusOK, + Status: StatusText(StatusOK), + Body: nil, + Request: req, + }, nil +} + +func TestClientPopulatesNilResponseBody(t *testing.T) { + c := &Client{Transport: nilBodyRoundTripper{}} + + resp, err := c.Get("http://localhost/anything") + if err != nil { + t.Fatalf("Client.Get rejected Response with nil Body: %v", err) + } + + if resp.Body == nil { + t.Fatalf("Client failed to provide a non-nil Body as documented") + } + defer func() { + if err := resp.Body.Close(); err != nil { + t.Fatalf("error from Close on substitute Response.Body: %v", err) + } + }() + + if b, err := ioutil.ReadAll(resp.Body); err != nil { + t.Errorf("read error from substitute Response.Body: %v", err) + } else if len(b) != 0 { + t.Errorf("substitute Response.Body was unexpectedly non-empty: %q", b) + } +} -- cgit v1.2.3-54-g00ecf From 2cb80bdee0dcb4ff55f46ab7025a37546aef6b7a Mon Sep 17 00:00:00 2001 From: Richard Miller Date: Tue, 31 Mar 2020 19:44:19 +0100 Subject: runtime: skip gdb tests on Plan 9 There's no gdb on Plan 9. Change-Id: Ibeb0fbd3c096a69181c19b1fb2bc6291612b6da3 Reviewed-on: https://go-review.googlesource.com/c/go/+/226657 Reviewed-by: David du Colombier <0intro@gmail.com> --- src/runtime/runtime-gdb_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/runtime/runtime-gdb_test.go b/src/runtime/runtime-gdb_test.go index 79b4621614..5dbe4bf88a 100644 --- a/src/runtime/runtime-gdb_test.go +++ b/src/runtime/runtime-gdb_test.go @@ -41,6 +41,8 @@ func checkGdbEnvironment(t *testing.T) { if testing.Short() { t.Skip("skipping gdb tests on AIX; see https://golang.org/issue/35710") } + case "plan9": + t.Skip("there is no gdb on Plan 9") } if final := os.Getenv("GOROOT_FINAL"); final != "" && runtime.GOROOT() != final { t.Skip("gdb test can fail with GOROOT_FINAL pending") -- cgit v1.2.3-54-g00ecf From 3ff9c4f2a6670edaee3962571ef6241c1bfcc2fc Mon Sep 17 00:00:00 2001 From: "Bryan C. Mills" Date: Mon, 30 Mar 2020 15:43:08 -0400 Subject: os/signal: make TestStop resilient to initially-blocked signals For reasons unknown, SIGUSR1 appears to be blocked at process start for tests on the android-arm-corellium and android-arm64-corellium builders. (This has been observed before, too: see CL 203957.) Make the test resilient to blocked signals by always calling Notify and waiting for potential signal delivery after sending any signal that is not known to be unblocked. Also remove the initial SIGWINCH signal from testCancel. 
The behavior of an unhandled SIGWINCH is already tested in TestStop, so we don't need to re-test that same case: waiting for an unhandled signal takes a comparatively long time (because we necessarily don't know when it has been delivered), so this redundancy makes the overall test binary needlessly slow, especially since it is called from both TestReset and TestIgnore. Since each signal is always unblocked while we have a notification channel registered for it, we don't need to modify any other tests: TestStop and testCancel are the only functions that send signals without a registered channel. Fixes #38165 Updates #33174 Updates #15661 Change-Id: I215880894e954b62166024085050d34323431b63 Reviewed-on: https://go-review.googlesource.com/c/go/+/226461 Run-TryBot: Bryan C. Mills Reviewed-by: Ian Lance Taylor TryBot-Result: Gobot Gobot --- src/os/signal/signal_test.go | 102 +++++++++++++++++++++---------------------- 1 file changed, 49 insertions(+), 53 deletions(-) diff --git a/src/os/signal/signal_test.go b/src/os/signal/signal_test.go index bec5c1599e..e5dcda4a2b 100644 --- a/src/os/signal/signal_test.go +++ b/src/os/signal/signal_test.go @@ -152,10 +152,6 @@ func TestStress(t *testing.T) { } func testCancel(t *testing.T, ignore bool) { - // Send SIGWINCH. By default this signal should be ignored. - syscall.Kill(syscall.Getpid(), syscall.SIGWINCH) - quiesce() - // Ask to be notified on c1 when a SIGWINCH is received. c1 := make(chan os.Signal, 1) Notify(c1, syscall.SIGWINCH) @@ -175,25 +171,16 @@ func testCancel(t *testing.T, ignore bool) { waitSig(t, c2, syscall.SIGHUP) // Ignore, or reset the signal handlers for, SIGWINCH and SIGHUP. + // Either way, this should undo both calls to Notify above. if ignore { Ignore(syscall.SIGWINCH, syscall.SIGHUP) + // Don't bother deferring a call to Reset: it is documented to undo Notify, + // but its documentation says nothing about Ignore, and (as of the time of + // writing) it empirically does not undo an Ignore. } else { Reset(syscall.SIGWINCH, syscall.SIGHUP) } - // At this point we do not expect any further signals on c1. - // However, it is just barely possible that the initial SIGWINCH - // at the start of this function was delivered after we called - // Notify on c1. In that case the waitSig for SIGWINCH may have - // picked up that initial SIGWINCH, and the second SIGWINCH may - // then have been delivered on the channel. This sequence of events - // may have caused issue 15661. - // So, read any possible signal from the channel now. - select { - case <-c1: - default: - } - // Send this process a SIGWINCH. It should be ignored. syscall.Kill(syscall.Getpid(), syscall.SIGWINCH) @@ -206,20 +193,24 @@ func testCancel(t *testing.T, ignore bool) { select { case s := <-c1: - t.Fatalf("unexpected signal %v", s) + t.Errorf("unexpected signal %v", s) default: // nothing to read - good } select { case s := <-c2: - t.Fatalf("unexpected signal %v", s) + t.Errorf("unexpected signal %v", s) default: // nothing to read - good } - // Reset the signal handlers for all signals. - Reset() + // One or both of the signals may have been blocked for this process + // by the calling process. + // Discard any queued signals now to avoid interfering with other tests. + Notify(c1, syscall.SIGWINCH) + Notify(c2, syscall.SIGHUP) + quiesce() } // Test that Reset cancels registration for listed signals on all channels. @@ -313,61 +304,66 @@ func TestStop(t *testing.T) { // Test the three different signals concurrently. t.Parallel() - // Send the signal. 
+ // If the signal is not ignored, send the signal before registering a + // channel to verify the behavior of the default Go handler. // If it's SIGWINCH or SIGUSR1 we should not see it. // If it's SIGHUP, maybe we'll die. Let the flag tell us what to do. - switch sig { - case syscall.SIGHUP: - if *sendUncaughtSighup == 1 { - syscall.Kill(syscall.Getpid(), sig) - for *dieFromSighup { - quiesce() - } - } - default: + mayHaveBlockedSignal := false + if !Ignored(sig) && (sig != syscall.SIGHUP || *sendUncaughtSighup == 1) { syscall.Kill(syscall.Getpid(), sig) + quiesce() + + // We don't know whether sig is blocked for this process; see + // https://golang.org/issue/38165. Assume that it could be. + mayHaveBlockedSignal = true } - quiesce() // Ask for signal c := make(chan os.Signal, 1) Notify(c, sig) - // Send this process that signal + // Send this process the signal again. syscall.Kill(syscall.Getpid(), sig) waitSig(t, c, sig) + if mayHaveBlockedSignal { + // We may have received a queued initial signal in addition to the one + // that we sent after Notify. If so, waitSig may have observed that + // initial signal instead of the second one, and we may need to wait for + // the second signal to clear. Do that now. + quiesce() + select { + case <-c: + default: + } + } + // Stop watching for the signal and send it again. // If it's SIGHUP, maybe we'll die. Let the flag tell us what to do. Stop(c) - switch sig { - case syscall.SIGHUP: - if *sendUncaughtSighup == 2 { - syscall.Kill(syscall.Getpid(), sig) - for *dieFromSighup { - quiesce() - } - } - default: + if sig != syscall.SIGHUP || *sendUncaughtSighup == 2 { syscall.Kill(syscall.Getpid(), sig) - } + quiesce() - quiesce() - select { - case s := <-c: - if sig == syscall.SIGUSR1 && s == syscall.SIGUSR1 && runtime.GOOS == "android" { - testenv.SkipFlaky(t, 38165) + select { + case s := <-c: + t.Errorf("unexpected signal %v", s) + default: + // nothing to read - good } - t.Fatalf("unexpected signal %v", s) - default: - // nothing to read - good + + // If we're going to receive a signal, it has almost certainly been + // received by now. However, it may have been blocked for this process — + // we don't know. Explicitly unblock it and wait for it to clear now. + Notify(c, sig) + quiesce() + Stop(c) } }) } } -// Test that when run under nohup, an uncaught SIGHUP does not kill the program, -// but a +// Test that when run under nohup, an uncaught SIGHUP does not kill the program. func TestNohup(t *testing.T) { // Ugly: ask for SIGHUP so that child will not have no-hup set // even if test is running under nohup environment. -- cgit v1.2.3-54-g00ecf From faa53e17d1c91d97aa2b780ac41190d151aa6b0e Mon Sep 17 00:00:00 2001 From: Jay Conrod Date: Thu, 12 Mar 2020 18:56:40 -0400 Subject: cmd/go: add support for GOPROXY fallback on unexpected errors URLs in GOPROXY may now be separated with commas (,) or pipes (|). If a request to a proxy fails with any error (including connection errors and timeouts) and the proxy URL is followed by a pipe, the go command will try the request with the next proxy in the list. If the proxy is followed by a comma, the go command will only try the next proxy if the error a 404 or 410 HTTP response. The go command will determine how to connect to the checksum database using the same logic. Before accessing the checksum database, the go command sends a request to /sumdb//supported. 
If a proxy responds with 404 or 410, or if any other error occurs and the proxy URL in GOPROXY is followed by a pipe, the go command will try the request with the next proxy. If all proxies respond with 404 or 410 or are configured to fall back on errors, the go command will connect to the checksum database directly. This CL does not change the default value or meaning of GOPROXY. Fixes #37367 Change-Id: I35dd218823fe8cb9383e9ac7bbfec2cc8a358748 Reviewed-on: https://go-review.googlesource.com/c/go/+/226460 Run-TryBot: Jay Conrod TryBot-Result: Gobot Gobot Reviewed-by: Bryan C. Mills --- doc/go1.15.html | 12 +++ src/cmd/go/alldocs.go | 18 ++-- src/cmd/go/internal/modfetch/proxy.go | 109 +++++++++++++++++-------- src/cmd/go/internal/modfetch/sumdb.go | 82 ++++++++++--------- src/cmd/go/internal/modload/help.go | 18 ++-- src/cmd/go/testdata/script/mod_proxy_list.txt | 14 +++- src/cmd/go/testdata/script/mod_sumdb_proxy.txt | 17 ++++ 7 files changed, 174 insertions(+), 96 deletions(-) diff --git a/doc/go1.15.html b/doc/go1.15.html index aa951eefad..c59fc4f151 100644 --- a/doc/go1.15.html +++ b/doc/go1.15.html @@ -43,6 +43,18 @@ TODO

 <h3 id="go-command">Go command</h3>
 
+<p>
+  The <code>GOPROXY</code> environment variable now supports skipping proxies
+  that return errors. Proxy URLs may now be separated with either commas
+  (<code>,</code>) or pipe characters (<code>|</code>). If a proxy URL is
+  followed by a comma, the <code>go</code> command will only try the next proxy
+  in the list after a 404 or 410 HTTP response. If a proxy URL is followed by a
+  pipe character, the <code>go</code> command will try the next proxy in the
+  list after any error. Note that the default value of <code>GOPROXY</code>
+  remains <code>https://proxy.golang.org,direct</code>, which does not fall
+  back to <code>direct</code> in case of errors.
+</p>
+
 <p>
 TODO
 </p>
diff --git a/src/cmd/go/alldocs.go b/src/cmd/go/alldocs.go
index ef054c8938..a20a92d03d 100644
--- a/src/cmd/go/alldocs.go
+++ b/src/cmd/go/alldocs.go
@@ -2694,15 +2694,15 @@
 // Go module mirror run by Google and fall back to a direct connection
 // if the proxy reports that it does not have the module (HTTP error 404 or 410).
 // See https://proxy.golang.org/privacy for the service's privacy policy.
-// If GOPROXY is set to the string "direct", downloads use a direct connection
-// to source control servers. Setting GOPROXY to "off" disallows downloading
-// modules from any source. Otherwise, GOPROXY is expected to be a comma-separated
-// list of the URLs of module proxies, in which case the go command will fetch
-// modules from those proxies. For each request, the go command tries each proxy
-// in sequence, only moving to the next if the current proxy returns a 404 or 410
-// HTTP response. The string "direct" may appear in the proxy list,
-// to cause a direct connection to be attempted at that point in the search.
-// Any proxies listed after "direct" are never consulted.
+//
+// If GOPROXY is set to the string "direct", downloads use a direct connection to
+// source control servers. Setting GOPROXY to "off" disallows downloading modules
+// from any source. Otherwise, GOPROXY is expected to be a list of module proxy URLs
+// separated by either comma (,) or pipe (|) characters, which control error
+// fallback behavior. For each request, the go command tries each proxy in
+// sequence. If there is an error, the go command will try the next proxy in the
+// list if the error is a 404 or 410 HTTP response or if the current proxy is
+// followed by a pipe character, indicating it is safe to fall back on any error.
 //
 // The GOPRIVATE and GONOPROXY environment variables allow bypassing
 // the proxy for selected modules. See 'go help module-private' for details.
diff --git a/src/cmd/go/internal/modfetch/proxy.go b/src/cmd/go/internal/modfetch/proxy.go
index dcea71adb3..67b06cbcd6 100644
--- a/src/cmd/go/internal/modfetch/proxy.go
+++ b/src/cmd/go/internal/modfetch/proxy.go
@@ -101,27 +101,51 @@ cached module versions with GOPROXY=https://example.com/proxy.
 
 var proxyOnce struct {
 	sync.Once
-	list []string
+	list []proxySpec
 	err  error
 }
 
-func proxyURLs() ([]string, error) {
+type proxySpec struct {
+	// url is the proxy URL or one of "off", "direct", "noproxy".
+	url string
+
+	// fallBackOnError is true if a request should be attempted on the next proxy
+	// in the list after any error from this proxy. If fallBackOnError is false,
+	// the request will only be attempted on the next proxy if the error is
+	// equivalent to os.ErrNotExist, which is true for 404 and 410 responses.
+	fallBackOnError bool
+}
+
+func proxyList() ([]proxySpec, error) {
 	proxyOnce.Do(func() {
 		if cfg.GONOPROXY != "" && cfg.GOPROXY != "direct" {
-			proxyOnce.list = append(proxyOnce.list, "noproxy")
+			proxyOnce.list = append(proxyOnce.list, proxySpec{url: "noproxy"})
 		}
-		for _, proxyURL := range strings.Split(cfg.GOPROXY, ",") {
-			proxyURL = strings.TrimSpace(proxyURL)
-			if proxyURL == "" {
+
+		goproxy := cfg.GOPROXY
+		for goproxy != "" {
+			var url string
+			fallBackOnError := false
+			if i := strings.IndexAny(goproxy, ",|"); i >= 0 {
+				url = goproxy[:i]
+				fallBackOnError = goproxy[i] == '|'
+				goproxy = goproxy[i+1:]
+			} else {
+				url = goproxy
+				goproxy = ""
+			}
+
+			url = strings.TrimSpace(url)
+			if url == "" {
 				continue
 			}
-			if proxyURL == "off" {
+			if url == "off" {
 				// "off" always fails hard, so can stop walking list.
-				proxyOnce.list = append(proxyOnce.list, "off")
+				proxyOnce.list = append(proxyOnce.list, proxySpec{url: "off"})
 				break
 			}
-			if proxyURL == "direct" {
-				proxyOnce.list = append(proxyOnce.list, "direct")
+			if url == "direct" {
+				proxyOnce.list = append(proxyOnce.list, proxySpec{url: "direct"})
 				// For now, "direct" is the end of the line. We may decide to add some
 				// sort of fallback behavior for them in the future, so ignore
 				// subsequent entries for forward-compatibility.
@@ -131,18 +155,21 @@
 			// Single-word tokens are reserved for built-in behaviors, and anything
 			// containing the string ":/" or matching an absolute file path must be a
 			// complete URL. For all other paths, implicitly add "https://".
-			if strings.ContainsAny(proxyURL, ".:/") && !strings.Contains(proxyURL, ":/") && !filepath.IsAbs(proxyURL) && !path.IsAbs(proxyURL) {
-				proxyURL = "https://" + proxyURL
+			if strings.ContainsAny(url, ".:/") && !strings.Contains(url, ":/") && !filepath.IsAbs(url) && !path.IsAbs(url) {
+				url = "https://" + url
 			}
 
 			// Check that newProxyRepo accepts the URL.
 			// It won't do anything with the path.
-			_, err := newProxyRepo(proxyURL, "golang.org/x/text")
-			if err != nil {
+			if _, err := newProxyRepo(url, "golang.org/x/text"); err != nil {
 				proxyOnce.err = err
 				return
 			}
-			proxyOnce.list = append(proxyOnce.list, proxyURL)
+
+			proxyOnce.list = append(proxyOnce.list, proxySpec{
+				url:             url,
+				fallBackOnError: fallBackOnError,
+			})
 		}
 	})
 
@@ -150,15 +177,16 @@
 }
 
 // TryProxies iterates f over each configured proxy (including "noproxy" and
-// "direct" if applicable) until f returns an error that is not
-// equivalent to os.ErrNotExist.
+// "direct" if applicable) until f returns no error or until f returns an
+// error that is not equivalent to os.ErrNotExist on a proxy configured
+// not to fall back on errors.
 //
 // TryProxies then returns that final error.
 //
 // If GOPROXY is set to "off", TryProxies invokes f once with the argument
 // "off".
 func TryProxies(f func(proxy string) error) error {
-	proxies, err := proxyURLs()
+	proxies, err := proxyList()
 	if err != nil {
 		return err
 	}
@@ -166,28 +194,39 @@
 		return f("off")
 	}
 
-	var lastAttemptErr error
+	// We try to report the most helpful error to the user. "direct" and "noproxy"
+	// errors are best, followed by proxy errors other than ErrNotExist, followed
+	// by ErrNotExist. Note that errProxyOff, errNoproxy, and errUseProxy are
+	// equivalent to ErrNotExist.
+	const (
+		notExistRank = iota
+		proxyRank
+		directRank
+	)
+	var bestErr error
+	bestErrRank := notExistRank
 	for _, proxy := range proxies {
-		err = f(proxy)
-		if !errors.Is(err, os.ErrNotExist) {
-			lastAttemptErr = err
-			break
+		err := f(proxy.url)
+		if err == nil {
+			return nil
+		}
+		isNotExistErr := errors.Is(err, os.ErrNotExist)
+
+		if proxy.url == "direct" || proxy.url == "noproxy" {
+			bestErr = err
+			bestErrRank = directRank
+		} else if bestErrRank <= proxyRank && !isNotExistErr {
+			bestErr = err
+			bestErrRank = proxyRank
+		} else if bestErrRank == notExistRank {
+			bestErr = err
 		}
-		// The error indicates that the module does not exist.
-		// In general we prefer to report the last such error,
-		// because it indicates the error that occurs after all other
-		// options have been exhausted.
-		//
-		// However, for modules in the NOPROXY list, the most useful error occurs
-		// first (with proxy set to "noproxy"), and the subsequent errors are all
-		// errNoProxy (which is not particularly helpful). Do not overwrite a more
-		// useful error with errNoproxy.
-		if lastAttemptErr == nil || !errors.Is(err, errNoproxy) {
-			lastAttemptErr = err
+		if !proxy.fallBackOnError && !isNotExistErr {
+			break
 		}
 	}
-	return lastAttemptErr
+	return bestErr
 }
 
 type proxyRepo struct {
diff --git a/src/cmd/go/internal/modfetch/sumdb.go b/src/cmd/go/internal/modfetch/sumdb.go
index 1ed71dfb85..ff81ef687e 100644
--- a/src/cmd/go/internal/modfetch/sumdb.go
+++ b/src/cmd/go/internal/modfetch/sumdb.go
@@ -26,6 +26,7 @@ import (
 	"cmd/go/internal/lockedfile"
 	"cmd/go/internal/str"
 	"cmd/go/internal/web"
+
 	"golang.org/x/mod/module"
 	"golang.org/x/mod/sumdb"
 	"golang.org/x/mod/sumdb/note"
@@ -146,49 +147,50 @@ func (c *dbClient) initBase() {
 	}
 
 	// Try proxies in turn until we find out how to connect to this database.
-	urls, err := proxyURLs()
-	if err != nil {
-		c.baseErr = err
-		return
-	}
-	for _, proxyURL := range urls {
-		if proxyURL == "noproxy" {
-			continue
-		}
-		if proxyURL == "direct" || proxyURL == "off" {
-			break
-		}
-		proxy, err := url.Parse(proxyURL)
-		if err != nil {
-			c.baseErr = err
-			return
-		}
-		// Quoting https://golang.org/design/25530-sumdb#proxying-a-checksum-database:
-		//
-		// Before accessing any checksum database URL using a proxy,
-		// the proxy client should first fetch <proxyURL>/sumdb/<sumdb-name>/supported.
-		// If that request returns a successful (HTTP 200) response, then the proxy supports
-		// proxying checksum database requests. In that case, the client should use
-		// the proxied access method only, never falling back to a direct connection to the database.
-		// If the <proxyURL>/sumdb/<sumdb-name>/supported check fails with a “not found” (HTTP 404)
-		// or “gone” (HTTP 410) response, the proxy is unwilling to proxy the checksum database,
-		// and the client should connect directly to the database.
-		// Any other response is treated as the database being unavailable.
-		_, err = web.GetBytes(web.Join(proxy, "sumdb/"+c.name+"/supported"))
-		if err == nil {
+	//
+	// Before accessing any checksum database URL using a proxy, the proxy
+	// client should first fetch <proxyURL>/sumdb/<sumdb-name>/supported.
+	//
+	// If that request returns a successful (HTTP 200) response, then the proxy
+	// supports proxying checksum database requests. In that case, the client
+	// should use the proxied access method only, never falling back to a direct
+	// connection to the database.
+	//
+	// If the /sumdb/<sumdb-name>/supported check fails with a “not found” (HTTP
+	// 404) or “gone” (HTTP 410) response, or if the proxy is configured to fall
+	// back on errors, the client will try the next proxy. If there are no
+	// proxies left or if the proxy is "direct" or "off", the client should
+	// connect directly to that database.
+	//
+	// Any other response is treated as the database being unavailable.
+	//
+	// See https://golang.org/design/25530-sumdb#proxying-a-checksum-database.
+	err := TryProxies(func(proxy string) error {
+		switch proxy {
+		case "noproxy":
+			return errUseProxy
+		case "direct", "off":
+			return errProxyOff
+		default:
+			proxyURL, err := url.Parse(proxy)
+			if err != nil {
+				return err
+			}
+			if _, err := web.GetBytes(web.Join(proxyURL, "sumdb/"+c.name+"/supported")); err != nil {
+				return err
+			}
 			// Success! This proxy will help us.
-			c.base = web.Join(proxy, "sumdb/"+c.name)
-			return
-		}
-		// If the proxy serves a non-404/410, give up.
-		if !errors.Is(err, os.ErrNotExist) {
-			c.baseErr = err
-			return
+			c.base = web.Join(proxyURL, "sumdb/"+c.name)
+			return nil
 		}
+	})
+	if errors.Is(err, os.ErrNotExist) {
+		// No proxies, or all proxies failed (with 404, 410, or were allowed
+		// to fall back), or we reached an explicit "direct" or "off".
+		c.base = c.direct
+	} else if err != nil {
+		c.baseErr = err
 	}
-
-	// No proxies, or all proxies said 404, or we reached an explicit "direct".
-	c.base = c.direct
 }
 
 // ReadConfig reads the key from c.key
diff --git a/src/cmd/go/internal/modload/help.go b/src/cmd/go/internal/modload/help.go
index bd19bb43aa..d80206b194 100644
--- a/src/cmd/go/internal/modload/help.go
+++ b/src/cmd/go/internal/modload/help.go
@@ -363,15 +363,15 @@ variable (see 'go help env'). The default setting for GOPROXY is
 Go module mirror run by Google and fall back to a direct connection
 if the proxy reports that it does not have the module (HTTP error 404 or 410).
 See https://proxy.golang.org/privacy for the service's privacy policy.
-If GOPROXY is set to the string "direct", downloads use a direct connection
-to source control servers. Setting GOPROXY to "off" disallows downloading
-modules from any source. Otherwise, GOPROXY is expected to be a comma-separated
-list of the URLs of module proxies, in which case the go command will fetch
-modules from those proxies. For each request, the go command tries each proxy
-in sequence, only moving to the next if the current proxy returns a 404 or 410
-HTTP response. The string "direct" may appear in the proxy list,
-to cause a direct connection to be attempted at that point in the search.
-Any proxies listed after "direct" are never consulted.
+
+If GOPROXY is set to the string "direct", downloads use a direct connection to
+source control servers. Setting GOPROXY to "off" disallows downloading modules
+from any source. Otherwise, GOPROXY is expected to be a list of module proxy URLs
+separated by either comma (,) or pipe (|) characters, which control error
+fallback behavior. For each request, the go command tries each proxy in
+sequence. If there is an error, the go command will try the next proxy in the
+list if the error is a 404 or 410 HTTP response or if the current proxy is
+followed by a pipe character, indicating it is safe to fall back on any error.
 
 The GOPRIVATE and GONOPROXY environment variables allow bypassing the proxy for
 selected modules. See 'go help module-private' for details.
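
For example (hypothetical proxy URLs, shown only to illustrate the separator semantics described above):

	GOPROXY=https://proxy-a.example|https://proxy-b.example,https://proxy-c.example

With this setting, a failure on proxy-a may fall back to proxy-b on any error, because proxy-a is followed by '|', while a failure on proxy-b falls back to proxy-c only on a 404 or 410 response, because proxy-b is followed by ','.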
diff --git a/src/cmd/go/testdata/script/mod_proxy_list.txt b/src/cmd/go/testdata/script/mod_proxy_list.txt
index a48622814a..849cf2c476 100644
--- a/src/cmd/go/testdata/script/mod_proxy_list.txt
+++ b/src/cmd/go/testdata/script/mod_proxy_list.txt
@@ -10,17 +10,25 @@ stderr '404 Not Found'
 env GOPROXY=$proxy/404,$proxy/410,$proxy
 go get rsc.io/quote@v1.1.0
 
-# get should not walk past other 4xx errors.
+# get should not walk past other 4xx errors if proxies are separated with ','.
 env GOPROXY=$proxy/403,$proxy
 ! go get rsc.io/quote@v1.2.0
 stderr 'reading.*/403/rsc.io/.*: 403 Forbidden'
 
-# get should not walk past non-4xx errors.
+# get should not walk past non-4xx errors if proxies are separated with ','.
 env GOPROXY=$proxy/500,$proxy
 ! go get rsc.io/quote@v1.3.0
 stderr 'reading.*/500/rsc.io/.*: 500 Internal Server Error'
 
-# get should return the final 404/410 if that's all we have.
+# get should walk past other 4xx errors if proxies are separated with '|'.
+env GOPROXY=$proxy/403|https://0.0.0.0|$proxy
+go get rsc.io/quote@v1.2.0
+
+# get should walk past non-4xx errors if proxies are separated with '|'.
+env GOPROXY=$proxy/500|https://0.0.0.0|$proxy
+go get rsc.io/quote@v1.3.0
+
+# get should return the final error if that's all we have.
 env GOPROXY=$proxy/404,$proxy/410
 ! go get rsc.io/quote@v1.4.0
 stderr 'reading.*/410/rsc.io/.*: 410 Gone'
diff --git a/src/cmd/go/testdata/script/mod_sumdb_proxy.txt b/src/cmd/go/testdata/script/mod_sumdb_proxy.txt
index 28166913fd..7bbc3f9e19 100644
--- a/src/cmd/go/testdata/script/mod_sumdb_proxy.txt
+++ b/src/cmd/go/testdata/script/mod_sumdb_proxy.txt
@@ -46,5 +46,22 @@ stderr '503 Service Unavailable'
 rm $GOPATH/pkg/mod/cache/download/sumdb
 rm go.sum
 
+# the error from the last attempted proxy should be returned.
+cp go.mod.orig go.mod
+env GOSUMDB=$sumdb
+env GOPROXY=$proxy/sumdb-404,$proxy/sumdb-503
+! go get -d rsc.io/fortune@v1.0.0
+stderr '503 Service Unavailable'
+rm $GOPATH/pkg/mod/cache/download/sumdb
+rm go.sum
+
+# if proxies are separated with '|', fallback is allowed on any error.
+cp go.mod.orig go.mod
+env GOSUMDB=$sumdb
+env GOPROXY=$proxy/sumdb-503|https://0.0.0.0|$proxy
+go get -d rsc.io/fortune@v1.0.0
+rm $GOPATH/pkg/mod/cache/download/sumdb
+rm go.sum
+
 -- go.mod.orig --
 module m
-- 
cgit v1.2.3-54-g00ecf


From 34314280e46da1558bc7f9cd7e8a9ed610cf417b Mon Sep 17 00:00:00 2001
From: Matthew Dempsky
Date: Fri, 27 Mar 2020 14:33:54 -0700
Subject: cmd/compile: fix constant conversion involving complex types

In CL 187657, I refactored constant conversion logic without realizing that conversions between int/float and complex types are allowed for constants (assuming the constant values are representable by the destination type), but are never allowed for non-constant expressions.

This CL expands convertop to take an extra srcConstant parameter to indicate whether the source expression is a constant; and if so, to allow any numeric-to-numeric conversion. (Conversions of values that cannot be represented in the destination type are rejected by evconst.)

Fixes #38117.
Change-Id: Id7077d749a14c8fd910be38da170fa5254819f2b
Reviewed-on: https://go-review.googlesource.com/c/go/+/226197
Run-TryBot: Matthew Dempsky
TryBot-Result: Gobot Gobot
Reviewed-by: Robert Griesemer
---
 src/cmd/compile/internal/gc/subr.go      | 12 ++++++++++--
 src/cmd/compile/internal/gc/typecheck.go |  2 +-
 test/fixedbugs/issue38117.go             | 17 +++++++++++++++++
 3 files changed, 28 insertions(+), 3 deletions(-)
 create mode 100644 test/fixedbugs/issue38117.go

diff --git a/src/cmd/compile/internal/gc/subr.go b/src/cmd/compile/internal/gc/subr.go
index 0a2a11663e..7c1ab89b78 100644
--- a/src/cmd/compile/internal/gc/subr.go
+++ b/src/cmd/compile/internal/gc/subr.go
@@ -542,7 +542,7 @@ func methtype(t *types.Type) *types.Type {
 // Is type src assignment compatible to type dst?
 // If so, return op code to use in conversion.
 // If not, return OXXX.
-func assignop(src *types.Type, dst *types.Type, why *string) Op {
+func assignop(src, dst *types.Type, why *string) Op {
 	if why != nil {
 		*why = ""
 	}
@@ -665,7 +665,8 @@ func assignop(src *types.Type, dst *types.Type, why *string) Op {
 // Can we convert a value of type src to a value of type dst?
 // If so, return op code to use in conversion (maybe OCONVNOP).
 // If not, return OXXX.
-func convertop(src *types.Type, dst *types.Type, why *string) Op {
+// srcConstant indicates whether the value of type src is a constant.
+func convertop(srcConstant bool, src, dst *types.Type, why *string) Op {
 	if why != nil {
 		*why = ""
 	}
@@ -741,6 +742,13 @@ func convertop(src *types.Type, dst *types.Type, why *string) Op {
 		return OCONV
 	}
 
+	// Special case for constant conversions: any numeric
+	// conversion is potentially okay. We'll validate further
+	// within evconst. See #38117.
+	if srcConstant && (src.IsInteger() || src.IsFloat() || src.IsComplex()) && (dst.IsInteger() || dst.IsFloat() || dst.IsComplex()) {
+		return OCONV
+	}
+
 	// 6. src is an integer or has type []byte or []rune
 	// and dst is a string type.
 	if src.IsInteger() && dst.IsString() {
diff --git a/src/cmd/compile/internal/gc/typecheck.go b/src/cmd/compile/internal/gc/typecheck.go
index 2ca7f2dbe4..6e04908b46 100644
--- a/src/cmd/compile/internal/gc/typecheck.go
+++ b/src/cmd/compile/internal/gc/typecheck.go
@@ -1634,7 +1634,7 @@ func typecheck1(n *Node, top int) (res *Node) {
 			return n
 		}
 		var why string
-		n.Op = convertop(t, n.Type, &why)
+		n.Op = convertop(n.Left.Op == OLITERAL, t, n.Type, &why)
 		if n.Op == 0 {
 			if !n.Diag() && !n.Type.Broke() && !n.Left.Diag() {
 				yyerror("cannot convert %L to type %v%s", n.Left, n.Type, why)
diff --git a/test/fixedbugs/issue38117.go b/test/fixedbugs/issue38117.go
new file mode 100644
index 0000000000..11edef7f25
--- /dev/null
+++ b/test/fixedbugs/issue38117.go
@@ -0,0 +1,17 @@
+// errorcheck
+
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// cmd/compile erroneously rejected conversions of constant values
+// between int/float and complex types.
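+// (The same conversions remain invalid for non-constant expressions.)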
+
+package p
+
+const (
+	_ = int(complex64(int(0)))
+	_ = float64(complex128(float64(0)))
+
+	_ = int8(complex128(1000)) // ERROR "overflow"
+)
-- 
cgit v1.2.3-54-g00ecf


From 8114242359a32dbbfe44cf6cc83c48cca7d6c126 Mon Sep 17 00:00:00 2001
From: Josh Bleecher Snyder
Date: Tue, 24 Mar 2020 22:14:02 -0700
Subject: cmd/compile, runtime: use more registers for amd64 write barrier calls
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The compiler-inserted write barrier calls use a special ABI for speed and to minimize the binary size impact. runtime.gcWriteBarrier takes its args in DI and AX. This change adds gcWriteBarrier wrapper functions, varying only in the register used for the second argument. (Allowing variation in the first argument doesn't offer improvements, which is convenient, as it avoids quadratic API growth.) This reduces the number of register copies. The goal is reduced binary size via reduced register pressure/copies. One downside to this change is that when the write barrier is on, we may bounce through several different write barrier wrappers, which is bad for the instruction cache.

Package runtime write barrier benchmarks for this change:

name                old time/op  new time/op  delta
WriteBarrier-8      16.6ns ± 6%  15.6ns ± 6%  -5.73%  (p=0.000 n=97+99)
BulkWriteBarrier-8  4.37ns ± 7%  4.22ns ± 8%  -3.45%  (p=0.000 n=96+99)

However, I don't particularly trust these numbers. I ran runtime.BenchmarkWriteBarrier multiple times as I rebased this change, and noticed that the results have high variance depending on the parent change, perhaps due to alignment. This change was stress tested with GOGC=1 GODEBUG=gccheckmark=1 go test std.

This change reduces binary sizes:

file       before     after      Δ        %
addr2line  4308720    4296688    -12032   -0.279%
api        5965592    5945368    -20224   -0.339%
asm        5148088    5025464    -122624  -2.382%
buildid    2848760    2844904    -3856    -0.135%
cgo        4828968    4812840    -16128   -0.334%
compile    19754720   19529744   -224976  -1.139%
cover      5256840    5236600    -20240   -0.385%
dist       3670312    3658264    -12048   -0.328%
doc        4669608    4657576    -12032   -0.258%
fix        3377976    3365944    -12032   -0.356%
link       6614888    6586472    -28416   -0.430%
nm         4258368    4254528    -3840    -0.090%
objdump    4656336    4644304    -12032   -0.258%
pack       2295176    2295432    +256     +0.011%
pprof      14762356   14709364   -52992   -0.359%
test2json  2824456    2820600    -3856    -0.137%
trace      11684404   11643700   -40704   -0.348%
vet        8284760    8252248    -32512   -0.392%
total      115210328  114580040  -630288  -0.547%

This change improves compiler performance:

name      old time/op  new time/op  delta
Template  208ms ± 3%   207ms ± 3%   -0.40%  (p=0.030 n=43+44)
Unicode   80.2ms ± 3%  81.3ms ± 3%  +1.25%  (p=0.000 n=41+44)
GoTypes   699ms ± 3%   694ms ± 2%   -0.71%  (p=0.016 n=42+37)
Compiler  3.26s ± 2%   3.23s ± 2%   -0.86%  (p=0.000 n=43+45)
SSA       6.97s ± 1%   6.93s ± 1%   -0.63%  (p=0.000 n=43+45)
Flate     134ms ± 3%   133ms ± 2%   ~       (p=0.139 n=45+42)
GoParser  165ms ± 2%   164ms ± 1%   -0.79%  (p=0.000 n=45+40)
Reflect   434ms ± 4%   435ms ± 4%   ~       (p=0.937 n=44+44)
Tar       181ms ± 2%   181ms ± 2%   ~       (p=0.702 n=43+45)
XML       244ms ± 2%   244ms ± 2%   ~       (p=0.237 n=45+44)
[Geo mean]  403ms      402ms        -0.29%

name      old user-time/op  new user-time/op  delta
Template  271ms ± 2%  268ms ± 1%  -1.40%  (p=0.000 n=42+42)
Unicode   117ms ± 3%  116ms ± 5%  ~       (p=0.066 n=45+45)
GoTypes   948ms ± 2%  936ms ± 2%  -1.30%  (p=0.000 n=41+40)
Compiler  4.26s ± 1%  4.21s ± 2%  -1.25%  (p=0.000 n=37+45)
SSA       9.52s ± 2%  9.41s ± 1%  -1.18%  (p=0.000 n=44+45)
Flate     167ms ± 2%  165ms ± 2%  -1.15%  (p=0.000 n=44+41)
GoParser  201ms ± 2%  198ms ± 1%  -1.40%  (p=0.000 n=43+43)
Reflect   563ms ± 8%  560ms ± 7%  ~       (p=0.206 n=45+44)
Tar       224ms ± 2%  222ms ± 2%  -0.81%  (p=0.000 n=45+45)
XML       308ms ± 2%  304ms ± 1%  -1.17%  (p=0.000 n=42+43)
[Geo mean]  525ms     519ms       -1.08%

name      old alloc/op  new alloc/op  delta
Template  36.3MB ± 0%  36.3MB ± 0%  ~       (p=0.421 n=5+5)
Unicode   28.4MB ± 0%  28.3MB ± 0%  ~       (p=0.056 n=5+5)
GoTypes   121MB ± 0%   121MB ± 0%   -0.14%  (p=0.008 n=5+5)
Compiler  567MB ± 0%   567MB ± 0%   -0.06%  (p=0.016 n=4+5)
SSA       1.26GB ± 0%  1.26GB ± 0%  -0.07%  (p=0.008 n=5+5)
Flate     22.9MB ± 0%  22.8MB ± 0%  ~       (p=0.310 n=5+5)
GoParser  28.0MB ± 0%  27.9MB ± 0%  -0.09%  (p=0.008 n=5+5)
Reflect   78.4MB ± 0%  78.4MB ± 0%  -0.03%  (p=0.008 n=5+5)
Tar       34.2MB ± 0%  34.2MB ± 0%  -0.05%  (p=0.008 n=5+5)
XML       44.4MB ± 0%  44.4MB ± 0%  -0.04%  (p=0.016 n=5+5)
[Geo mean]  76.4MB     76.3MB       -0.05%

name      old allocs/op  new allocs/op  delta
Template  356k ± 0%   356k ± 0%   -0.13%  (p=0.008 n=5+5)
Unicode   326k ± 0%   326k ± 0%   -0.07%  (p=0.008 n=5+5)
GoTypes   1.24M ± 0%  1.24M ± 0%  -0.24%  (p=0.008 n=5+5)
Compiler  5.30M ± 0%  5.28M ± 0%  -0.34%  (p=0.008 n=5+5)
SSA       11.9M ± 0%  11.9M ± 0%  -0.16%  (p=0.008 n=5+5)
Flate     226k ± 0%   225k ± 0%   -0.12%  (p=0.008 n=5+5)
GoParser  287k ± 0%   286k ± 0%   -0.29%  (p=0.008 n=5+5)
Reflect   930k ± 0%   929k ± 0%   -0.05%  (p=0.008 n=5+5)
Tar       332k ± 0%   331k ± 0%   -0.12%  (p=0.008 n=5+5)
XML       411k ± 0%   411k ± 0%   -0.12%  (p=0.008 n=5+5)
[Geo mean]  771k      770k        -0.16%

For some packages, this change significantly reduces the size of executable text. Examples:

file  before  after  Δ  %
cmd/internal/obj/arm.s  68658  66855  -1803  -2.626%
cmd/internal/obj/mips.s  57486  56272  -1214  -2.112%
cmd/internal/obj/arm64.s  152107  147163  -4944  -3.250%
cmd/internal/obj/ppc64.s  125544  120456  -5088  -4.053%
cmd/vendor/golang.org/x/tools/go/cfg.s  31699  30742  -957  -3.019%

Full listing:

file  before  after  Δ  %
container/ring.s  1890  1870  -20  -1.058%
container/list.s  5366  5390  +24  +0.447%
internal/cpu.s  3298  3295  -3  -0.091%
internal/testlog.s  1507  1501  -6  -0.398%
image/color.s  8281  8248  -33  -0.399%
runtime.s  480970  480075  -895  -0.186%
sync.s  16497  16408  -89  -0.539%
internal/singleflight.s  2591  2577  -14  -0.540%
math/rand.s  10456  10438  -18  -0.172%
cmd/go/internal/par.s  2801  2790  -11  -0.393%
internal/reflectlite.s  28477  28417  -60  -0.211%
errors.s  2750  2736  -14  -0.509%
internal/oserror.s  446  434  -12  -2.691%
sort.s  17061  17046  -15  -0.088%
io.s  17063  16999  -64  -0.375%
vendor/golang.org/x/crypto/hkdf.s  1962  1936  -26  -1.325%
text/tabwriter.s  9617  9574  -43  -0.447%
hash/crc64.s  3414  3408  -6  -0.176%
hash/crc32.s  6657  6651  -6  -0.090%
bytes.s  31932  31863  -69  -0.216%
strconv.s  53158  52799  -359  -0.675%
strings.s  42829  42665  -164  -0.383%
encoding/ascii85.s  4833  4791  -42  -0.869%
vendor/golang.org/x/text/transform.s  16810  16724  -86  -0.512%
path.s  6848  6845  -3  -0.044%
encoding/base32.s  9658  9592  -66  -0.683%
bufio.s  23051  22908  -143  -0.620%
compress/bzip2.s  11773  11764  -9  -0.076%
image.s  37565  37502  -63  -0.168%
syscall.s  82359  82279  -80  -0.097%
regexp/syntax.s  83573  82930  -643  -0.769%
image/jpeg.s  36535  36490  -45  -0.123%
regexp.s  64396  64214  -182  -0.283%
time.s  82724  82622  -102  -0.123%
plugin.s  6539  6536  -3  -0.046%
context.s  10959  10865  -94  -0.858%
internal/poll.s  24286  24270  -16  -0.066%
reflect.s  168304  167927  -377  -0.224%
internal/fmtsort.s  7416  7376  -40  -0.539%
os.s  52465  51787  -678  -1.292%
cmd/go/internal/lockedfile/internal/filelock.s  2326  2317  -9  -0.387%
os/signal.s  4657  4648  -9  -0.193%
runtime/debug.s  6040  5998  -42  -0.695%
encoding/binary.s  30838  30801  -37  -0.120%
vendor/golang.org/x/net/route.s  23694  23491  -203  -0.857%
path/filepath.s  17895  17889  -6  -0.034%
cmd/vendor/golang.org/x/sys/unix.s  78125  78109  -16  -0.020%
io/ioutil.s  6999  6996  -3  -0.043%
encoding/base64.s  12094  12007  -87  -0.719%
crypto/cipher.s  20466  20372  -94  -0.459%
cmd/go/internal/robustio.s  2672  2669  -3  -0.112%
encoding/pem.s  9302  9286  -16  -0.172%
internal/obscuretestdata.s  1719  1695  -24  -1.396%
crypto/aes.s  11014  11002  -12  -0.109%
os/exec.s  29388  29231  -157  -0.534%
cmd/internal/browser.s  2266  2260  -6  -0.265%
internal/goroot.s  4601  4592  -9  -0.196%
vendor/golang.org/x/crypto/chacha20poly1305.s  8945  8942  -3  -0.034%
cmd/vendor/golang.org/x/crypto/ssh/terminal.s  27226  27195  -31  -0.114%
index/suffixarray.s  36431  36411  -20  -0.055%
fmt.s  77017  76709  -308  -0.400%
encoding/hex.s  6241  6154  -87  -1.394%
compress/lzw.s  7133  7069  -64  -0.897%
database/sql/driver.s  18888  18877  -11  -0.058%
net/url.s  29838  29739  -99  -0.332%
debug/plan9obj.s  8329  8279  -50  -0.600%
encoding/csv.s  12986  12902  -84  -0.647%
debug/gosym.s  25403  25330  -73  -0.287%
compress/flate.s  51192  50970  -222  -0.434%
vendor/golang.org/x/net/dns/dnsmessage.s  86769  86208  -561  -0.647%
compress/gzip.s  9791  9758  -33  -0.337%
compress/zlib.s  7310  7277  -33  -0.451%
archive/zip.s  42356  42166  -190  -0.449%
debug/dwarf.s  108259  107730  -529  -0.489%
encoding/json.s  106378  105910  -468  -0.440%
os/user.s  14751  14724  -27  -0.183%
database/sql.s  99011  98404  -607  -0.613%
log.s  9466  9423  -43  -0.454%
debug/pe.s  31272  31182  -90  -0.288%
debug/macho.s  32764  32608  -156  -0.476%
encoding/gob.s  136976  136517  -459  -0.335%
vendor/golang.org/x/text/unicode/bidi.s  27318  27276  -42  -0.154%
archive/tar.s  71416  70975  -441  -0.618%
vendor/golang.org/x/net/http2/hpack.s  23892  23848  -44  -0.184%
vendor/golang.org/x/text/secure/bidirule.s  3354  3351  -3  -0.089%
mime/quotedprintable.s  5960  5925  -35  -0.587%
net/http/internal.s  5874  5853  -21  -0.358%
math/big.s  184147  183692  -455  -0.247%
debug/elf.s  63775  63567  -208  -0.326%
mime.s  39802  39709  -93  -0.234%
encoding/xml.s  111038  110713  -325  -0.293%
crypto/dsa.s  6044  6029  -15  -0.248%
go/token.s  12139  12077  -62  -0.511%
crypto/rand.s  6889  6866  -23  -0.334%
go/scanner.s  19030  19008  -22  -0.116%
flag.s  22320  22236  -84  -0.376%
vendor/golang.org/x/text/unicode/norm.s  66652  66391  -261  -0.392%
crypto/rsa.s  31671  31650  -21  -0.066%
crypto/elliptic.s  51553  51403  -150  -0.291%
internal/xcoff.s  22950  22822  -128  -0.558%
go/constant.s  43750  43689  -61  -0.139%
encoding/asn1.s  57086  57035  -51  -0.089%
runtime/trace.s  2609  2603  -6  -0.230%
crypto/x509/pkix.s  10458  10471  +13  +0.124%
image/gif.s  27544  27385  -159  -0.577%
vendor/golang.org/x/net/idna.s  24558  24502  -56  -0.228%
image/png.s  42775  42685  -90  -0.210%
vendor/golang.org/x/crypto/cryptobyte.s  33616  33493  -123  -0.366%
go/ast.s  80684  80449  -235  -0.291%
net/internal/socktest.s  16571  16535  -36  -0.217%
crypto/ecdsa.s  11948  11936  -12  -0.100%
text/template/parse.s  95138  94002  -1136  -1.194%
runtime/pprof.s  59702  59639  -63  -0.106%
testing.s  68427  68088  -339  -0.495%
internal/testenv.s  5620  5596  -24  -0.427%
testing/internal/testdeps.s  3312  3294  -18  -0.543%
internal/trace.s  78473  78239  -234  -0.298%
testing/iotest.s  4968  4908  -60  -1.208%
os/signal/internal/pty.s  3011  2990  -21  -0.697%
testing/quick.s  12179  12125  -54  -0.443%
cmd/internal/bio.s  9286  9274  -12  -0.129%
cmd/internal/src.s  17684  17663  -21  -0.119%
cmd/internal/goobj2.s  12588  12558  -30  -0.238%
cmd/internal/objabi.s  16408  16390  -18  -0.110%
go/printer.s  77417  77308  -109  -0.141%
go/parser.s  80045  79113  -932  -1.164%
go/format.s  5434  5419  -15  -0.276%
cmd/internal/goobj.s  26146  25954  -192  -0.734%
runtime/pprof/internal/profile.s  102518  102178  -340  -0.332%
text/template.s  95343  94935  -408  -0.428%
cmd/internal/dwarf.s  31718  31572  -146  -0.460%
cmd/vendor/golang.org/x/arch/arm/armasm.s  45240  45151  -89  -0.197%
internal/lazytemplate.s  1470  1457  -13  -0.884%
cmd/vendor/golang.org/x/arch/ppc64/ppc64asm.s  37253  37220  -33  -0.089%
cmd/asm/internal/flags.s  2593  2590  -3  -0.116%
cmd/asm/internal/lex.s  25068  24921  -147  -0.586%
cmd/internal/buildid.s  18536  18263  -273  -1.473%
cmd/vendor/golang.org/x/arch/x86/x86asm.s  80209  80105  -104  -0.130%
go/doc.s  75140  74585  -555  -0.739%
cmd/internal/edit.s  3893  3899  +6  +0.154%
html/template.s  89377  88809  -568  -0.636%
cmd/vendor/golang.org/x/arch/arm64/arm64asm.s  117998  117824  -174  -0.147%
cmd/internal/obj.s  115015  114290  -725  -0.630%
go/build.s  69379  68862  -517  -0.745%
cmd/internal/objfile.s  48106  47982  -124  -0.258%
cmd/cover.s  46239  46113  -126  -0.272%
cmd/addr2line.s  2845  2833  -12  -0.422%
cmd/internal/obj/arm.s  68658  66855  -1803  -2.626%
cmd/internal/obj/mips.s  57486  56272  -1214  -2.112%
cmd/internal/obj/riscv.s  63834  63006  -828  -1.297%
cmd/compile/internal/syntax.s  146582  145456  -1126  -0.768%
cmd/internal/obj/wasm.s  44117  44066  -51  -0.116%
cmd/cgo.s  242645  241653  -992  -0.409%
cmd/internal/obj/arm64.s  152107  147163  -4944  -3.250%
net.s  295972  292010  -3962  -1.339%
go/types.s  321371  319432  -1939  -0.603%
vendor/golang.org/x/net/http/httpproxy.s  9450  9423  -27  -0.286%
net/textproto.s  19455  19406  -49  -0.252%
cmd/internal/obj/ppc64.s  125544  120456  -5088  -4.053%
go/internal/srcimporter.s  6475  6409  -66  -1.019%
log/syslog.s  8017  7929  -88  -1.098%
cmd/compile/internal/logopt.s  10183  10162  -21  -0.206%
net/mail.s  24085  23948  -137  -0.569%
mime/multipart.s  21527  21420  -107  -0.497%
cmd/internal/obj/s390x.s  127610  127757  +147  +0.115%
go/internal/gcimporter.s  34913  34548  -365  -1.045%
vendor/golang.org/x/net/nettest.s  28103  28016  -87  -0.310%
cmd/go/internal/cfg.s  9967  9916  -51  -0.512%
cmd/api.s  39703  39603  -100  -0.252%
go/internal/gccgoimporter.s  56470  56120  -350  -0.620%
go/importer.s  2077  2056  -21  -1.011%
cmd/compile/internal/types.s  48202  47282  -920  -1.909%
cmd/go/internal/str.s  4341  4320  -21  -0.484%
cmd/internal/obj/x86.s  89440  88625  -815  -0.911%
cmd/go/internal/base.s  12667  12580  -87  -0.687%
cmd/go/internal/cache.s  30754  30571  -183  -0.595%
cmd/doc.s  62976  62755  -221  -0.351%
cmd/go/internal/search.s  20114  19993  -121  -0.602%
cmd/vendor/golang.org/x/xerrors.s  17923  17855  -68  -0.379%
cmd/go/internal/lockedfile.s  16451  16415  -36  -0.219%
cmd/vendor/golang.org/x/mod/sumdb/note.s  18200  18150  -50  -0.275%
cmd/vendor/golang.org/x/mod/module.s  17869  17851  -18  -0.101%
cmd/asm/internal/arch.s  37533  37482  -51  -0.136%
cmd/fix.s  87728  87492  -236  -0.269%
cmd/vendor/golang.org/x/mod/sumdb/tlog.s  36394  36367  -27  -0.074%
cmd/vendor/golang.org/x/mod/sumdb/dirhash.s  4990  4963  -27  -0.541%
cmd/go/internal/imports.s  16499  16469  -30  -0.182%
cmd/vendor/golang.org/x/mod/zip.s  18816  18745  -71  -0.377%
cmd/go/internal/cmdflag.s  5126  5123  -3  -0.059%
cmd/internal/test2json.s  9540  9452  -88  -0.922%
cmd/go/internal/tool.s  3629  3623  -6  -0.165%
cmd/go/internal/version.s  11232  11220  -12  -0.107%
cmd/go/internal/mvs.s  25383  25179  -204  -0.804%
cmd/nm.s  5815  5803  -12  -0.206%
cmd/dist.s  210146  209140  -1006  -0.479%
cmd/asm/internal/asm.s  68655  68549  -106  -0.154%
cmd/vendor/golang.org/x/mod/modfile.s  72974  72510  -464  -0.636%
cmd/go/internal/load.s  107548  106861  -687  -0.639%
cmd/link/internal/sym.s  18708  18581  -127  -0.679%
cmd/asm.s  3367  3343  -24  -0.713%
cmd/gofmt.s  30795  30698  -97  -0.315%
cmd/link/internal/objfile.s  21828  21630  -198  -0.907%
cmd/pack.s  14878  14869  -9  -0.060%
cmd/vendor/github.com/google/pprof/internal/elfexec.s  6788  6782  -6  -0.088%
cmd/test2json.s  1647  1641  -6  -0.364%
cmd/link/internal/loader.s  48677  48483  -194  -0.399%
cmd/vendor/golang.org/x/tools/go/analysis/internal/analysisflags.s  16783  16773  -10  -0.060%
cmd/link/internal/loadelf.s  35464  35126  -338  -0.953%
cmd/link/internal/loadmacho.s  29438  29180  -258  -0.876%
cmd/link/internal/loadpe.s  16440  16371  -69  -0.420%
cmd/vendor/golang.org/x/tools/go/analysis/passes/internal/analysisutil.s  2106  2100  -6  -0.285%
cmd/link/internal/loadxcoff.s  11711  11615  -96  -0.820%
cmd/vendor/golang.org/x/tools/go/analysis/internal/facts.s  14954  14883  -71  -0.475%
cmd/vendor/golang.org/x/tools/go/ast/inspector.s  5394  5374  -20  -0.371%
cmd/vendor/golang.org/x/tools/go/analysis/passes/asmdecl.s  37029  36822  -207  -0.559%
cmd/vendor/golang.org/x/tools/go/analysis/passes/inspect.s  340  337  -3  -0.882%
cmd/vendor/golang.org/x/tools/go/analysis/passes/cgocall.s  9919  9858  -61  -0.615%
cmd/vendor/golang.org/x/tools/go/analysis/passes/bools.s  6705  6690  -15  -0.224%
cmd/vendor/golang.org/x/tools/go/analysis/passes/copylock.s  9783  9741  -42  -0.429%
cmd/vendor/golang.org/x/tools/go/cfg.s  31699  30742  -957  -3.019%
cmd/vendor/golang.org/x/tools/go/analysis/passes/ifaceassert.s  2768  2762  -6  -0.217%
cmd/vendor/golang.org/x/tools/go/analysis/passes/loopclosure.s  3031  2998  -33  -1.089%
cmd/vendor/golang.org/x/tools/go/analysis/passes/shift.s  4382  4376  -6  -0.137%
cmd/vendor/golang.org/x/tools/go/analysis/passes/stdmethods.s  8654  8642  -12  -0.139%
cmd/vendor/golang.org/x/tools/go/analysis/passes/stringintconv.s  3458  3446  -12  -0.347%
cmd/vendor/golang.org/x/tools/go/analysis/passes/structtag.s  8011  7995  -16  -0.200%
cmd/vendor/golang.org/x/tools/go/analysis/passes/tests.s  6205  6193  -12  -0.193%
cmd/vendor/golang.org/x/tools/go/ast/astutil.s  66183  65861  -322  -0.487%
cmd/vendor/github.com/google/pprof/profile.s  150844  150261  -583  -0.386%
cmd/vendor/golang.org/x/tools/go/analysis/passes/unreachable.s  8057  8054  -3  -0.037%
cmd/vendor/golang.org/x/tools/go/analysis/passes/unusedresult.s  3670  3667  -3  -0.082%
cmd/vendor/github.com/google/pprof/internal/measurement.s  10464  10440  -24  -0.229%
cmd/vendor/golang.org/x/tools/go/types/typeutil.s  12319  12274  -45  -0.365%
cmd/vendor/golang.org/x/tools/go/analysis/unitchecker.s  13503  13342  -161  -1.192%
cmd/vendor/golang.org/x/tools/go/analysis/passes/ctrlflow.s  5261  5218  -43  -0.817%
cmd/vendor/golang.org/x/tools/go/analysis/passes/errorsas.s  1462  1459  -3  -0.205%
cmd/vendor/golang.org/x/tools/go/analysis/passes/lostcancel.s  9594  9582  -12  -0.125%
cmd/vendor/golang.org/x/tools/go/analysis/passes/printf.s  34397  34338  -59  -0.172%
cmd/vendor/github.com/google/pprof/internal/graph.s  53225  52936  -289  -0.543%
cmd/vendor/github.com/ianlancetaylor/demangle.s  177450  175329  -2121  -1.195%
crypto/x509.s  147892  147388  -504  -0.341%
cmd/go/internal/work.s  306465  304950  -1515  -0.494%
cmd/go/internal/run.s  4664  4657  -7  -0.150%
crypto/tls.s  313130  311833  -1297  -0.414%
net/http/httptrace.s  3979  3905  -74  -1.860%
net/smtp.s  14413  14344  -69  -0.479%
cmd/link/internal/ld.s  545343  542279  -3064  -0.562%
cmd/link/internal/mips.s  6218  6215  -3  -0.048%
cmd/link/internal/mips64.s  6108  6103  -5  -0.082%
cmd/link/internal/amd64.s  18154  18112  -42  -0.231%
cmd/link/internal/arm64.s  22527  22494  -33  -0.146%
cmd/link/internal/arm.s  22574  22494  -80  -0.354%
cmd/link/internal/s390x.s  20779  20746  -33  -0.159%
cmd/link/internal/wasm.s  16531  16493  -38  -0.230%
cmd/link/internal/x86.s  18906  18849  -57  -0.301%
cmd/link/internal/ppc64.s  26856  26778  -78  -0.290%
net/http.s  559101  556513  -2588  -0.463%
net/http/cookiejar.s  15912  15885  -27  -0.170%
expvar.s  9531  9525  -6  -0.063%
net/http/httptest.s  16616  16475  -141  -0.849%
net/http/cgi.s  23624  23458  -166  -0.703%
cmd/go/internal/web.s  16546  16489  -57  -0.344%
cmd/vendor/golang.org/x/mod/sumdb.s  33197  33117  -80  -0.241%
net/http/fcgi.s  19266  19169  -97  -0.503%
net/http/httputil.s  39875  39728  -147  -0.369%
cmd/vendor/github.com/google/pprof/internal/symbolz.s  5888  5867  -21  -0.357%
net/rpc.s  34154  34003  -151  -0.442%
cmd/vendor/github.com/google/pprof/internal/transport.s  2746  2716  -30  -1.092%
cmd/vendor/github.com/google/pprof/internal/binutils.s  35999  35875  -124  -0.344%
net/rpc/jsonrpc.s  6637  6598  -39  -0.588%
cmd/vendor/github.com/google/pprof/internal/symbolizer.s  11533  11458  -75  -0.650%
cmd/go/internal/get.s  62921  62803  -118  -0.188%
cmd/vendor/github.com/google/pprof/internal/report.s  80364  80058  -306  -0.381%
cmd/go/internal/modfetch/codehost.s  89680  89066  -614  -0.685%
cmd/trace.s  117171  116701  -470  -0.401%
cmd/vendor/github.com/google/pprof/internal/driver.s  144268  143297  -971  -0.673%
cmd/go/internal/modfetch.s  126299  125860  -439  -0.348%
cmd/vendor/github.com/google/pprof/driver.s  9042  9000  -42  -0.464%
cmd/go/internal/modconv.s  17947  17889  -58  -0.323%
cmd/pprof.s  12399  12326  -73  -0.589%
cmd/go/internal/modload.s  151182  150389  -793  -0.525%
cmd/go/internal/generate.s  11738  11636  -102  -0.869%
cmd/go/internal/help.s  6571  6531  -40  -0.609%
cmd/go/internal/clean.s  11174  11142  -32  -0.286%
cmd/go/internal/vet.s  7897  7867  -30  -0.380%
cmd/go/internal/envcmd.s  22176  22095  -81  -0.365%
cmd/go/internal/list.s  15216  15067  -149  -0.979%
cmd/go/internal/modget.s  38698  38519  -179  -0.463%
cmd/go/internal/modcmd.s  46674  46441  -233  -0.499%
cmd/go/internal/test.s  64664  64456  -208  -0.322%
cmd/go.s  6730  6703  -27  -0.401%
cmd/compile/internal/ssa.s  3592565  3582500  -10065  -0.280%
cmd/compile/internal/gc.s  1549123  1537123  -12000  -0.775%
cmd/compile/internal/riscv64.s  14579  14483  -96  -0.658%
cmd/compile/internal/mips.s  20578  20419  -159  -0.773%
cmd/compile/internal/ppc64.s  25524  25359  -165  -0.646%
cmd/compile/internal/mips64.s  19795  19636  -159  -0.803%
cmd/compile/internal/wasm.s  13329  13290  -39  -0.293%
cmd/compile/internal/s390x.s  28097  27892  -205  -0.730%
cmd/compile/internal/arm.s  31489  31321  -168  -0.534%
cmd/compile/internal/arm64.s  29803  29590  -213  -0.715%
cmd/compile/internal/amd64.s  32961  33221  +260  +0.789%
cmd/compile/internal/x86.s  31029  30878  -151  -0.487%
total  18534966  18440341  -94625  -0.511%

Change-Id: I830d37364f14f0297800adc42c99f60a74c51aca
Reviewed-on: https://go-review.googlesource.com/c/go/+/226367
Run-TryBot: Josh Bleecher Snyder
TryBot-Result: Gobot Gobot
Reviewed-by: Keith Randall
---
 src/cmd/compile/internal/amd64/ssa.go        |  3 +-
 src/cmd/compile/internal/gc/go.go            |  3 ++
 src/cmd/compile/internal/gc/ssa.go           | 15 +++++++++
 src/cmd/compile/internal/ssa/gen/AMD64Ops.go |  2 +-
 src/cmd/compile/internal/ssa/opGen.go        |  2 +-
 src/runtime/asm_amd64.s                      | 49 ++++++++++++++++++++++++++++
 src/runtime/stubs_amd64.go                   |  9 +++++
 test/codegen/structs.go                      |  2 +-
 8 files changed, 81 insertions(+), 4 deletions(-)

diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go
index b6c1039d9e..5d79095025 100644
--- a/src/cmd/compile/internal/amd64/ssa.go
+++ b/src/cmd/compile/internal/amd64/ssa.go
@@ -947,7 +947,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		p := s.Prog(obj.ACALL)
 		p.To.Type = obj.TYPE_MEM
 		p.To.Name = obj.NAME_EXTERN
-		p.To.Sym = v.Aux.(*obj.LSym)
+		// arg0 is in DI. Set sym to match where regalloc put arg1.
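+		// (For arg1 in AX this resolves to runtime.gcWriteBarrier itself;
+		// the other entries are the new register-specific wrappers.)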
+		p.To.Sym = gc.GCWriteBarrierReg[v.Args[1].Reg()]
 
 	case ssa.OpAMD64LoweredPanicBoundsA, ssa.OpAMD64LoweredPanicBoundsB, ssa.OpAMD64LoweredPanicBoundsC:
 		p := s.Prog(obj.ACALL)
diff --git a/src/cmd/compile/internal/gc/go.go b/src/cmd/compile/internal/gc/go.go
index 85c857c214..d2a1b21cbd 100644
--- a/src/cmd/compile/internal/gc/go.go
+++ b/src/cmd/compile/internal/gc/go.go
@@ -334,3 +334,6 @@ var (
 	WasmTruncU,
 	SigPanic *obj.LSym
 )
+
+// GCWriteBarrierReg maps from registers to gcWriteBarrier implementation LSyms.
+var GCWriteBarrierReg map[int16]*obj.LSym
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index b7dc511fd3..00587aa3bf 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -16,6 +16,7 @@ import (
 	"cmd/compile/internal/ssa"
 	"cmd/compile/internal/types"
 	"cmd/internal/obj"
+	"cmd/internal/obj/x86"
 	"cmd/internal/objabi"
 	"cmd/internal/src"
 	"cmd/internal/sys"
@@ -104,6 +105,20 @@ func initssaconfig() {
 	writeBarrier = sysvar("writeBarrier") // struct { bool; ... }
 	zerobaseSym = sysvar("zerobase")
 
+	// asm funcs with special ABI
+	if thearch.LinkArch.Name == "amd64" {
+		GCWriteBarrierReg = map[int16]*obj.LSym{
+			x86.REG_AX: sysvar("gcWriteBarrier"),
+			x86.REG_CX: sysvar("gcWriteBarrierCX"),
+			x86.REG_DX: sysvar("gcWriteBarrierDX"),
+			x86.REG_BX: sysvar("gcWriteBarrierBX"),
+			x86.REG_BP: sysvar("gcWriteBarrierBP"),
+			x86.REG_SI: sysvar("gcWriteBarrierSI"),
+			x86.REG_R8: sysvar("gcWriteBarrierR8"),
+			x86.REG_R9: sysvar("gcWriteBarrierR9"),
+		}
+	}
+
 	if thearch.LinkArch.Family == sys.Wasm {
 		BoundsCheckFunc[ssa.BoundsIndex] = sysvar("goPanicIndex")
 		BoundsCheckFunc[ssa.BoundsIndexU] = sysvar("goPanicIndexU")
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
index 08aa65b0a8..74cdf0283b 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
@@ -717,7 +717,7 @@ func init() {
 		{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gpsp}}, clobberFlags: true, nilCheck: true, faultOnNilArg0: true},
 		// LoweredWB invokes runtime.gcWriteBarrier. arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier
 		// It saves all GP registers if necessary, but may clobber others.
-		{name: "LoweredWB", argLength: 3, reg: regInfo{inputs: []regMask{buildReg("DI"), ax}, clobbers: callerSave &^ gp}, clobberFlags: true, aux: "Sym", symEffect: "None"},
+		{name: "LoweredWB", argLength: 3, reg: regInfo{inputs: []regMask{buildReg("DI"), buildReg("AX CX DX BX BP SI R8 R9")}, clobbers: callerSave &^ gp}, clobberFlags: true, aux: "Sym", symEffect: "None"},
 
 		// There are three of these functions so that they can have three different register inputs.
 		// When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index e2b83e20b3..5e91856e48 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -11420,7 +11420,7 @@ var opcodeTable = [...]opInfo{
 		reg: regInfo{
 			inputs: []inputInfo{
 				{0, 128}, // DI
-				{1, 1},   // AX
+				{1, 879}, // AX CX DX BX BP SI R8 R9
 			},
 			clobbers: 4294901760, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
 		},
diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s
index b872b8834d..ed7cec7233 100644
--- a/src/runtime/asm_amd64.s
+++ b/src/runtime/asm_amd64.s
@@ -1475,6 +1475,55 @@ flush:
 	MOVQ	96(SP), R15
 	JMP	ret
 
+// gcWriteBarrierCX is gcWriteBarrier, but with args in DI and CX.
+TEXT runtime·gcWriteBarrierCX(SB),NOSPLIT,$0
+	XCHGQ CX, AX
+	CALL runtime·gcWriteBarrier(SB)
+	XCHGQ CX, AX
+	RET
+
+// gcWriteBarrierDX is gcWriteBarrier, but with args in DI and DX.
+TEXT runtime·gcWriteBarrierDX(SB),NOSPLIT,$0
+	XCHGQ DX, AX
+	CALL runtime·gcWriteBarrier(SB)
+	XCHGQ DX, AX
+	RET
+
+// gcWriteBarrierBX is gcWriteBarrier, but with args in DI and BX.
+TEXT runtime·gcWriteBarrierBX(SB),NOSPLIT,$0
+	XCHGQ BX, AX
+	CALL runtime·gcWriteBarrier(SB)
+	XCHGQ BX, AX
+	RET
+
+// gcWriteBarrierBP is gcWriteBarrier, but with args in DI and BP.
+TEXT runtime·gcWriteBarrierBP(SB),NOSPLIT,$0
+	XCHGQ BP, AX
+	CALL runtime·gcWriteBarrier(SB)
+	XCHGQ BP, AX
+	RET
+
+// gcWriteBarrierSI is gcWriteBarrier, but with args in DI and SI.
+TEXT runtime·gcWriteBarrierSI(SB),NOSPLIT,$0
+	XCHGQ SI, AX
+	CALL runtime·gcWriteBarrier(SB)
+	XCHGQ SI, AX
+	RET
+
+// gcWriteBarrierR8 is gcWriteBarrier, but with args in DI and R8.
+TEXT runtime·gcWriteBarrierR8(SB),NOSPLIT,$0
+	XCHGQ R8, AX
+	CALL runtime·gcWriteBarrier(SB)
+	XCHGQ R8, AX
+	RET
+
+// gcWriteBarrierR9 is gcWriteBarrier, but with args in DI and R9.
+TEXT runtime·gcWriteBarrierR9(SB),NOSPLIT,$0
+	XCHGQ R9, AX
+	CALL runtime·gcWriteBarrier(SB)
+	XCHGQ R9, AX
+	RET
+
 DATA debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
 GLOBL debugCallFrameTooLarge<>(SB), RODATA, $20 // Size duplicated below
diff --git a/src/runtime/stubs_amd64.go b/src/runtime/stubs_amd64.go
index 5b79d66762..8c14bc2271 100644
--- a/src/runtime/stubs_amd64.go
+++ b/src/runtime/stubs_amd64.go
@@ -4,6 +4,15 @@
 
 package runtime
 
+// Called from compiled code; declared for vet; do NOT call from Go.
+func gcWriteBarrierCX()
+func gcWriteBarrierDX()
+func gcWriteBarrierBX()
+func gcWriteBarrierBP()
+func gcWriteBarrierSI()
+func gcWriteBarrierR8()
+func gcWriteBarrierR9()
+
 // stackcheck checks that SP is in range [g->stack.lo, g->stack.hi).
 func stackcheck()
diff --git a/test/codegen/structs.go b/test/codegen/structs.go
index b81ad67c44..9eddc5b16e 100644
--- a/test/codegen/structs.go
+++ b/test/codegen/structs.go
@@ -28,7 +28,7 @@ type Z2 struct {
 
 func Zero2(t *Z2) {
 	// amd64:`XORPS\tX., X`,`MOVUPS\tX., \(.*\)`,`MOVQ\t\$0, 16\(.*\)`
-	// amd64:`.*runtime[.]gcWriteBarrier\(SB\)`
+	// amd64:`.*runtime[.]gcWriteBarrier.*\(SB\)`
 	*t = Z2{}
 }
-- 
cgit v1.2.3-54-g00ecf


From 7b30a2d268ccb56221d0d8b149300548ce0308e1 Mon Sep 17 00:00:00 2001
From: Cuong Manh Le
Date: Sun, 29 Mar 2020 01:19:50 +0700
Subject: cmd/compile: make isSmallMakeSlice check slice cap only
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If slice cap is not set, it will be equal to slice len.
So isSmallMakeSlice only needs to check whether slice cap is constant. While at it, also add a test to make sure panicmakeslicecap is called when a make-slice expression contains an invalid non-constant len.

For this benchmark:

	func BenchmarkMakeSliceNonConstantLen(b *testing.B) {
		len := 1
		for i := 0; i < b.N; i++ {
			s := make([]int, len, 2)
			_ = s
		}
	}

Result compared with parent:

name                        old time/op  new time/op  delta
MakeSliceNonConstantLen-12  18.4ns ± 1%  0.2ns ± 2%   -98.66%  (p=0.008 n=5+5)

Fixes #37975

Change-Id: I4bc926361bc2ffeab4cfaa888ef0a30cbc3b80e8
Reviewed-on: https://go-review.googlesource.com/c/go/+/226278
Run-TryBot: Cuong Manh Le
TryBot-Result: Gobot Gobot
Reviewed-by: Matthew Dempsky
Reviewed-by: Keith Randall
---
 src/cmd/compile/internal/gc/builtin.go         |  1 +
 src/cmd/compile/internal/gc/builtin/runtime.go |  1 +
 src/cmd/compile/internal/gc/walk.go            | 18 +++++++--
 test/escape_slice.go                           | 22 +++++++----
 test/fixedbugs/issue37975.go                   | 54 ++++++++++++++++++++++++++
 5 files changed, 86 insertions(+), 10 deletions(-)
 create mode 100644 test/fixedbugs/issue37975.go

diff --git a/src/cmd/compile/internal/gc/builtin.go b/src/cmd/compile/internal/gc/builtin.go
index b6b47440ce..deefed7f19 100644
--- a/src/cmd/compile/internal/gc/builtin.go
+++ b/src/cmd/compile/internal/gc/builtin.go
@@ -13,6 +13,7 @@ var runtimeDecls = [...]struct {
 	{"panicdivide", funcTag, 5},
 	{"panicshift", funcTag, 5},
 	{"panicmakeslicelen", funcTag, 5},
+	{"panicmakeslicecap", funcTag, 5},
 	{"throwinit", funcTag, 5},
 	{"panicwrap", funcTag, 5},
 	{"gopanic", funcTag, 7},
diff --git a/src/cmd/compile/internal/gc/builtin/runtime.go b/src/cmd/compile/internal/gc/builtin/runtime.go
index afeae3e794..9bcb3688b4 100644
--- a/src/cmd/compile/internal/gc/builtin/runtime.go
+++ b/src/cmd/compile/internal/gc/builtin/runtime.go
@@ -18,6 +18,7 @@ func newobject(typ *byte) *any
 func panicdivide()
 func panicshift()
 func panicmakeslicelen()
+func panicmakeslicecap()
 func throwinit()
 func panicwrap()
diff --git a/src/cmd/compile/internal/gc/walk.go b/src/cmd/compile/internal/gc/walk.go
index 289a75b59c..dfc9d9aa22 100644
--- a/src/cmd/compile/internal/gc/walk.go
+++ b/src/cmd/compile/internal/gc/walk.go
@@ -354,14 +354,13 @@ func isSmallMakeSlice(n *Node) bool {
 	if n.Op != OMAKESLICE {
 		return false
 	}
-	l := n.Left
 	r := n.Right
 	if r == nil {
-		r = l
+		r = n.Left
 	}
 	t := n.Type
 
-	return smallintconst(l) && smallintconst(r) && (t.Elem().Width == 0 || r.Int64() < maxImplicitStackVarSize/t.Elem().Width)
+	return smallintconst(r) && (t.Elem().Width == 0 || r.Int64() < maxImplicitStackVarSize/t.Elem().Width)
 }
 
 // walk the whole tree of the body of an
@@ -1338,6 +1337,19 @@ opswitch:
 			if i < 0 {
 				Fatalf("walkexpr: invalid index %v", r)
 			}
+
+			// if len < 0 { panicmakeslicelen }
+			nif := nod(OIF, nod(OLT, l, nodintconst(0)), nil)
+			nif.Nbody.Set1(mkcall("panicmakeslicelen", nil, init))
+			nif = typecheck(nif, ctxStmt)
+			init.Append(nif)
+
+			// if len > cap { panicmakeslicecap }
+			nif = nod(OIF, nod(OGT, conv(l, types.Types[TUINT64]), nodintconst(i)), nil)
+			nif.Nbody.Set1(mkcall("panicmakeslicecap", nil, init))
+			nif = typecheck(nif, ctxStmt)
+			init.Append(nif)
+
 			t = types.NewArray(t.Elem(), i) // [r]T
 			var_ := temp(t)
 			a := nod(OAS, var_, nil) // zero temp
diff --git a/test/escape_slice.go b/test/escape_slice.go
index 03053cf326..d2cdaa6a01 100644
--- a/test/escape_slice.go
+++ b/test/escape_slice.go
@@ -18,28 +18,28 @@ var sink interface{}
 func slice0() {
 	var s []*int
 	// BAD: i should not escape
-	i := 0 // ERROR "moved to heap: i"
+	i := 0 // ERROR "moved to heap: i"
 	s = append(s, &i)
 	_ = s
 }
 
 func slice1() *int {
 	var s []*int
-	i := 0 // ERROR "moved to heap: i"
+	i := 0 // ERROR "moved to heap: i"
 	s = append(s, &i)
 	return s[0]
 }
 
 func slice2() []*int {
 	var s []*int
-	i := 0 // ERROR "moved to heap: i"
+	i := 0 // ERROR "moved to heap: i"
 	s = append(s, &i)
 	return s
 }
 
 func slice3() *int {
 	var s []*int
-	i := 0 // ERROR "moved to heap: i"
+	i := 0 // ERROR "moved to heap: i"
 	s = append(s, &i)
 	for _, p := range s {
 		return p
@@ -48,7 +48,7 @@ func slice3() *int {
 }
 
 func slice4(s []*int) { // ERROR "s does not escape"
-	i := 0 // ERROR "moved to heap: i"
+	i := 0 // ERROR "moved to heap: i"
 	s[0] = &i
 }
 
@@ -56,14 +56,14 @@ func slice5(s []*int) { // ERROR "s does not escape"
 	if s != nil {
 		s = make([]*int, 10) // ERROR "make\(\[\]\*int, 10\) does not escape"
 	}
-	i := 0 // ERROR "moved to heap: i"
+	i := 0 // ERROR "moved to heap: i"
 	s[0] = &i
 }
 
 func slice6() {
 	s := make([]*int, 10) // ERROR "make\(\[\]\*int, 10\) does not escape"
 	// BAD: i should not escape
-	i := 0 // ERROR "moved to heap: i"
+	i := 0 // ERROR "moved to heap: i"
 	s[0] = &i
 	_ = s
 }
@@ -93,6 +93,14 @@ func slice10() []*int {
 	return s
 }
 
+func slice11() {
+	i := 2
+	s := make([]int, 2, 3) // ERROR "make\(\[\]int, 2, 3\) does not escape"
+	s = make([]int, i, 3)  // ERROR "make\(\[\]int, i, 3\) does not escape"
+	s = make([]int, i, 1)  // ERROR "make\(\[\]int, i, 1\) does not escape"
+	_ = s
+}
+
 func envForDir(dir string) []string { // ERROR "dir does not escape"
 	env := os.Environ()
 	return mergeEnvLists([]string{"PWD=" + dir}, env) // ERROR ".PWD=. \+ dir escapes to heap" "\[\]string literal does not escape"
diff --git a/test/fixedbugs/issue37975.go b/test/fixedbugs/issue37975.go
new file mode 100644
index 0000000000..a4e8f1f14a
--- /dev/null
+++ b/test/fixedbugs/issue37975.go
@@ -0,0 +1,54 @@
+// run
+
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Make sure runtime.panicmakeslice* are called.
+
+package main
+
+import "strings"
+
+func main() {
+	// Test typechecking passes if len is valid
+	// but cap is out of range for len's type.
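+	// (The cap constant 300 is out of range for byte, but it need not fit in len's type.)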
+	var x byte
+	_ = make([]int, x, 300)
+
+	capOutOfRange := func() {
+		i := 2
+		s := make([]int, i, 1)
+		s[0] = 1
+	}
+	lenOutOfRange := func() {
+		i := -1
+		s := make([]int, i, 3)
+		s[0] = 1
+	}
+
+	tests := []struct {
+		f        func()
+		panicStr string
+	}{
+		{capOutOfRange, "cap out of range"},
+		{lenOutOfRange, "len out of range"},
+	}
+
+	for _, tc := range tests {
+		shouldPanic(tc.panicStr, tc.f)
+	}
+
+}
+
+func shouldPanic(str string, f func()) {
+	defer func() {
+		err := recover()
+		runtimeErr := err.(error).Error()
+		if !strings.Contains(runtimeErr, str) {
+			panic("got panic " + runtimeErr + ", want " + str)
+		}
+	}()
+
+	f()
+}
-- 
cgit v1.2.3-54-g00ecf


From 82253ddc7a6b85240fd74cc5138f685ca931f355 Mon Sep 17 00:00:00 2001
From: Josh Bleecher Snyder
Date: Fri, 28 Feb 2020 12:36:03 -0800
Subject: cmd/compile: constant fold CtzNN

Change-Id: I3ecd2c7ed3c8ae35c2bb9562aed09f7ade5c8cdd
Reviewed-on: https://go-review.googlesource.com/c/go/+/221609
Run-TryBot: Josh Bleecher Snyder
TryBot-Result: Gobot Gobot
Reviewed-by: Keith Randall
---
 src/cmd/compile/internal/ssa/gen/generic.rules |  10 ++
 src/cmd/compile/internal/ssa/rewrite.go        |   7 +-
 src/cmd/compile/internal/ssa/rewritegeneric.go | 152 +++++++++++++++++++++
 3 files changed, 166 insertions(+), 3 deletions(-)

diff --git a/src/cmd/compile/internal/ssa/gen/generic.rules b/src/cmd/compile/internal/ssa/gen/generic.rules
index 8a3c8eeaab..c7f6a232c6 100644
--- a/src/cmd/compile/internal/ssa/gen/generic.rules
+++ b/src/cmd/compile/internal/ssa/gen/generic.rules
@@ -137,6 +137,16 @@
 (Xor32 (Const32 [c]) (Const32 [d])) -> (Const32 [int64(int32(c^d))])
 (Xor64 (Const64 [c]) (Const64 [d])) -> (Const64 [c^d])
 
+(Ctz64 (Const64 [c])) && config.PtrSize == 4 -> (Const32 [ntz(c)])
+(Ctz32 (Const32 [c])) && config.PtrSize == 4 -> (Const32 [ntz32(c)])
+(Ctz16 (Const16 [c])) && config.PtrSize == 4 -> (Const32 [ntz16(c)])
+(Ctz8 (Const8 [c])) && config.PtrSize == 4 -> (Const32 [ntz8(c)])
+
+(Ctz64 (Const64 [c])) && config.PtrSize == 8 -> (Const64 [ntz(c)])
+(Ctz32 (Const32 [c])) && config.PtrSize == 8 -> (Const64 [ntz32(c)])
+(Ctz16 (Const16 [c])) && config.PtrSize == 8 -> (Const64 [ntz16(c)])
+(Ctz8 (Const8 [c])) && config.PtrSize == 8 -> (Const64 [ntz8(c)])
+
 (Div8 (Const8 [c]) (Const8 [d])) && d != 0 -> (Const8 [int64(int8(c)/int8(d))])
 (Div16 (Const16 [c]) (Const16 [d])) && d != 0 -> (Const16 [int64(int16(c)/int16(d))])
 (Div32 (Const32 [c]) (Const32 [d])) && d != 0 -> (Const32 [int64(int32(c)/int32(d))])
diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go
index 51dba5eb71..727fd2402d 100644
--- a/src/cmd/compile/internal/ssa/rewrite.go
+++ b/src/cmd/compile/internal/ssa/rewrite.go
@@ -347,9 +347,10 @@ func nlz(x int64) int64 {
 }
 
 // ntz returns the number of trailing zeros.
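 // For example, ntz(8) == 3 and ntz(0) == 64.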
-func ntz(x int64) int64 {
-	return int64(bits.TrailingZeros64(uint64(x)))
-}
+func ntz(x int64) int64   { return int64(bits.TrailingZeros64(uint64(x))) }
+func ntz32(x int64) int64 { return int64(bits.TrailingZeros32(uint32(x))) }
+func ntz16(x int64) int64 { return int64(bits.TrailingZeros16(uint16(x))) }
+func ntz8(x int64) int64  { return int64(bits.TrailingZeros8(uint8(x))) }
 
 func oneBit(x int64) bool {
 	return bits.OnesCount64(uint64(x)) == 1
diff --git a/src/cmd/compile/internal/ssa/rewritegeneric.go b/src/cmd/compile/internal/ssa/rewritegeneric.go
index d6213e8741..5d954784e3 100644
--- a/src/cmd/compile/internal/ssa/rewritegeneric.go
+++ b/src/cmd/compile/internal/ssa/rewritegeneric.go
@@ -50,6 +50,14 @@ func rewriteValuegeneric(v *Value) bool {
 		return rewriteValuegeneric_OpConstString(v)
 	case OpConvert:
 		return rewriteValuegeneric_OpConvert(v)
+	case OpCtz16:
+		return rewriteValuegeneric_OpCtz16(v)
+	case OpCtz32:
+		return rewriteValuegeneric_OpCtz32(v)
+	case OpCtz64:
+		return rewriteValuegeneric_OpCtz64(v)
+	case OpCtz8:
+		return rewriteValuegeneric_OpCtz8(v)
 	case OpCvt32Fto32:
 		return rewriteValuegeneric_OpCvt32Fto32(v)
 	case OpCvt32Fto64:
@@ -4048,6 +4056,150 @@ func rewriteValuegeneric_OpConvert(v *Value) bool {
 	}
 	return false
 }
+func rewriteValuegeneric_OpCtz16(v *Value) bool {
+	v_0 := v.Args[0]
+	b := v.Block
+	config := b.Func.Config
+	// match: (Ctz16 (Const16 [c]))
+	// cond: config.PtrSize == 4
+	// result: (Const32 [ntz16(c)])
+	for {
+		if v_0.Op != OpConst16 {
+			break
+		}
+		c := v_0.AuxInt
+		if !(config.PtrSize == 4) {
+			break
+		}
+		v.reset(OpConst32)
+		v.AuxInt = ntz16(c)
+		return true
+	}
+	// match: (Ctz16 (Const16 [c]))
+	// cond: config.PtrSize == 8
+	// result: (Const64 [ntz16(c)])
+	for {
+		if v_0.Op != OpConst16 {
+			break
+		}
+		c := v_0.AuxInt
+		if !(config.PtrSize == 8) {
+			break
+		}
+		v.reset(OpConst64)
+		v.AuxInt = ntz16(c)
+		return true
+	}
+	return false
+}
+func rewriteValuegeneric_OpCtz32(v *Value) bool {
+	v_0 := v.Args[0]
+	b := v.Block
+	config := b.Func.Config
+	// match: (Ctz32 (Const32 [c]))
+	// cond: config.PtrSize == 4
+	// result: (Const32 [ntz32(c)])
+	for {
+		if v_0.Op != OpConst32 {
+			break
+		}
+		c := v_0.AuxInt
+		if !(config.PtrSize == 4) {
+			break
+		}
+		v.reset(OpConst32)
+		v.AuxInt = ntz32(c)
+		return true
+	}
+	// match: (Ctz32 (Const32 [c]))
+	// cond: config.PtrSize == 8
+	// result: (Const64 [ntz32(c)])
+	for {
+		if v_0.Op != OpConst32 {
+			break
+		}
+		c := v_0.AuxInt
+		if !(config.PtrSize == 8) {
+			break
+		}
+		v.reset(OpConst64)
+		v.AuxInt = ntz32(c)
+		return true
+	}
+	return false
+}
+func rewriteValuegeneric_OpCtz64(v *Value) bool {
+	v_0 := v.Args[0]
+	b := v.Block
+	config := b.Func.Config
+	// match: (Ctz64 (Const64 [c]))
+	// cond: config.PtrSize == 4
+	// result: (Const32 [ntz(c)])
+	for {
+		if v_0.Op != OpConst64 {
+			break
+		}
+		c := v_0.AuxInt
+		if !(config.PtrSize == 4) {
+			break
+		}
+		v.reset(OpConst32)
+		v.AuxInt = ntz(c)
+		return true
+	}
+	// match: (Ctz64 (Const64 [c]))
+	// cond: config.PtrSize == 8
+	// result: (Const64 [ntz(c)])
+	for {
+		if v_0.Op != OpConst64 {
+			break
+		}
+		c := v_0.AuxInt
+		if !(config.PtrSize == 8) {
+			break
+		}
+		v.reset(OpConst64)
+		v.AuxInt = ntz(c)
+		return true
+	}
+	return false
+}
+func rewriteValuegeneric_OpCtz8(v *Value) bool {
+	v_0 := v.Args[0]
+	b := v.Block
+	config := b.Func.Config
+	// match: (Ctz8 (Const8 [c]))
+	// cond: config.PtrSize == 4
+	// result: (Const32 [ntz8(c)])
+	for {
+		if v_0.Op != OpConst8 {
+			break
+		}
+		c := v_0.AuxInt
+		if !(config.PtrSize == 4) {
+			break
+		}
+		v.reset(OpConst32)
+		v.AuxInt = ntz8(c)
+		return true
+	}
+	// match: (Ctz8 (Const8 [c]))
+	// cond: config.PtrSize == 8
+	// result: (Const64 [ntz8(c)])
+	for {
+		if v_0.Op != OpConst8 {
+			break
+		}
+		c := v_0.AuxInt
+		if !(config.PtrSize == 8) {
+			break
+		}
+		v.reset(OpConst64)
+		v.AuxInt = ntz8(c)
+		return true
+	}
+	return false
+}
 func rewriteValuegeneric_OpCvt32Fto32(v *Value) bool {
 	v_0 := v.Args[0]
 	// match: (Cvt32Fto32 (Const32F [c]))
-- 
cgit v1.2.3-54-g00ecf


From 6edd7971bb3e83356544b2cd6e7a93fdabff1246 Mon Sep 17 00:00:00 2001
From: Cuong Manh Le
Date: Wed, 1 Apr 2020 05:27:49 +0700
Subject: cmd/compile: optimize len check when make slice

In CL 226278, we did:

	if len < 0 { panicmakeslicelen }
	if len > cap { panicmakeslicecap }

But since cap is constrained to [0,2^31), it is safe to do:

	if uint64(len) > cap {
		if len < 0 { panicmakeslicelen() }
		panicmakeslicecap()
	}

saving us a comparison in the common case when len is within range.

Passes toolstash-check.

Change-Id: I0ebd52914ccde4cbb45f16c9e020b0c8f42e0663
Reviewed-on: https://go-review.googlesource.com/c/go/+/226737
Run-TryBot: Cuong Manh Le
TryBot-Result: Gobot Gobot
Reviewed-by: Matthew Dempsky
---
 src/cmd/compile/internal/gc/walk.go | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/src/cmd/compile/internal/gc/walk.go b/src/cmd/compile/internal/gc/walk.go
index dfc9d9aa22..14d088c7fd 100644
--- a/src/cmd/compile/internal/gc/walk.go
+++ b/src/cmd/compile/internal/gc/walk.go
@@ -1338,15 +1338,16 @@ opswitch:
 				Fatalf("walkexpr: invalid index %v", r)
 			}
 
-			// if len < 0 { panicmakeslicelen }
-			nif := nod(OIF, nod(OLT, l, nodintconst(0)), nil)
-			nif.Nbody.Set1(mkcall("panicmakeslicelen", nil, init))
-			nif = typecheck(nif, ctxStmt)
-			init.Append(nif)
-
-			// if len > cap { panicmakeslicecap }
-			nif = nod(OIF, nod(OGT, conv(l, types.Types[TUINT64]), nodintconst(i)), nil)
-			nif.Nbody.Set1(mkcall("panicmakeslicecap", nil, init))
+			// cap is constrained to [0,2^31), so it's safe to do:
+			//
+			//	if uint64(len) > cap {
+			//		if len < 0 { panicmakeslicelen() }
+			//		panicmakeslicecap()
+			//	}
+			nif := nod(OIF, nod(OGT, conv(l, types.Types[TUINT64]), nodintconst(i)), nil)
+			niflen := nod(OIF, nod(OLT, l, nodintconst(0)), nil)
+			niflen.Nbody.Set1(mkcall("panicmakeslicelen", nil, init))
+			nif.Nbody.Append(niflen, mkcall("panicmakeslicecap", nil, init))
 			nif = typecheck(nif, ctxStmt)
 			init.Append(nif)
-- 
cgit v1.2.3-54-g00ecf


From 7939c43748932c0caf1a1538410eb70fcd5a705f Mon Sep 17 00:00:00 2001
From: Cherry Zhang
Date: Tue, 31 Mar 2020 19:13:39 -0400
Subject: runtime: generate dummy duffcopy

Although duffcopy is not used on PPC64, duff_ppc64x.s and mkduff.go don't match. Make them match.

Fixes #38188.

Change-Id: Ic6c08e335795ea407880efd449f4229696af7744
Reviewed-on: https://go-review.googlesource.com/c/go/+/226719
Run-TryBot: Cherry Zhang
Reviewed-by: Josh Bleecher Snyder
TryBot-Result: Gobot Gobot
---
 src/runtime/duff_ppc64x.s | 6 ++----
 src/runtime/mkduff.go     | 4 +++-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/runtime/duff_ppc64x.s b/src/runtime/duff_ppc64x.s
index 0c62d0afe9..d6b89ba940 100644
--- a/src/runtime/duff_ppc64x.s
+++ b/src/runtime/duff_ppc64x.s
@@ -137,7 +137,5 @@ TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0
 	MOVDU	R0, 8(R3)
 	RET
 
-// TODO: Implement runtime·duffcopy.
-TEXT runtime·duffcopy(SB),NOSPLIT|NOFRAME,$0-0
-	MOVD	R0, 0(R0)
-	RET
+TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0
+	UNDEF
diff --git a/src/runtime/mkduff.go b/src/runtime/mkduff.go
index 6ac5e7da44..6c7a4cf8dc 100644
--- a/src/runtime/mkduff.go
+++ b/src/runtime/mkduff.go
@@ -194,7 +194,9 @@ func zeroPPC64x(w io.Writer) {
 }
 
 func copyPPC64x(w io.Writer) {
-	fmt.Fprintln(w, "// TODO: Implement runtime·duffcopy.")
+	// duffcopy is not used on PPC64.
+	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0")
+	fmt.Fprintln(w, "\tUNDEF")
 }
 
 func tagsMIPS64x(w io.Writer) {
-- 
cgit v1.2.3-54-g00ecf


From afc480bab447c0fefe26a69e4221d93673021e98 Mon Sep 17 00:00:00 2001
From: Bradford Lamson-Scribner
Date: Sun, 29 Mar 2020 13:17:46 -0600
Subject: cmd/compile: combine ssa.html columns with identical contents

Combine columns in ssa.html output if they are identical. There can now be multiple titles per column which are all clickable to expand and collapse their column. Give collapsed columns some padding for better readability. Some of the work in this CL was started by Josh Bleecher Snyder and mailed to me in order to continue to completion.

Updates #37766

Change-Id: I313b0917dc1bafe1eb99d91798ea915e5bcfaae9
Reviewed-on: https://go-review.googlesource.com/c/go/+/226209
Reviewed-by: Alberto Donizetti
Reviewed-by: Keith Randall
---
 src/cmd/compile/internal/ssa/html.go  | 85 ++++++++++++++++++++++-----------
 src/cmd/compile/internal/ssa/print.go |  8 ++++
 2 files changed, 66 insertions(+), 27 deletions(-)

diff --git a/src/cmd/compile/internal/ssa/html.go b/src/cmd/compile/internal/ssa/html.go
index 1eed224934..66fff88d7c 100644
--- a/src/cmd/compile/internal/ssa/html.go
+++ b/src/cmd/compile/internal/ssa/html.go
@@ -19,9 +19,12 @@ import (
 
 type HTMLWriter struct {
 	Logger
-	w    io.WriteCloser
-	path string
-	dot  *dotWriter
+	w             io.WriteCloser
+	path          string
+	dot           *dotWriter
+	prevHash      []byte
+	pendingPhases []string
+	pendingTitles []string
 }
 
 func NewHTMLWriter(path string, logger Logger, funcname, cfgMask string) *HTMLWriter {
@@ -88,27 +91,22 @@ th, td {
 td > h2 {
 	cursor: pointer;
 	font-size: 120%;
+	margin: 5px 0px 5px 0px;
 }
 
 td.collapsed {
 	font-size: 12px;
 	width: 12px;
 	border: 1px solid white;
-	padding: 0;
+	padding: 2px;
 	cursor: pointer;
 	background: #fafafa;
 }
 
-td.collapsed div {
-	-moz-transform: rotate(-90.0deg); /* FF3.5+ */
-	-o-transform: rotate(-90.0deg); /* Opera 10.5 */
-	-webkit-transform: rotate(-90.0deg); /* Saf3.1+, Chrome */
-	filter: progid:DXImageTransform.Microsoft.BasicImage(rotation=0.083); /* IE6,IE7 */
-	-ms-filter: "progid:DXImageTransform.Microsoft.BasicImage(rotation=0.083)"; /* IE8 */
-	margin-top: 10.3em;
-	margin-left: -10em;
-	margin-right: -10em;
-	text-align: right;
+td.collapsed div {
+	/* TODO: Flip the direction of the phase's title 90 degrees on a collapsed column. */
+	writing-mode: vertical-lr;
+	white-space: pre;
 }
 
 code, pre, .lines, .ast {
@@ -481,7 +479,7 @@ window.onload = function() {
 		"deadcode",
 		"opt",
 		"lower",
-		"late deadcode",
+		"late-deadcode",
 		"regalloc",
 		"genssa",
 	];
@@ -503,15 +501,34 @@ window.onload = function() {
 	}
 
 	// Go through all columns and collapse needed phases.
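	// (Each phase has two td cells: a collapsed stub and an expanded "-exp" column.)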
-	var td = document.getElementsByTagName("td");
-	for (var i = 0; i < td.length; i++) {
-		var id = td[i].id;
-		var phase = id.substr(0, id.length-4);
-		var show = expandedDefault.indexOf(phase) !== -1
+	const td = document.getElementsByTagName("td");
+	for (let i = 0; i < td.length; i++) {
+		const id = td[i].id;
+		const phase = id.substr(0, id.length-4);
+		let show = expandedDefault.indexOf(phase) !== -1
+
+		// If show == false, check to see if this is a combined column (multiple phases).
+		// If combined, check each of the phases to see if they are in our expandedDefaults.
+		// If any are found, that entire combined column gets shown.
+		if (!show) {
+			const combined = phase.split('--+--');
+			const len = combined.length;
+			if (len > 1) {
+				for (let i = 0; i < len; i++) {
+					if (expandedDefault.indexOf(combined[i]) !== -1) {
+						show = true;
+						break;
+					}
+				}
+			}
+		}
 		if (id.endsWith("-exp")) {
-			var h2 = td[i].getElementsByTagName("h2");
-			if (h2 && h2[0]) {
-				h2[0].addEventListener('click', toggler(phase));
+			const h2Els = td[i].getElementsByTagName("h2");
+			const len = h2Els.length;
+			if (len > 0) {
+				for (let i = 0; i < len; i++) {
+					h2Els[i].addEventListener('click', toggler(phase));
+				}
 			}
 		} else {
 			td[i].addEventListener('click', toggler(phase));
@@ -738,8 +755,16 @@ func (w *HTMLWriter) WriteFunc(phase, title string, f *Func) {
 	if w == nil {
 		return // avoid generating HTML just to discard it
 	}
-	//w.WriteColumn(phase, title, "", f.HTML())
-	w.WriteColumn(phase, title, "", f.HTML(phase, w.dot))
+	hash := hashFunc(f)
+	w.pendingPhases = append(w.pendingPhases, phase)
+	w.pendingTitles = append(w.pendingTitles, title)
+	if !bytes.Equal(hash, w.prevHash) {
+		phases := strings.Join(w.pendingPhases, " + ")
+		w.WriteMultiTitleColumn(phases, w.pendingTitles, fmt.Sprintf("hash-%x", hash), f.HTML(phase, w.dot))
+		w.pendingPhases = w.pendingPhases[:0]
+		w.pendingTitles = w.pendingTitles[:0]
+	}
+	w.prevHash = hash
 }
 
 // FuncLines contains source code for a function to be displayed
@@ -853,6 +878,10 @@ func (w *HTMLWriter) WriteAST(phase string, buf *bytes.Buffer) {
 // WriteColumn writes raw HTML in a column headed by title.
 // It is intended for pre- and post-compilation log output.
 func (w *HTMLWriter) WriteColumn(phase, title, class, html string) {
+	w.WriteMultiTitleColumn(phase, []string{title}, class, html)
+}
+
+func (w *HTMLWriter) WriteMultiTitleColumn(phase string, titles []string, class, html string) {
 	if w == nil {
 		return
 	}
@@ -865,9 +894,11 @@ func (w *HTMLWriter) WriteColumn(phase, title, class, html string) {
 	} else {
 		w.Printf("<td id=\"%v-exp\" class=\"%v\">", id, class)
 	}
-	w.WriteString("<h2>" + title + "</h2>")
+	for _, title := range titles {
+		w.WriteString("<h2>" + title + "</h2>")
+	}
 	w.WriteString(html)
-	w.WriteString("</td>")
+	w.WriteString("</td>\n")
 }
 
 func (w *HTMLWriter) Printf(msg string, v ...interface{}) {
diff --git a/src/cmd/compile/internal/ssa/print.go b/src/cmd/compile/internal/ssa/print.go
index 58e4c3bbbe..36f09c3ad9 100644
--- a/src/cmd/compile/internal/ssa/print.go
+++ b/src/cmd/compile/internal/ssa/print.go
@@ -6,6 +6,7 @@ package ssa
 
 import (
 	"bytes"
+	"crypto/sha256"
 	"fmt"
 	"io"
 )
@@ -14,6 +15,13 @@ func printFunc(f *Func) {
 	f.Logf("%s", f)
 }
 
+func hashFunc(f *Func) []byte {
+	h := sha256.New()
+	p := stringFuncPrinter{w: h}
+	fprintFunc(p, f)
+	return h.Sum(nil)
+}
+
 func (f *Func) String() string {
 	var buf bytes.Buffer
 	p := stringFuncPrinter{w: &buf}
-- 
cgit v1.2.3-54-g00ecf


From e9850462aa1e35d92ff428915df7bb0c9e79df81 Mon Sep 17 00:00:00 2001
From: Cuong Manh Le
Date: Wed, 11 Mar 2020 12:51:44 +0700
Subject: cmd/compile: don't inline reflect.Value.UnsafeAddr/Pointer when checkptr is enabled

Fixes #35073

Change-Id: I4b555bbc33d39a97544e6dd9c61d95ae212f472b
Reviewed-on: https://go-review.googlesource.com/c/go/+/222878
Run-TryBot: Cuong Manh Le
TryBot-Result: Gobot Gobot
Reviewed-by: Matthew Dempsky
---
 src/cmd/compile/internal/gc/inl.go |  6 ++++++
 test/fixedbugs/issue35073.go       | 23 +++++++++++++++++++++++
 2 files changed, 29 insertions(+)
 create mode 100644 test/fixedbugs/issue35073.go

diff --git a/src/cmd/compile/internal/gc/inl.go b/src/cmd/compile/internal/gc/inl.go
index 68acf876f8..b9460ed6d6 100644
--- a/src/cmd/compile/internal/gc/inl.go
+++ b/src/cmd/compile/internal/gc/inl.go
@@ -575,6 +575,12 @@ func inlnode(n *Node, maxCost int32) *Node {
 	// so escape analysis can avoid more heapmoves.
 	case OCLOSURE:
 		return n
+	case OCALLMETH:
+		// Prevent inlining some reflect.Value methods when using checkptr,
+		// even when package reflect was compiled without it (#35073).
+		if s := n.Left.Sym; Debug_checkptr != 0 && s.Pkg.Path == "reflect" && (s.Name == "Value.UnsafeAddr" || s.Name == "Value.Pointer") {
+			return n
+		}
 	}
 
 	lno := setlineno(n)
diff --git a/test/fixedbugs/issue35073.go b/test/fixedbugs/issue35073.go
new file mode 100644
index 0000000000..dc8ce3a987
--- /dev/null
+++ b/test/fixedbugs/issue35073.go
@@ -0,0 +1,23 @@
+// run -gcflags=-d=checkptr
+
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Test that reflect.Value.UnsafeAddr/Pointer is handled
+// correctly by -d=checkptr
+
+package main
+
+import (
+	"reflect"
+	"unsafe"
+)
+
+func main() {
+	n := 10
+	m := make(map[string]string)
+
+	_ = unsafe.Pointer(reflect.ValueOf(&n).Elem().UnsafeAddr())
+	_ = unsafe.Pointer(reflect.ValueOf(&m).Elem().Pointer())
+}
-- 
cgit v1.2.3-54-g00ecf


From 8e6a8d9e282a6f9aeab5635c75c223129f5ab61f Mon Sep 17 00:00:00 2001
From: maronghe
Date: Wed, 1 Apr 2020 07:15:49 +0000
Subject: runtime: fix typo in loadFactor comment

Fixes #38174

Change-Id: Iacdbbcd0b4586302daf082e59d833b7aa58b1a6a
GitHub-Last-Rev: f0c96819ebb9928879a03957244f2de655708cbb
GitHub-Pull-Request: golang/go#38191
Reviewed-on: https://go-review.googlesource.com/c/go/+/226758
Reviewed-by: Alberto Donizetti
---
 src/runtime/map.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/runtime/map.go b/src/runtime/map.go
index e456c32556..399c1b071f 100644
--- a/src/runtime/map.go
+++ b/src/runtime/map.go
@@ -66,7 +66,7 @@ const (
 	bucketCnt = 1 << bucketCntBits
 
 	// Maximum average load of a bucket that triggers growth is 6.5.
- // Represent as loadFactorNum/loadFactDen, to allow integer math. + // Represent as loadFactorNum/loadFactorDen, to allow integer math. loadFactorNum = 13 loadFactorDen = 2 -- cgit v1.2.3-54-g00ecf From 7ffbea9fd838be851c287b2a21ee6ce1e2776b54 Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Tue, 3 Mar 2020 18:07:32 +0000 Subject: reflect: when Converting between float32s, don't lose signal NaNs Trying this CL again, with a test that skips 387. When converting from float32->float64->float32, any signal NaNs get converted to quiet NaNs. Avoid that so using reflect.Value.Convert between two float32 types keeps the signal bit of NaNs. Skip the test on 387. I don't see any sane way of ensuring that a float load + float store is faithful on that platform. Fixes #36400 Change-Id: Ic316c74ddc155632e40424e207375b5d50dcd853 Reviewed-on: https://go-review.googlesource.com/c/go/+/221792 Run-TryBot: Keith Randall TryBot-Result: Gobot Gobot Reviewed-by: Josh Bleecher Snyder --- src/reflect/all_test.go | 31 +++++++++++++++++++++++++++++++ src/reflect/value.go | 14 ++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/src/reflect/all_test.go b/src/reflect/all_test.go index 00c18104eb..66d9661aeb 100644 --- a/src/reflect/all_test.go +++ b/src/reflect/all_test.go @@ -4163,6 +4163,37 @@ func TestConvert(t *testing.T) { } } +var gFloat32 float32 + +func TestConvertNaNs(t *testing.T) { + const snan uint32 = 0x7f800001 + + // Test to see if a store followed by a load of a signaling NaN + // maintains the signaling bit. The only platform known to fail + // this test is 386,GO386=387. The real test below will always fail + // if the platform can't even store+load a float without mucking + // with the bits. + gFloat32 = math.Float32frombits(snan) + runtime.Gosched() // make sure we don't optimize the store/load away + r := math.Float32bits(gFloat32) + if r != snan { + // This should only happen on 386,GO386=387. We have no way to + // test for 387, so we just make sure we're at least on 386. + if runtime.GOARCH != "386" { + t.Errorf("store/load of sNaN not faithful") + } + t.Skip("skipping test, float store+load not faithful") + } + + type myFloat32 float32 + x := V(myFloat32(math.Float32frombits(snan))) + y := x.Convert(TypeOf(float32(0))) + z := y.Interface().(float32) + if got := math.Float32bits(z); got != snan { + t.Errorf("signaling nan conversion got %x, want %x", got, snan) + } +} + type ComparableStruct struct { X int } diff --git a/src/reflect/value.go b/src/reflect/value.go index 51e7d195fe..08f0d259de 100644 --- a/src/reflect/value.go +++ b/src/reflect/value.go @@ -2541,6 +2541,14 @@ func makeFloat(f flag, v float64, t Type) Value { return Value{typ, ptr, f | flagIndir | flag(typ.Kind())} } +// makeFloat returns a Value of type t equal to v, where t is a float32 type. +func makeFloat32(f flag, v float32, t Type) Value { + typ := t.common() + ptr := unsafe_New(typ) + *(*float32)(ptr) = v + return Value{typ, ptr, f | flagIndir | flag(typ.Kind())} +} + // makeComplex returns a Value of type t equal to v (possibly truncated to complex64), // where t is a complex64 or complex128 type. func makeComplex(f flag, v complex128, t Type) Value { @@ -2613,6 +2621,12 @@ func cvtUintFloat(v Value, t Type) Value { // convertOp: floatXX -> floatXX func cvtFloat(v Value, t Type) Value { + if v.Type().Kind() == Float32 && t.Kind() == Float32 { + // Don't do any conversion if both types have underlying type float32. 
+ // This avoids converting to float64 and back, which will + // convert a signaling NaN to a quiet NaN. See issue 36400. + return makeFloat32(v.flag.ro(), *(*float32)(v.ptr), t) + } return makeFloat(v.flag.ro(), v.Float(), t) } -- cgit v1.2.3-54-g00ecf From bba88467f86472764a656e61f5f3265ed6853692 Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Thu, 19 Mar 2020 17:48:42 -0700 Subject: cmd/compile: add indexed-load CMP instructions Things like CMPQ 4(AX)(BX*8), CX Fixes #37955 Change-Id: Icbed430f65c91a0e3f38a633d8321d79433ad8b3 Reviewed-on: https://go-review.googlesource.com/c/go/+/224219 Run-TryBot: Keith Randall TryBot-Result: Gobot Gobot Reviewed-by: David Chase --- src/cmd/compile/internal/amd64/ssa.go | 13 + src/cmd/compile/internal/ssa/addressingmodes.go | 26 ++ src/cmd/compile/internal/ssa/gen/AMD64Ops.go | 19 ++ .../compile/internal/ssa/gen/AMD64splitload.rules | 10 + src/cmd/compile/internal/ssa/opGen.go | 225 +++++++++++++ .../compile/internal/ssa/rewriteAMD64splitload.go | 371 +++++++++++++++++++++ test/codegen/memops.go | 60 ++++ 7 files changed, 724 insertions(+) diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go index 5d79095025..210ac13092 100644 --- a/src/cmd/compile/internal/amd64/ssa.go +++ b/src/cmd/compile/internal/amd64/ssa.go @@ -681,6 +681,19 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { gc.AddAux2(&p.From, v, sc.Off()) p.To.Type = obj.TYPE_CONST p.To.Offset = sc.Val() + case ssa.OpAMD64CMPQloadidx8, ssa.OpAMD64CMPQloadidx1, ssa.OpAMD64CMPLloadidx4, ssa.OpAMD64CMPLloadidx1, ssa.OpAMD64CMPWloadidx2, ssa.OpAMD64CMPWloadidx1, ssa.OpAMD64CMPBloadidx1: + p := s.Prog(v.Op.Asm()) + memIdx(&p.From, v) + gc.AddAux(&p.From, v) + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Args[2].Reg() + case ssa.OpAMD64CMPQconstloadidx8, ssa.OpAMD64CMPQconstloadidx1, ssa.OpAMD64CMPLconstloadidx4, ssa.OpAMD64CMPLconstloadidx1, ssa.OpAMD64CMPWconstloadidx2, ssa.OpAMD64CMPWconstloadidx1, ssa.OpAMD64CMPBconstloadidx1: + sc := v.AuxValAndOff() + p := s.Prog(v.Op.Asm()) + memIdx(&p.From, v) + gc.AddAux2(&p.From, v, sc.Off()) + p.To.Type = obj.TYPE_CONST + p.To.Offset = sc.Val() case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst: x := v.Reg() diff --git a/src/cmd/compile/internal/ssa/addressingmodes.go b/src/cmd/compile/internal/ssa/addressingmodes.go index 2af8a4d1fc..f06f82420d 100644 --- a/src/cmd/compile/internal/ssa/addressingmodes.go +++ b/src/cmd/compile/internal/ssa/addressingmodes.go @@ -162,6 +162,32 @@ var combine = map[[2]Op]Op{ [2]Op{OpAMD64MOVQstoreconst, OpAMD64LEAQ1}: OpAMD64MOVQstoreconstidx1, [2]Op{OpAMD64MOVQstoreconst, OpAMD64LEAQ8}: OpAMD64MOVQstoreconstidx8, + [2]Op{OpAMD64CMPBload, OpAMD64ADDQ}: OpAMD64CMPBloadidx1, + [2]Op{OpAMD64CMPWload, OpAMD64ADDQ}: OpAMD64CMPWloadidx1, + [2]Op{OpAMD64CMPLload, OpAMD64ADDQ}: OpAMD64CMPLloadidx1, + [2]Op{OpAMD64CMPQload, OpAMD64ADDQ}: OpAMD64CMPQloadidx1, + + [2]Op{OpAMD64CMPBload, OpAMD64LEAQ1}: OpAMD64CMPBloadidx1, + [2]Op{OpAMD64CMPWload, OpAMD64LEAQ1}: OpAMD64CMPWloadidx1, + [2]Op{OpAMD64CMPWload, OpAMD64LEAQ2}: OpAMD64CMPWloadidx2, + [2]Op{OpAMD64CMPLload, OpAMD64LEAQ1}: OpAMD64CMPLloadidx1, + [2]Op{OpAMD64CMPLload, OpAMD64LEAQ4}: OpAMD64CMPLloadidx4, + [2]Op{OpAMD64CMPQload, OpAMD64LEAQ1}: OpAMD64CMPQloadidx1, + [2]Op{OpAMD64CMPQload, OpAMD64LEAQ8}: OpAMD64CMPQloadidx8, + + [2]Op{OpAMD64CMPBconstload, OpAMD64ADDQ}: OpAMD64CMPBconstloadidx1, + [2]Op{OpAMD64CMPWconstload, OpAMD64ADDQ}: OpAMD64CMPWconstloadidx1, + [2]Op{OpAMD64CMPLconstload, OpAMD64ADDQ}: OpAMD64CMPLconstloadidx1, + 
[2]Op{OpAMD64CMPQconstload, OpAMD64ADDQ}: OpAMD64CMPQconstloadidx1, + + [2]Op{OpAMD64CMPBconstload, OpAMD64LEAQ1}: OpAMD64CMPBconstloadidx1, + [2]Op{OpAMD64CMPWconstload, OpAMD64LEAQ1}: OpAMD64CMPWconstloadidx1, + [2]Op{OpAMD64CMPWconstload, OpAMD64LEAQ2}: OpAMD64CMPWconstloadidx2, + [2]Op{OpAMD64CMPLconstload, OpAMD64LEAQ1}: OpAMD64CMPLconstloadidx1, + [2]Op{OpAMD64CMPLconstload, OpAMD64LEAQ4}: OpAMD64CMPLconstloadidx4, + [2]Op{OpAMD64CMPQconstload, OpAMD64LEAQ1}: OpAMD64CMPQconstloadidx1, + [2]Op{OpAMD64CMPQconstload, OpAMD64LEAQ8}: OpAMD64CMPQconstloadidx8, + // 386 [2]Op{Op386MOVBload, Op386ADDL}: Op386MOVBloadidx1, [2]Op{Op386MOVWload, Op386ADDL}: Op386MOVWloadidx1, diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go index 74cdf0283b..bf949abc20 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go @@ -127,6 +127,7 @@ func init() { gp1flags = regInfo{inputs: []regMask{gpsp}} gp0flagsLoad = regInfo{inputs: []regMask{gpspsb, 0}} gp1flagsLoad = regInfo{inputs: []regMask{gpspsb, gpsp, 0}} + gp2flagsLoad = regInfo{inputs: []regMask{gpspsb, gpsp, gpsp, 0}} flagsgp = regInfo{inputs: nil, outputs: gponly} gp11flags = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp, 0}} @@ -299,6 +300,24 @@ func init() { {name: "CMPWconstload", argLength: 2, reg: gp0flagsLoad, asm: "CMPW", aux: "SymValAndOff", typ: "Flags", symEffect: "Read", faultOnNilArg0: true}, {name: "CMPBconstload", argLength: 2, reg: gp0flagsLoad, asm: "CMPB", aux: "SymValAndOff", typ: "Flags", symEffect: "Read", faultOnNilArg0: true}, + // compare *(arg0+N*arg1+auxint+aux) to arg2 (in that order). arg3=mem. + {name: "CMPQloadidx8", argLength: 4, reg: gp2flagsLoad, asm: "CMPQ", scale: 8, aux: "SymOff", typ: "Flags", symEffect: "Read"}, + {name: "CMPQloadidx1", argLength: 4, reg: gp2flagsLoad, asm: "CMPQ", scale: 1, commutative: true, aux: "SymOff", typ: "Flags", symEffect: "Read"}, + {name: "CMPLloadidx4", argLength: 4, reg: gp2flagsLoad, asm: "CMPL", scale: 4, aux: "SymOff", typ: "Flags", symEffect: "Read"}, + {name: "CMPLloadidx1", argLength: 4, reg: gp2flagsLoad, asm: "CMPL", scale: 1, commutative: true, aux: "SymOff", typ: "Flags", symEffect: "Read"}, + {name: "CMPWloadidx2", argLength: 4, reg: gp2flagsLoad, asm: "CMPW", scale: 2, aux: "SymOff", typ: "Flags", symEffect: "Read"}, + {name: "CMPWloadidx1", argLength: 4, reg: gp2flagsLoad, asm: "CMPW", scale: 1, commutative: true, aux: "SymOff", typ: "Flags", symEffect: "Read"}, + {name: "CMPBloadidx1", argLength: 4, reg: gp2flagsLoad, asm: "CMPB", scale: 1, commutative: true, aux: "SymOff", typ: "Flags", symEffect: "Read"}, + + // compare *(arg0+N*arg1+ValAndOff(AuxInt).Off()+aux) to ValAndOff(AuxInt).Val() (in that order). arg2=mem. 
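	// (Illustrative, not part of the original patch: with ptr in AX, idx in
	// BX, offset 8 and value 77, CMPLconstloadidx4 assembles to
	//	CMPL 8(AX)(BX*4), $77
	// which is the form the new test/codegen/memops.go patterns check for.)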
+ {name: "CMPQconstloadidx8", argLength: 3, reg: gp1flagsLoad, asm: "CMPQ", scale: 8, aux: "SymValAndOff", typ: "Flags", symEffect: "Read"}, + {name: "CMPQconstloadidx1", argLength: 3, reg: gp1flagsLoad, asm: "CMPQ", scale: 1, commutative: true, aux: "SymValAndOff", typ: "Flags", symEffect: "Read"}, + {name: "CMPLconstloadidx4", argLength: 3, reg: gp1flagsLoad, asm: "CMPL", scale: 4, aux: "SymValAndOff", typ: "Flags", symEffect: "Read"}, + {name: "CMPLconstloadidx1", argLength: 3, reg: gp1flagsLoad, asm: "CMPL", scale: 1, commutative: true, aux: "SymValAndOff", typ: "Flags", symEffect: "Read"}, + {name: "CMPWconstloadidx2", argLength: 3, reg: gp1flagsLoad, asm: "CMPW", scale: 2, aux: "SymValAndOff", typ: "Flags", symEffect: "Read"}, + {name: "CMPWconstloadidx1", argLength: 3, reg: gp1flagsLoad, asm: "CMPW", scale: 1, commutative: true, aux: "SymValAndOff", typ: "Flags", symEffect: "Read"}, + {name: "CMPBconstloadidx1", argLength: 3, reg: gp1flagsLoad, asm: "CMPB", scale: 1, commutative: true, aux: "SymValAndOff", typ: "Flags", symEffect: "Read"}, + {name: "UCOMISS", argLength: 2, reg: fp2flags, asm: "UCOMISS", typ: "Flags"}, // arg0 compare to arg1, f32 {name: "UCOMISD", argLength: 2, reg: fp2flags, asm: "UCOMISD", typ: "Flags"}, // arg0 compare to arg1, f64 diff --git a/src/cmd/compile/internal/ssa/gen/AMD64splitload.rules b/src/cmd/compile/internal/ssa/gen/AMD64splitload.rules index e8e1b4d258..5fd4429a1b 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64splitload.rules +++ b/src/cmd/compile/internal/ssa/gen/AMD64splitload.rules @@ -14,3 +14,13 @@ (CMP(Q|L|W|B)load {sym} [off] ptr x mem) -> (CMP(Q|L|W|B) (MOV(Q|L|W|B)load {sym} [off] ptr mem) x) (CMP(Q|L|W|B)constload {sym} [vo] ptr mem) -> (CMP(Q|L|W|B)const (MOV(Q|L|W|B)load {sym} [offOnly(vo)] ptr mem) [valOnly(vo)]) + +(CMP(Q|L|W|B)loadidx1 {sym} [off] ptr idx x mem) -> (CMP(Q|L|W|B) (MOV(Q|L|W|B)loadidx1 {sym} [off] ptr idx mem) x) +(CMPQloadidx8 {sym} [off] ptr idx x mem) -> (CMPQ (MOVQloadidx8 {sym} [off] ptr idx mem) x) +(CMPLloadidx4 {sym} [off] ptr idx x mem) -> (CMPL (MOVLloadidx4 {sym} [off] ptr idx mem) x) +(CMPWloadidx2 {sym} [off] ptr idx x mem) -> (CMPW (MOVWloadidx2 {sym} [off] ptr idx mem) x) + +(CMP(Q|L|W|B)constloadidx1 {sym} [vo] ptr idx mem) -> (CMP(Q|L|W|B)const (MOV(Q|L|W|B)loadidx1 {sym} [offOnly(vo)] ptr idx mem) [valOnly(vo)]) +(CMPQconstloadidx8 {sym} [vo] ptr idx mem) -> (CMPQconst (MOVQloadidx8 {sym} [offOnly(vo)] ptr idx mem) [valOnly(vo)]) +(CMPLconstloadidx4 {sym} [vo] ptr idx mem) -> (CMPLconst (MOVLloadidx4 {sym} [offOnly(vo)] ptr idx mem) [valOnly(vo)]) +(CMPWconstloadidx2 {sym} [vo] ptr idx mem) -> (CMPWconst (MOVWloadidx2 {sym} [offOnly(vo)] ptr idx mem) [valOnly(vo)]) diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 5e91856e48..46ca7936dc 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -602,6 +602,20 @@ const ( OpAMD64CMPLconstload OpAMD64CMPWconstload OpAMD64CMPBconstload + OpAMD64CMPQloadidx8 + OpAMD64CMPQloadidx1 + OpAMD64CMPLloadidx4 + OpAMD64CMPLloadidx1 + OpAMD64CMPWloadidx2 + OpAMD64CMPWloadidx1 + OpAMD64CMPBloadidx1 + OpAMD64CMPQconstloadidx8 + OpAMD64CMPQconstloadidx1 + OpAMD64CMPLconstloadidx4 + OpAMD64CMPLconstloadidx1 + OpAMD64CMPWconstloadidx2 + OpAMD64CMPWconstloadidx1 + OpAMD64CMPBconstloadidx1 OpAMD64UCOMISS OpAMD64UCOMISD OpAMD64BTL @@ -7534,6 +7548,217 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "CMPQloadidx8", + auxType: auxSymOff, + argLen: 4, + symEffect: SymRead, + asm: 
x86.ACMPQ, + scale: 8, + reg: regInfo{ + inputs: []inputInfo{ + {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {2, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { + name: "CMPQloadidx1", + auxType: auxSymOff, + argLen: 4, + commutative: true, + symEffect: SymRead, + asm: x86.ACMPQ, + scale: 1, + reg: regInfo{ + inputs: []inputInfo{ + {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {2, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { + name: "CMPLloadidx4", + auxType: auxSymOff, + argLen: 4, + symEffect: SymRead, + asm: x86.ACMPL, + scale: 4, + reg: regInfo{ + inputs: []inputInfo{ + {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {2, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { + name: "CMPLloadidx1", + auxType: auxSymOff, + argLen: 4, + commutative: true, + symEffect: SymRead, + asm: x86.ACMPL, + scale: 1, + reg: regInfo{ + inputs: []inputInfo{ + {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {2, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { + name: "CMPWloadidx2", + auxType: auxSymOff, + argLen: 4, + symEffect: SymRead, + asm: x86.ACMPW, + scale: 2, + reg: regInfo{ + inputs: []inputInfo{ + {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {2, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { + name: "CMPWloadidx1", + auxType: auxSymOff, + argLen: 4, + commutative: true, + symEffect: SymRead, + asm: x86.ACMPW, + scale: 1, + reg: regInfo{ + inputs: []inputInfo{ + {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {2, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { + name: "CMPBloadidx1", + auxType: auxSymOff, + argLen: 4, + commutative: true, + symEffect: SymRead, + asm: x86.ACMPB, + scale: 1, + reg: regInfo{ + inputs: []inputInfo{ + {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {2, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { + name: "CMPQconstloadidx8", + auxType: auxSymValAndOff, + argLen: 3, + symEffect: SymRead, + asm: x86.ACMPQ, + scale: 8, + reg: regInfo{ + inputs: []inputInfo{ + {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { + name: "CMPQconstloadidx1", + auxType: auxSymValAndOff, + argLen: 3, + commutative: true, + symEffect: SymRead, + asm: x86.ACMPQ, + scale: 1, + reg: regInfo{ + inputs: []inputInfo{ + {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { + name: "CMPLconstloadidx4", + auxType: auxSymValAndOff, + argLen: 3, + symEffect: SymRead, + asm: x86.ACMPL, + scale: 4, + reg: regInfo{ + inputs: 
[]inputInfo{ + {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { + name: "CMPLconstloadidx1", + auxType: auxSymValAndOff, + argLen: 3, + commutative: true, + symEffect: SymRead, + asm: x86.ACMPL, + scale: 1, + reg: regInfo{ + inputs: []inputInfo{ + {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { + name: "CMPWconstloadidx2", + auxType: auxSymValAndOff, + argLen: 3, + symEffect: SymRead, + asm: x86.ACMPW, + scale: 2, + reg: regInfo{ + inputs: []inputInfo{ + {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { + name: "CMPWconstloadidx1", + auxType: auxSymValAndOff, + argLen: 3, + commutative: true, + symEffect: SymRead, + asm: x86.ACMPW, + scale: 1, + reg: regInfo{ + inputs: []inputInfo{ + {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { + name: "CMPBconstloadidx1", + auxType: auxSymValAndOff, + argLen: 3, + commutative: true, + symEffect: SymRead, + asm: x86.ACMPB, + scale: 1, + reg: regInfo{ + inputs: []inputInfo{ + {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, { name: "UCOMISS", argLen: 2, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64splitload.go b/src/cmd/compile/internal/ssa/rewriteAMD64splitload.go index 40a7013744..6cdf8c89c2 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64splitload.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64splitload.go @@ -7,20 +7,48 @@ func rewriteValueAMD64splitload(v *Value) bool { switch v.Op { case OpAMD64CMPBconstload: return rewriteValueAMD64splitload_OpAMD64CMPBconstload(v) + case OpAMD64CMPBconstloadidx1: + return rewriteValueAMD64splitload_OpAMD64CMPBconstloadidx1(v) case OpAMD64CMPBload: return rewriteValueAMD64splitload_OpAMD64CMPBload(v) + case OpAMD64CMPBloadidx1: + return rewriteValueAMD64splitload_OpAMD64CMPBloadidx1(v) case OpAMD64CMPLconstload: return rewriteValueAMD64splitload_OpAMD64CMPLconstload(v) + case OpAMD64CMPLconstloadidx1: + return rewriteValueAMD64splitload_OpAMD64CMPLconstloadidx1(v) + case OpAMD64CMPLconstloadidx4: + return rewriteValueAMD64splitload_OpAMD64CMPLconstloadidx4(v) case OpAMD64CMPLload: return rewriteValueAMD64splitload_OpAMD64CMPLload(v) + case OpAMD64CMPLloadidx1: + return rewriteValueAMD64splitload_OpAMD64CMPLloadidx1(v) + case OpAMD64CMPLloadidx4: + return rewriteValueAMD64splitload_OpAMD64CMPLloadidx4(v) case OpAMD64CMPQconstload: return rewriteValueAMD64splitload_OpAMD64CMPQconstload(v) + case OpAMD64CMPQconstloadidx1: + return rewriteValueAMD64splitload_OpAMD64CMPQconstloadidx1(v) + case OpAMD64CMPQconstloadidx8: + return rewriteValueAMD64splitload_OpAMD64CMPQconstloadidx8(v) case OpAMD64CMPQload: return rewriteValueAMD64splitload_OpAMD64CMPQload(v) + case OpAMD64CMPQloadidx1: + return rewriteValueAMD64splitload_OpAMD64CMPQloadidx1(v) + case OpAMD64CMPQloadidx8: + return rewriteValueAMD64splitload_OpAMD64CMPQloadidx8(v) case OpAMD64CMPWconstload: return rewriteValueAMD64splitload_OpAMD64CMPWconstload(v) + case OpAMD64CMPWconstloadidx1: + return rewriteValueAMD64splitload_OpAMD64CMPWconstloadidx1(v) + case 
OpAMD64CMPWconstloadidx2: + return rewriteValueAMD64splitload_OpAMD64CMPWconstloadidx2(v) case OpAMD64CMPWload: return rewriteValueAMD64splitload_OpAMD64CMPWload(v) + case OpAMD64CMPWloadidx1: + return rewriteValueAMD64splitload_OpAMD64CMPWloadidx1(v) + case OpAMD64CMPWloadidx2: + return rewriteValueAMD64splitload_OpAMD64CMPWloadidx2(v) } return false } @@ -46,6 +74,30 @@ func rewriteValueAMD64splitload_OpAMD64CMPBconstload(v *Value) bool { return true } } +func rewriteValueAMD64splitload_OpAMD64CMPBconstloadidx1(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (CMPBconstloadidx1 {sym} [vo] ptr idx mem) + // result: (CMPBconst (MOVBloadidx1 {sym} [offOnly(vo)] ptr idx mem) [valOnly(vo)]) + for { + vo := v.AuxInt + sym := v.Aux + ptr := v_0 + idx := v_1 + mem := v_2 + v.reset(OpAMD64CMPBconst) + v.AuxInt = valOnly(vo) + v0 := b.NewValue0(v.Pos, OpAMD64MOVBloadidx1, typ.UInt8) + v0.AuxInt = offOnly(vo) + v0.Aux = sym + v0.AddArg3(ptr, idx, mem) + v.AddArg(v0) + return true + } +} func rewriteValueAMD64splitload_OpAMD64CMPBload(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] @@ -69,6 +121,31 @@ func rewriteValueAMD64splitload_OpAMD64CMPBload(v *Value) bool { return true } } +func rewriteValueAMD64splitload_OpAMD64CMPBloadidx1(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (CMPBloadidx1 {sym} [off] ptr idx x mem) + // result: (CMPB (MOVBloadidx1 {sym} [off] ptr idx mem) x) + for { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + idx := v_1 + x := v_2 + mem := v_3 + v.reset(OpAMD64CMPB) + v0 := b.NewValue0(v.Pos, OpAMD64MOVBloadidx1, typ.UInt8) + v0.AuxInt = off + v0.Aux = sym + v0.AddArg3(ptr, idx, mem) + v.AddArg2(v0, x) + return true + } +} func rewriteValueAMD64splitload_OpAMD64CMPLconstload(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -91,6 +168,54 @@ func rewriteValueAMD64splitload_OpAMD64CMPLconstload(v *Value) bool { return true } } +func rewriteValueAMD64splitload_OpAMD64CMPLconstloadidx1(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (CMPLconstloadidx1 {sym} [vo] ptr idx mem) + // result: (CMPLconst (MOVLloadidx1 {sym} [offOnly(vo)] ptr idx mem) [valOnly(vo)]) + for { + vo := v.AuxInt + sym := v.Aux + ptr := v_0 + idx := v_1 + mem := v_2 + v.reset(OpAMD64CMPLconst) + v.AuxInt = valOnly(vo) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, typ.UInt32) + v0.AuxInt = offOnly(vo) + v0.Aux = sym + v0.AddArg3(ptr, idx, mem) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64splitload_OpAMD64CMPLconstloadidx4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (CMPLconstloadidx4 {sym} [vo] ptr idx mem) + // result: (CMPLconst (MOVLloadidx4 {sym} [offOnly(vo)] ptr idx mem) [valOnly(vo)]) + for { + vo := v.AuxInt + sym := v.Aux + ptr := v_0 + idx := v_1 + mem := v_2 + v.reset(OpAMD64CMPLconst) + v.AuxInt = valOnly(vo) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx4, typ.UInt32) + v0.AuxInt = offOnly(vo) + v0.Aux = sym + v0.AddArg3(ptr, idx, mem) + v.AddArg(v0) + return true + } +} func rewriteValueAMD64splitload_OpAMD64CMPLload(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] @@ -114,6 +239,56 @@ func rewriteValueAMD64splitload_OpAMD64CMPLload(v *Value) bool { return true } } +func rewriteValueAMD64splitload_OpAMD64CMPLloadidx1(v 
*Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (CMPLloadidx1 {sym} [off] ptr idx x mem) + // result: (CMPL (MOVLloadidx1 {sym} [off] ptr idx mem) x) + for { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + idx := v_1 + x := v_2 + mem := v_3 + v.reset(OpAMD64CMPL) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, typ.UInt32) + v0.AuxInt = off + v0.Aux = sym + v0.AddArg3(ptr, idx, mem) + v.AddArg2(v0, x) + return true + } +} +func rewriteValueAMD64splitload_OpAMD64CMPLloadidx4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (CMPLloadidx4 {sym} [off] ptr idx x mem) + // result: (CMPL (MOVLloadidx4 {sym} [off] ptr idx mem) x) + for { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + idx := v_1 + x := v_2 + mem := v_3 + v.reset(OpAMD64CMPL) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx4, typ.UInt32) + v0.AuxInt = off + v0.Aux = sym + v0.AddArg3(ptr, idx, mem) + v.AddArg2(v0, x) + return true + } +} func rewriteValueAMD64splitload_OpAMD64CMPQconstload(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -136,6 +311,54 @@ func rewriteValueAMD64splitload_OpAMD64CMPQconstload(v *Value) bool { return true } } +func rewriteValueAMD64splitload_OpAMD64CMPQconstloadidx1(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (CMPQconstloadidx1 {sym} [vo] ptr idx mem) + // result: (CMPQconst (MOVQloadidx1 {sym} [offOnly(vo)] ptr idx mem) [valOnly(vo)]) + for { + vo := v.AuxInt + sym := v.Aux + ptr := v_0 + idx := v_1 + mem := v_2 + v.reset(OpAMD64CMPQconst) + v.AuxInt = valOnly(vo) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, typ.UInt64) + v0.AuxInt = offOnly(vo) + v0.Aux = sym + v0.AddArg3(ptr, idx, mem) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64splitload_OpAMD64CMPQconstloadidx8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (CMPQconstloadidx8 {sym} [vo] ptr idx mem) + // result: (CMPQconst (MOVQloadidx8 {sym} [offOnly(vo)] ptr idx mem) [valOnly(vo)]) + for { + vo := v.AuxInt + sym := v.Aux + ptr := v_0 + idx := v_1 + mem := v_2 + v.reset(OpAMD64CMPQconst) + v.AuxInt = valOnly(vo) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx8, typ.UInt64) + v0.AuxInt = offOnly(vo) + v0.Aux = sym + v0.AddArg3(ptr, idx, mem) + v.AddArg(v0) + return true + } +} func rewriteValueAMD64splitload_OpAMD64CMPQload(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] @@ -159,6 +382,56 @@ func rewriteValueAMD64splitload_OpAMD64CMPQload(v *Value) bool { return true } } +func rewriteValueAMD64splitload_OpAMD64CMPQloadidx1(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (CMPQloadidx1 {sym} [off] ptr idx x mem) + // result: (CMPQ (MOVQloadidx1 {sym} [off] ptr idx mem) x) + for { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + idx := v_1 + x := v_2 + mem := v_3 + v.reset(OpAMD64CMPQ) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, typ.UInt64) + v0.AuxInt = off + v0.Aux = sym + v0.AddArg3(ptr, idx, mem) + v.AddArg2(v0, x) + return true + } +} +func rewriteValueAMD64splitload_OpAMD64CMPQloadidx8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (CMPQloadidx8 {sym} 
[off] ptr idx x mem) + // result: (CMPQ (MOVQloadidx8 {sym} [off] ptr idx mem) x) + for { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + idx := v_1 + x := v_2 + mem := v_3 + v.reset(OpAMD64CMPQ) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx8, typ.UInt64) + v0.AuxInt = off + v0.Aux = sym + v0.AddArg3(ptr, idx, mem) + v.AddArg2(v0, x) + return true + } +} func rewriteValueAMD64splitload_OpAMD64CMPWconstload(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -181,6 +454,54 @@ func rewriteValueAMD64splitload_OpAMD64CMPWconstload(v *Value) bool { return true } } +func rewriteValueAMD64splitload_OpAMD64CMPWconstloadidx1(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (CMPWconstloadidx1 {sym} [vo] ptr idx mem) + // result: (CMPWconst (MOVWloadidx1 {sym} [offOnly(vo)] ptr idx mem) [valOnly(vo)]) + for { + vo := v.AuxInt + sym := v.Aux + ptr := v_0 + idx := v_1 + mem := v_2 + v.reset(OpAMD64CMPWconst) + v.AuxInt = valOnly(vo) + v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, typ.UInt16) + v0.AuxInt = offOnly(vo) + v0.Aux = sym + v0.AddArg3(ptr, idx, mem) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64splitload_OpAMD64CMPWconstloadidx2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (CMPWconstloadidx2 {sym} [vo] ptr idx mem) + // result: (CMPWconst (MOVWloadidx2 {sym} [offOnly(vo)] ptr idx mem) [valOnly(vo)]) + for { + vo := v.AuxInt + sym := v.Aux + ptr := v_0 + idx := v_1 + mem := v_2 + v.reset(OpAMD64CMPWconst) + v.AuxInt = valOnly(vo) + v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx2, typ.UInt16) + v0.AuxInt = offOnly(vo) + v0.Aux = sym + v0.AddArg3(ptr, idx, mem) + v.AddArg(v0) + return true + } +} func rewriteValueAMD64splitload_OpAMD64CMPWload(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] @@ -204,6 +525,56 @@ func rewriteValueAMD64splitload_OpAMD64CMPWload(v *Value) bool { return true } } +func rewriteValueAMD64splitload_OpAMD64CMPWloadidx1(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (CMPWloadidx1 {sym} [off] ptr idx x mem) + // result: (CMPW (MOVWloadidx1 {sym} [off] ptr idx mem) x) + for { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + idx := v_1 + x := v_2 + mem := v_3 + v.reset(OpAMD64CMPW) + v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, typ.UInt16) + v0.AuxInt = off + v0.Aux = sym + v0.AddArg3(ptr, idx, mem) + v.AddArg2(v0, x) + return true + } +} +func rewriteValueAMD64splitload_OpAMD64CMPWloadidx2(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (CMPWloadidx2 {sym} [off] ptr idx x mem) + // result: (CMPW (MOVWloadidx2 {sym} [off] ptr idx mem) x) + for { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + idx := v_1 + x := v_2 + mem := v_3 + v.reset(OpAMD64CMPW) + v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx2, typ.UInt16) + v0.AuxInt = off + v0.Aux = sym + v0.AddArg3(ptr, idx, mem) + v.AddArg2(v0, x) + return true + } +} func rewriteBlockAMD64splitload(b *Block) bool { switch b.Kind { } diff --git a/test/codegen/memops.go b/test/codegen/memops.go index 0df191480d..bf5ffb6c4f 100644 --- a/test/codegen/memops.go +++ b/test/codegen/memops.go @@ -243,3 +243,63 @@ func idxStorePlusOp(x []int32, i int, v int32) { // 386: `XORL\t[$]77, 36\([A-Z]+\)\([A-Z]+\*4\)` x[i+9] ^= 77 } + +func idxCompare(i int) int { + // 
amd64: `CMPB\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*` + if x8[i+1] < x8[0] { + return 0 + } + // amd64: `CMPW\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*` + if x16[i+1] < x16[0] { + return 0 + } + // amd64: `CMPW\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*` + if x16[16*i+1] < x16[0] { + return 0 + } + // amd64: `CMPL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*` + if x32[i+1] < x32[0] { + return 0 + } + // amd64: `CMPL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*` + if x32[16*i+1] < x32[0] { + return 0 + } + // amd64: `CMPQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*` + if x64[i+1] < x64[0] { + return 0 + } + // amd64: `CMPQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), [A-Z]+[0-9]*` + if x64[16*i+1] < x64[0] { + return 0 + } + // amd64: `CMPB\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), \$77` + if x8[i+2] < 77 { + return 0 + } + // amd64: `CMPW\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), \$77` + if x16[i+2] < 77 { + return 0 + } + // amd64: `CMPW\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), \$77` + if x16[16*i+2] < 77 { + return 0 + } + // amd64: `CMPL\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), \$77` + if x32[i+2] < 77 { + return 0 + } + // amd64: `CMPL\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), \$77` + if x32[16*i+2] < 77 { + return 0 + } + // amd64: `CMPQ\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), \$77` + if x64[i+2] < 77 { + return 0 + } + // amd64: `CMPQ\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), \$77` + if x64[16*i+2] < 77 { + return 0 + } + return 1 +} -- cgit v1.2.3-54-g00ecf From a1bc781503bf371262d4878e96cd60cdbb5e9ee9 Mon Sep 17 00:00:00 2001 From: Carlos Amedee Date: Wed, 8 Jan 2020 14:49:43 -0500 Subject: doc: update the minimum supported macOS version to 10.11 Update minimum macOS supported version from 10.10 to 10.11. Updates #23011 Change-Id: Ie10c40e882c9d309ff56041d9768afc288d0204f Reviewed-on: https://go-review.googlesource.com/c/go/+/213878 Reviewed-by: Alexander Rakoczy Reviewed-by: Dmitri Shuralyov Run-TryBot: Alexander Rakoczy TryBot-Result: Gobot Gobot --- doc/install.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/install.html b/doc/install.html index 40faadb2fa..706d66c007 100644 --- a/doc/install.html +++ b/doc/install.html @@ -17,7 +17,7 @@

Official binary distributions are available for the FreeBSD (release 10-STABLE and above),
-Linux, macOS (10.10 and above), and Windows operating systems and
+Linux, macOS (10.11 and above), and Windows operating systems and
the 32-bit (386) and 64-bit (amd64) x86 processor architectures.

    @@ -49,7 +49,7 @@ If your OS or architecture is not on the list, you may be able to
<tr><td>FreeBSD 10.3 or later</td> <td>amd64, 386</td> <td>Debian GNU/kFreeBSD not supported</td></tr>
<tr><td>Linux 2.6.23 or later with glibc</td> <td>amd64, 386, arm, arm64, s390x, ppc64le</td> <td>CentOS/RHEL 5.x not supported. Install from source for other libc.</td></tr>
-<tr><td>macOS 10.10 or later</td> <td>amd64</td> <td>use the clang or gcc that comes with Xcode for cgo support</td></tr>
+<tr><td>macOS 10.11 or later</td> <td>amd64</td> <td>use the clang or gcc that comes with Xcode for cgo support</td></tr>
<tr><td>Windows 7, Server 2008R2 or later</td> <td>amd64, 386</td> <td>use MinGW (386) or MinGW-W64 (amd64) gcc.
    No need for cygwin or msys. -- cgit v1.2.3-54-g00ecf From 620208790ec55a8af69f09f5793173a23375f46e Mon Sep 17 00:00:00 2001 From: Katie Hockman Date: Wed, 1 Apr 2020 13:49:57 -0400 Subject: crypto/tls: add missing alert values Fixes #35911 Change-Id: I093d25aa169963769b51c37d2481bce71bd0fd2f Reviewed-on: https://go-review.googlesource.com/c/go/+/226858 Run-TryBot: Katie Hockman TryBot-Result: Gobot Gobot Reviewed-by: Filippo Valsorda --- src/crypto/tls/alert.go | 120 ++++++++++++++++++++++++++---------------------- 1 file changed, 66 insertions(+), 54 deletions(-) diff --git a/src/crypto/tls/alert.go b/src/crypto/tls/alert.go index 22b3eca92f..4790b73724 100644 --- a/src/crypto/tls/alert.go +++ b/src/crypto/tls/alert.go @@ -15,63 +15,75 @@ const ( ) const ( - alertCloseNotify alert = 0 - alertUnexpectedMessage alert = 10 - alertBadRecordMAC alert = 20 - alertDecryptionFailed alert = 21 - alertRecordOverflow alert = 22 - alertDecompressionFailure alert = 30 - alertHandshakeFailure alert = 40 - alertBadCertificate alert = 42 - alertUnsupportedCertificate alert = 43 - alertCertificateRevoked alert = 44 - alertCertificateExpired alert = 45 - alertCertificateUnknown alert = 46 - alertIllegalParameter alert = 47 - alertUnknownCA alert = 48 - alertAccessDenied alert = 49 - alertDecodeError alert = 50 - alertDecryptError alert = 51 - alertProtocolVersion alert = 70 - alertInsufficientSecurity alert = 71 - alertInternalError alert = 80 - alertInappropriateFallback alert = 86 - alertUserCanceled alert = 90 - alertNoRenegotiation alert = 100 - alertMissingExtension alert = 109 - alertUnsupportedExtension alert = 110 - alertUnrecognizedName alert = 112 - alertNoApplicationProtocol alert = 120 + alertCloseNotify alert = 0 + alertUnexpectedMessage alert = 10 + alertBadRecordMAC alert = 20 + alertDecryptionFailed alert = 21 + alertRecordOverflow alert = 22 + alertDecompressionFailure alert = 30 + alertHandshakeFailure alert = 40 + alertBadCertificate alert = 42 + alertUnsupportedCertificate alert = 43 + alertCertificateRevoked alert = 44 + alertCertificateExpired alert = 45 + alertCertificateUnknown alert = 46 + alertIllegalParameter alert = 47 + alertUnknownCA alert = 48 + alertAccessDenied alert = 49 + alertDecodeError alert = 50 + alertDecryptError alert = 51 + alertExportRestriction alert = 60 + alertProtocolVersion alert = 70 + alertInsufficientSecurity alert = 71 + alertInternalError alert = 80 + alertInappropriateFallback alert = 86 + alertUserCanceled alert = 90 + alertNoRenegotiation alert = 100 + alertMissingExtension alert = 109 + alertUnsupportedExtension alert = 110 + alertCertificateUnobtainable alert = 111 + alertUnrecognizedName alert = 112 + alertBadCertificateStatusResponse alert = 113 + alertBadCertificateHashValue alert = 114 + alertUnknownPSKIdentity alert = 115 + alertCertificateRequired alert = 116 + alertNoApplicationProtocol alert = 120 ) var alertText = map[alert]string{ - alertCloseNotify: "close notify", - alertUnexpectedMessage: "unexpected message", - alertBadRecordMAC: "bad record MAC", - alertDecryptionFailed: "decryption failed", - alertRecordOverflow: "record overflow", - alertDecompressionFailure: "decompression failure", - alertHandshakeFailure: "handshake failure", - alertBadCertificate: "bad certificate", - alertUnsupportedCertificate: "unsupported certificate", - alertCertificateRevoked: "revoked certificate", - alertCertificateExpired: "expired certificate", - alertCertificateUnknown: "unknown certificate", - alertIllegalParameter: "illegal parameter", - 
alertUnknownCA: "unknown certificate authority", - alertAccessDenied: "access denied", - alertDecodeError: "error decoding message", - alertDecryptError: "error decrypting message", - alertProtocolVersion: "protocol version not supported", - alertInsufficientSecurity: "insufficient security level", - alertInternalError: "internal error", - alertInappropriateFallback: "inappropriate fallback", - alertUserCanceled: "user canceled", - alertNoRenegotiation: "no renegotiation", - alertMissingExtension: "missing extension", - alertUnsupportedExtension: "unsupported extension", - alertUnrecognizedName: "unrecognized name", - alertNoApplicationProtocol: "no application protocol", + alertCloseNotify: "close notify", + alertUnexpectedMessage: "unexpected message", + alertBadRecordMAC: "bad record MAC", + alertDecryptionFailed: "decryption failed", + alertRecordOverflow: "record overflow", + alertDecompressionFailure: "decompression failure", + alertHandshakeFailure: "handshake failure", + alertBadCertificate: "bad certificate", + alertUnsupportedCertificate: "unsupported certificate", + alertCertificateRevoked: "revoked certificate", + alertCertificateExpired: "expired certificate", + alertCertificateUnknown: "unknown certificate", + alertIllegalParameter: "illegal parameter", + alertUnknownCA: "unknown certificate authority", + alertAccessDenied: "access denied", + alertDecodeError: "error decoding message", + alertDecryptError: "error decrypting message", + alertExportRestriction: "export restriction", + alertProtocolVersion: "protocol version not supported", + alertInsufficientSecurity: "insufficient security level", + alertInternalError: "internal error", + alertInappropriateFallback: "inappropriate fallback", + alertUserCanceled: "user canceled", + alertNoRenegotiation: "no renegotiation", + alertMissingExtension: "missing extension", + alertUnsupportedExtension: "unsupported extension", + alertCertificateUnobtainable: "certificate unobtainable", + alertUnrecognizedName: "unrecognized name", + alertBadCertificateStatusResponse: "bad certificate status response", + alertBadCertificateHashValue: "bad certificate hash value", + alertUnknownPSKIdentity: "unknown PSK identity", + alertCertificateRequired: "certificate required", + alertNoApplicationProtocol: "no application protocol", } func (e alert) String() string { -- cgit v1.2.3-54-g00ecf From 2681efaf0e0457ef7f4246033fe0e97e8d352172 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Tue, 31 Mar 2020 10:49:31 -0700 Subject: os/signal, runtime: remove runtime sigqueue initialization We can initialize the runtime sigqueue packages on first use. We don't require an explicit initialization step. So, remove it. Change-Id: I484e02dc2c67395fd5584f35ecda2e28b37168df Reviewed-on: https://go-review.googlesource.com/c/go/+/226540 Run-TryBot: Ian Lance Taylor Reviewed-by: Bryan C. 
Mills --- src/os/signal/signal.go | 14 ++++++++------ src/os/signal/signal_plan9.go | 4 +--- src/os/signal/signal_unix.go | 2 -- src/runtime/sigqueue.go | 9 +++------ src/runtime/sigqueue_plan9.go | 5 +---- 5 files changed, 13 insertions(+), 21 deletions(-) diff --git a/src/os/signal/signal.go b/src/os/signal/signal.go index 136dd9cc97..8e31aa2627 100644 --- a/src/os/signal/signal.go +++ b/src/os/signal/signal.go @@ -122,12 +122,6 @@ func Notify(c chan<- os.Signal, sig ...os.Signal) { panic("os/signal: Notify using nil channel") } - watchSignalLoopOnce.Do(func() { - if watchSignalLoop != nil { - go watchSignalLoop() - } - }) - handlers.Lock() defer handlers.Unlock() @@ -148,6 +142,14 @@ func Notify(c chan<- os.Signal, sig ...os.Signal) { h.set(n) if handlers.ref[n] == 0 { enableSignal(n) + + // The runtime requires that we enable a + // signal before starting the watcher. + watchSignalLoopOnce.Do(func() { + if watchSignalLoop != nil { + go watchSignalLoop() + } + }) } handlers.ref[n]++ } diff --git a/src/os/signal/signal_plan9.go b/src/os/signal/signal_plan9.go index 8408607c7f..7d4871518a 100644 --- a/src/os/signal/signal_plan9.go +++ b/src/os/signal/signal_plan9.go @@ -11,7 +11,7 @@ import ( var sigtab = make(map[os.Signal]int) -// In sig.s; jumps to runtime. +// Defined by the runtime package. func signal_disable(uint32) func signal_enable(uint32) func signal_ignore(uint32) @@ -19,8 +19,6 @@ func signal_ignored(uint32) bool func signal_recv() string func init() { - signal_enable(0) // first call - initialize - watchSignalLoop = loop } diff --git a/src/os/signal/signal_unix.go b/src/os/signal/signal_unix.go index 89ee2d9e18..90a1eca156 100644 --- a/src/os/signal/signal_unix.go +++ b/src/os/signal/signal_unix.go @@ -25,8 +25,6 @@ func loop() { } func init() { - signal_enable(0) // first call - initialize - watchSignalLoop = loop } diff --git a/src/runtime/sigqueue.go b/src/runtime/sigqueue.go index b2ebb2b457..3bf07cb5a6 100644 --- a/src/runtime/sigqueue.go +++ b/src/runtime/sigqueue.go @@ -192,16 +192,13 @@ func signalWaitUntilIdle() { //go:linkname signal_enable os/signal.signal_enable func signal_enable(s uint32) { if !sig.inuse { - // The first call to signal_enable is for us - // to use for initialization. It does not pass - // signal information in m. + // This is the first call to signal_enable. Initialize. sig.inuse = true // enable reception of signals; cannot disable if GOOS == "darwin" { sigNoteSetup(&sig.note) - return + } else { + noteclear(&sig.note) } - noteclear(&sig.note) - return } if s >= uint32(len(sig.wanted)*32) { diff --git a/src/runtime/sigqueue_plan9.go b/src/runtime/sigqueue_plan9.go index 934742a1f4..d5fe8f8b35 100644 --- a/src/runtime/sigqueue_plan9.go +++ b/src/runtime/sigqueue_plan9.go @@ -134,12 +134,9 @@ func signalWaitUntilIdle() { //go:linkname signal_enable os/signal.signal_enable func signal_enable(s uint32) { if !sig.inuse { - // The first call to signal_enable is for us - // to use for initialization. It does not pass - // signal information in m. + // This is the first call to signal_enable. Initialize. sig.inuse = true // enable reception of signals; cannot disable noteclear(&sig.note) - return } } -- cgit v1.2.3-54-g00ecf From 95773ab9b053edc43ba07a182f3d5e0e29775a45 Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Wed, 1 Apr 2020 16:34:50 -0700 Subject: sync/atomic: fix TestSwapPointer test It plays way too loose with unsafe.Pointer rules. It runs afoul of the checkptr rules, so some race detector builds were failing. 
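For example (a minimal sketch, not code from this CL), the pattern the old
test used versus the one the rewritten test uses:

	package main

	import (
		"fmt"
		"unsafe"
	)

	var buf [64]byte

	func main() {
		// OK: a pointer derived from a real object, as the new
		// testPointers helper produces.
		p := unsafe.Pointer(&buf[8])
		fmt.Println(p)

		// Not OK: fabricating a pointer from an arbitrary integer,
		// as the old test did with unsafe.Pointer(delta); this is
		// what checkptr rejects.
		// q := unsafe.Pointer(uintptr(1 << 16))
	}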
Fixes #38210 Change-Id: I5e1c78201d06295524fdedb3fe5b49d61446f443 Reviewed-on: https://go-review.googlesource.com/c/go/+/226880 Run-TryBot: Keith Randall TryBot-Result: Gobot Gobot Reviewed-by: Josh Bleecher Snyder --- src/sync/atomic/atomic_test.go | 68 +++++++++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 27 deletions(-) diff --git a/src/sync/atomic/atomic_test.go b/src/sync/atomic/atomic_test.go index 286eadc6cd..83e7c8d763 100644 --- a/src/sync/atomic/atomic_test.go +++ b/src/sync/atomic/atomic_test.go @@ -153,6 +153,21 @@ func TestSwapUintptr(t *testing.T) { } } +var global [1024]byte + +func testPointers() []unsafe.Pointer { + var pointers []unsafe.Pointer + // globals + for i := 0; i < 10; i++ { + pointers = append(pointers, unsafe.Pointer(&global[1< delta; delta += delta { - k := SwapPointer(&x.i, unsafe.Pointer(delta)) - if uintptr(x.i) != delta || uintptr(k) != j { - t.Fatalf("delta=%d i=%d j=%d k=%d", delta, x.i, j, k) + var j unsafe.Pointer + + for _, p := range testPointers() { + k := SwapPointer(&x.i, p) + if x.i != p || k != j { + t.Fatalf("p=%p i=%p j=%p k=%p", p, x.i, j, k) } - j = delta + j = p } if x.before != magicptr || x.after != magicptr { t.Fatalf("wrong magic: %#x _ %#x != %#x _ %#x", x.before, x.after, magicptr, magicptr) @@ -456,20 +472,20 @@ func TestCompareAndSwapPointer(t *testing.T) { magicptr := uintptr(m) x.before = magicptr x.after = magicptr - for val := uintptr(1 << 16); val+val > val; val += val { - x.i = unsafe.Pointer(val) - if !CompareAndSwapPointer(&x.i, unsafe.Pointer(val), unsafe.Pointer(val+1)) { - t.Fatalf("should have swapped %#x %#x", val, val+1) + q := unsafe.Pointer(new(byte)) + for _, p := range testPointers() { + x.i = p + if !CompareAndSwapPointer(&x.i, p, q) { + t.Fatalf("should have swapped %p %p", p, q) } - if x.i != unsafe.Pointer(val+1) { - t.Fatalf("wrong x.i after swap: x.i=%#x val+1=%#x", x.i, val+1) + if x.i != q { + t.Fatalf("wrong x.i after swap: x.i=%p want %p", x.i, q) } - x.i = unsafe.Pointer(val + 1) - if CompareAndSwapPointer(&x.i, unsafe.Pointer(val), unsafe.Pointer(val+2)) { - t.Fatalf("should not have swapped %#x %#x", val, val+2) + if CompareAndSwapPointer(&x.i, p, nil) { + t.Fatalf("should not have swapped %p nil", p) } - if x.i != unsafe.Pointer(val+1) { - t.Fatalf("wrong x.i after swap: x.i=%#x val+1=%#x", x.i, val+1) + if x.i != q { + t.Fatalf("wrong x.i after swap: x.i=%p want %p", x.i, q) } } if x.before != magicptr || x.after != magicptr { @@ -595,12 +611,12 @@ func TestLoadPointer(t *testing.T) { magicptr := uintptr(m) x.before = magicptr x.after = magicptr - for delta := uintptr(1 << 16); delta+delta > delta; delta += delta { + for _, p := range testPointers() { + x.i = p k := LoadPointer(&x.i) - if k != x.i { - t.Fatalf("delta=%d i=%d k=%d", delta, x.i, k) + if k != p { + t.Fatalf("p=%x k=%x", p, k) } - x.i = unsafe.Pointer(uintptr(x.i) + delta) } if x.before != magicptr || x.after != magicptr { t.Fatalf("wrong magic: %#x _ %#x != %#x _ %#x", x.before, x.after, magicptr, magicptr) @@ -730,13 +746,11 @@ func TestStorePointer(t *testing.T) { magicptr := uintptr(m) x.before = magicptr x.after = magicptr - v := unsafe.Pointer(uintptr(0)) - for delta := uintptr(1 << 16); delta+delta > delta; delta += delta { - StorePointer(&x.i, unsafe.Pointer(v)) - if x.i != v { - t.Fatalf("delta=%d i=%d v=%d", delta, x.i, v) + for _, p := range testPointers() { + StorePointer(&x.i, p) + if x.i != p { + t.Fatalf("x.i=%p p=%p", x.i, p) } - v = unsafe.Pointer(uintptr(v) + delta) } if x.before != magicptr || 
x.after != magicptr { t.Fatalf("wrong magic: %#x _ %#x != %#x _ %#x", x.before, x.after, magicptr, magicptr) -- cgit v1.2.3-54-g00ecf From 9667294d8f5c8c6e2c48efa1ced98cb7e9cfaf51 Mon Sep 17 00:00:00 2001 From: Alex Brainman Date: Tue, 25 Feb 2020 18:44:55 +1100 Subject: syscall: fix windows WSASendto -d=checkptr violation WSASendto converts unsafe.Pointer to *syscall.RawSockaddrAny. But that violates every rule of https://golang.org/pkg/unsafe/#Pointer Implement WSASendto by calling Windows WSASendTo API by calling syscall.Syscall9 directly. This allows us to comply with (4) Conversion of a Pointer to a uintptr when calling syscall.Syscall rule. After this change, this commands succeeds: go test -a -short -gcflags=all=-d=checkptr -run=TestPacketConn net Updates #34972 Change-Id: Ib9a810bedf9e05251b7d3c7f69e15bfbd177ac62 Reviewed-on: https://go-review.googlesource.com/c/go/+/220544 Run-TryBot: Alex Brainman TryBot-Result: Gobot Gobot Reviewed-by: Ian Lance Taylor --- src/syscall/syscall_windows.go | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/syscall/syscall_windows.go b/src/syscall/syscall_windows.go index 950c281e4d..922cf2cb2e 100644 --- a/src/syscall/syscall_windows.go +++ b/src/syscall/syscall_windows.go @@ -871,11 +871,19 @@ func Shutdown(fd Handle, how int) (err error) { } func WSASendto(s Handle, bufs *WSABuf, bufcnt uint32, sent *uint32, flags uint32, to Sockaddr, overlapped *Overlapped, croutine *byte) (err error) { - rsa, l, err := to.sockaddr() + rsa, len, err := to.sockaddr() if err != nil { return err } - return WSASendTo(s, bufs, bufcnt, sent, flags, (*RawSockaddrAny)(unsafe.Pointer(rsa)), l, overlapped, croutine) + r1, _, e1 := Syscall9(procWSASendTo.Addr(), 9, uintptr(s), uintptr(unsafe.Pointer(bufs)), uintptr(bufcnt), uintptr(unsafe.Pointer(sent)), uintptr(flags), uintptr(unsafe.Pointer(rsa)), uintptr(len), uintptr(unsafe.Pointer(overlapped)), uintptr(unsafe.Pointer(croutine))) + if r1 == socket_error { + if e1 != 0 { + err = errnoErr(e1) + } else { + err = EINVAL + } + } + return err } func LoadGetAddrInfo() error { -- cgit v1.2.3-54-g00ecf From 801cd7c84d42dcf18256416524aa0d31d6305830 Mon Sep 17 00:00:00 2001 From: Alex Brainman Date: Sat, 7 Mar 2020 11:08:06 +1100 Subject: internal/syscall/windows: change WSAMsg.Name type The problem was discovered while running go test -a -short -gcflags=all=-d=checkptr -run=TestUDPConnSpecificMethods net WSAMsg is type defined by Windows. And WSAMsg.Name could point to two different structures for IPv4 and IPV6 sockets. Currently WSAMsg.Name is declared as *syscall.RawSockaddrAny. But that violates (1) Conversion of a *T1 to Pointer to *T2. rule of https://golang.org/pkg/unsafe/#Pointer When we convert *syscall.RawSockaddrInet4 into *syscall.RawSockaddrAny, syscall.RawSockaddrInet4 and syscall.RawSockaddrAny do not share an equivalent memory layout. Same for *syscall.SockaddrInet6 into *syscall.RawSockaddrAny. This CL changes WSAMsg.Name type to *syscall.Pointer. syscall.Pointer length is 0, and that at least makes type checker happy. After this change I was able to run go test -a -short -gcflags=all=-d=checkptr std cmd without type checker complaining. 
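As a minimal sketch of the violated rule (with made-up stand-in types, not
the real syscall structures):

	package main

	import "unsafe"

	type rawInet4 struct{ data [16]byte } // stand-in for RawSockaddrInet4
	type rawAny struct{ data [112]byte }  // stand-in for RawSockaddrAny

	func main() {
		v4 := new(rawInet4)
		// Invalid: the two pointees do not share an equivalent memory
		// layout, so *rawAny can reach past the 16-byte allocation.
		// -d=checkptr reports this conversion at run time.
		bad := (*rawAny)(unsafe.Pointer(v4))
		_ = bad
	}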
Updates #34972 Change-Id: Ic5c2321c20abd805c687ee16ef6f643a2f8cd93f Reviewed-on: https://go-review.googlesource.com/c/go/+/222457 Run-TryBot: Alex Brainman TryBot-Result: Gobot Gobot Reviewed-by: Ian Lance Taylor --- src/internal/poll/fd_windows.go | 4 ++-- src/internal/syscall/windows/syscall_windows.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/internal/poll/fd_windows.go b/src/internal/poll/fd_windows.go index 4b2623ea8f..1a0bdb34fe 100644 --- a/src/internal/poll/fd_windows.go +++ b/src/internal/poll/fd_windows.go @@ -999,7 +999,7 @@ func (fd *FD) ReadMsg(p []byte, oob []byte) (int, int, int, syscall.Sockaddr, er o := &fd.rop o.InitMsg(p, oob) o.rsa = new(syscall.RawSockaddrAny) - o.msg.Name = o.rsa + o.msg.Name = (syscall.Pointer)(unsafe.Pointer(o.rsa)) o.msg.Namelen = int32(unsafe.Sizeof(*o.rsa)) n, err := execIO(o, func(o *operation) error { return windows.WSARecvMsg(o.fd.Sysfd, &o.msg, &o.qty, &o.o, nil) @@ -1030,7 +1030,7 @@ func (fd *FD) WriteMsg(p []byte, oob []byte, sa syscall.Sockaddr) (int, int, err if err != nil { return 0, 0, err } - o.msg.Name = (*syscall.RawSockaddrAny)(rsa) + o.msg.Name = (syscall.Pointer)(rsa) o.msg.Namelen = len } n, err := execIO(o, func(o *operation) error { diff --git a/src/internal/syscall/windows/syscall_windows.go b/src/internal/syscall/windows/syscall_windows.go index dc641116ba..f299adc45f 100644 --- a/src/internal/syscall/windows/syscall_windows.go +++ b/src/internal/syscall/windows/syscall_windows.go @@ -176,7 +176,7 @@ var sendRecvMsgFunc struct { } type WSAMsg struct { - Name *syscall.RawSockaddrAny + Name syscall.Pointer Namelen int32 Buffers *syscall.WSABuf BufferCount uint32 -- cgit v1.2.3-54-g00ecf From a7a0f0305035948f4c86e08e6e64409ab11a6f67 Mon Sep 17 00:00:00 2001 From: Michael Munday Date: Wed, 1 Apr 2020 03:21:03 -0700 Subject: cmd/compile: mark 'store multiple' as clobbering flags on s390x Store multiple instructions can clobber flags on s390x when the offset passed into the assembler is outside the range representable with a signed 20 bit integer. This is because the assembler uses the agfi instruction to implement the large offset. The assembler could use a different sequence of instructions, but for now just mark the instruction as 'clobberFlags' since this is risk free. Noticed while investigating #38195. No test yet since I'm not sure how to get this bug to trigger and I haven't seen it affect real code. 
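Roughly (an illustrative sketch, not real assembler output; the registers
and offsets are made up), the two cases look like:

	STMG	R6, R8, 16(R15)   // offset fits in a signed 20-bit field:
	                          // a single instruction, flags untouched

	AGFI	R10, $1048576     // large offset: the assembler first adds it
	STMG	R6, R8, 0(R10)    // with AGFI, which sets the condition code
	                          // and therefore clobbers flags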
Change-Id: I4a6ab96455a3ef8ffacb76ef0166b97eb40ff925
Reviewed-on: https://go-review.googlesource.com/c/go/+/226759
Run-TryBot: Michael Munday
TryBot-Result: Gobot Gobot
Reviewed-by: Keith Randall
Reviewed-by: Cherry Zhang
---
 src/cmd/compile/internal/ssa/gen/S390XOps.go | 6 ++++++
 src/cmd/compile/internal/ssa/opGen.go        | 6 ++++++
 2 files changed, 12 insertions(+)

diff --git a/src/cmd/compile/internal/ssa/gen/S390XOps.go b/src/cmd/compile/internal/ssa/gen/S390XOps.go
index 283a0fa6b5..65460bf6f7 100644
--- a/src/cmd/compile/internal/ssa/gen/S390XOps.go
+++ b/src/cmd/compile/internal/ssa/gen/S390XOps.go
@@ -630,6 +630,7 @@ func init() {
 			asm:            "STMG",
 			faultOnNilArg0: true,
 			symEffect:      "Write",
+			clobberFlags:   true, // TODO(mundaym): currently uses AGFI to handle large offsets
 		},
 		{
 			name: "STMG3",
@@ -640,6 +641,7 @@
 			asm:            "STMG",
 			faultOnNilArg0: true,
 			symEffect:      "Write",
+			clobberFlags:   true, // TODO(mundaym): currently uses AGFI to handle large offsets
 		},
 		{
 			name: "STMG4",
@@ -657,6 +659,7 @@
 			asm:            "STMG",
 			faultOnNilArg0: true,
 			symEffect:      "Write",
+			clobberFlags:   true, // TODO(mundaym): currently uses AGFI to handle large offsets
 		},
 		{
 			name: "STM2",
@@ -667,6 +670,7 @@
 			asm:            "STMY",
 			faultOnNilArg0: true,
 			symEffect:      "Write",
+			clobberFlags:   true, // TODO(mundaym): currently uses AGFI to handle large offsets
 		},
 		{
 			name: "STM3",
@@ -677,6 +681,7 @@
 			asm:            "STMY",
 			faultOnNilArg0: true,
 			symEffect:      "Write",
+			clobberFlags:   true, // TODO(mundaym): currently uses AGFI to handle large offsets
 		},
 		{
 			name: "STM4",
@@ -694,6 +699,7 @@
 			asm:            "STMY",
 			faultOnNilArg0: true,
 			symEffect:      "Write",
+			clobberFlags:   true, // TODO(mundaym): currently uses AGFI to handle large offsets
 		},

 		// large move
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 46ca7936dc..bf48bff8f1 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -30110,6 +30110,7 @@
 		name:           "STMG2",
 		auxType:        auxSymOff,
 		argLen:         4,
+		clobberFlags:   true,
 		faultOnNilArg0: true,
 		symEffect:      SymWrite,
 		asm:            s390x.ASTMG,
@@ -30125,6 +30126,7 @@
 		name:           "STMG3",
 		auxType:        auxSymOff,
 		argLen:         5,
+		clobberFlags:   true,
 		faultOnNilArg0: true,
 		symEffect:      SymWrite,
 		asm:            s390x.ASTMG,
@@ -30141,6 +30143,7 @@
 		name:           "STMG4",
 		auxType:        auxSymOff,
 		argLen:         6,
+		clobberFlags:   true,
 		faultOnNilArg0: true,
 		symEffect:      SymWrite,
 		asm:            s390x.ASTMG,
@@ -30158,6 +30161,7 @@
 		name:           "STM2",
 		auxType:        auxSymOff,
 		argLen:         4,
+		clobberFlags:   true,
 		faultOnNilArg0: true,
 		symEffect:      SymWrite,
 		asm:            s390x.ASTMY,
@@ -30173,6 +30177,7 @@
 		name:           "STM3",
 		auxType:        auxSymOff,
 		argLen:         5,
+		clobberFlags:   true,
 		faultOnNilArg0: true,
 		symEffect:      SymWrite,
 		asm:            s390x.ASTMY,
@@ -30189,6 +30194,7 @@
 		name:           "STM4",
 		auxType:        auxSymOff,
 		argLen:         6,
+		clobberFlags:   true,
 		faultOnNilArg0: true,
 		symEffect:      SymWrite,
 		asm:            s390x.ASTMY,
--
cgit v1.2.3-54-g00ecf


From 1dbcbcfca4692f67db7de2c1ff6a5ee59511cfa4 Mon Sep 17 00:00:00 2001
From: fanzha02
Date: Thu, 19 Dec 2019 08:15:06 +0000
Subject: cmd/asm: align an instruction or a function's address

Recently, the gVisor project has needed instruction addresses aligned to
128 bytes, to meet the architecture requirement for its interrupt table.
This patch allows an instruction's address to be aligned to a specific
value (2^n, in the range [8, 2048]). The main changes include:

1. Adds a new element in the FuncInfo structure defined in the
   cmd/internal/obj/link.go file to record the alignment information.

2. Adds a new element in the Func structure defined in the
   cmd/internal/goobj/read.go file to read the alignment information.

3. Adds assembler support to align an instruction's offset with a
   specific value (2^n, in the range [8, 2048]). e.g. "PCALIGN $256"
   indicates that the next instruction should be aligned to 256 bytes.

4. An instruction's alignment is relative to the start of the function
   where the instruction is located, so the function's address must be
   aligned to the same or a coarser boundary.

This CL also adds a test.

Change-Id: I9b365c111b3a12f767728f1b45aa0c00f073c37d
Reviewed-on: https://go-review.googlesource.com/c/go/+/226997
Reviewed-by: Bryan C. Mills
Reviewed-by: Cherry Zhang
Run-TryBot: Bryan C. Mills
TryBot-Result: Gobot Gobot
---
 src/cmd/internal/goobj/read.go           |  2 +
 src/cmd/internal/obj/arm64/asm7.go       | 33 +++++----------
 src/cmd/internal/obj/arm64/asm_test.go   | 27 ++++++++++--
 src/cmd/internal/obj/link.go             |  1 +
 src/cmd/internal/obj/objfile.go          |  1 +
 src/cmd/link/internal/ld/data.go         |  4 ++
 src/cmd/link/internal/objfile/objfile.go |  1 +
 src/cmd/link/link_test.go                | 70 ++++++++++++++++++++++++++++++++
 8 files changed, 114 insertions(+), 25 deletions(-)

diff --git a/src/cmd/internal/goobj/read.go b/src/cmd/internal/goobj/read.go
index e61e95dcc8..48537d2b1c 100644
--- a/src/cmd/internal/goobj/read.go
+++ b/src/cmd/internal/goobj/read.go
@@ -95,6 +95,7 @@ type Var struct {
 type Func struct {
 	Args     int64  // size in bytes of argument frame: inputs and outputs
 	Frame    int64  // size in bytes of local variable frame
+	Align    uint32 // alignment requirement in bytes for the address of the function
 	Leaf     bool   // function omits save of link register (ARM)
 	NoSplit  bool   // function omits stack split prologue
 	TopFrame bool   // function is the top of the call stack
@@ -590,6 +591,7 @@ func (r *objReader) parseObject(prefix []byte) error {
 	s.Func = f
 	f.Args = r.readInt()
 	f.Frame = r.readInt()
+	f.Align = uint32(r.readInt())
 	flags := r.readInt()
 	f.Leaf = flags&(1<<0) != 0
 	f.TopFrame = flags&(1<<4) != 0
diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go
index e8b092a2a8..8e5b598084 100644
--- a/src/cmd/internal/obj/arm64/asm7.go
+++ b/src/cmd/internal/obj/arm64/asm7.go
@@ -886,25 +886,10 @@ const OP_NOOP = 0xd503201f
 
 // align code to a certain length by padding bytes.
 func pcAlignPadLength(pc int64, alignedValue int64, ctxt *obj.Link) int {
-	switch alignedValue {
-	case 8:
-		if pc%8 == 4 {
-			return 4
-		}
-	case 16:
-		switch pc % 16 {
-		case 4:
-			return 12
-		case 8:
-			return 8
-		case 12:
-			return 4
-		}
-	default:
-		ctxt.Diag("Unexpected alignment: %d for PCALIGN directive\n", alignedValue)
+	if !((alignedValue&(alignedValue-1) == 0) && 8 <= alignedValue && alignedValue <= 2048) {
+		ctxt.Diag("alignment value of an instruction must be a power of two and in the range [8, 2048], got %d\n", alignedValue)
 	}
-
-	return 0
+	return int(-pc & (alignedValue - 1))
 }
 
 func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
@@ -940,8 +925,12 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
 		if m == 0 {
 			switch p.As {
 			case obj.APCALIGN:
-				a := p.From.Offset
-				m = pcAlignPadLength(pc, a, ctxt)
+				alignedValue := p.From.Offset
+				m = pcAlignPadLength(pc, alignedValue, ctxt)
+				// Update the current text symbol alignment value.
+				if int32(alignedValue) > cursym.Func.Align {
+					cursym.Func.Align = int32(alignedValue)
+				}
 				break
 			case obj.ANOP, obj.AFUNCDATA, obj.APCDATA:
 				continue
@@ -1017,8 +1006,8 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
 			if m == 0 {
 				switch p.As {
 				case obj.APCALIGN:
-					a := p.From.Offset
-					m = pcAlignPadLength(pc, a, ctxt)
+					alignedValue := p.From.Offset
+					m = pcAlignPadLength(pc, alignedValue, ctxt)
 					break
 				case obj.ANOP, obj.AFUNCDATA, obj.APCDATA:
 					continue
diff --git a/src/cmd/internal/obj/arm64/asm_test.go b/src/cmd/internal/obj/arm64/asm_test.go
index 1691828739..9efdb0217f 100644
--- a/src/cmd/internal/obj/arm64/asm_test.go
+++ b/src/cmd/internal/obj/arm64/asm_test.go
@@ -18,7 +18,9 @@ import (
 
 // TestLarge generates a very large file to verify that large
 // program builds successfully, in particular, too-far
-// conditional branches are fixed.
+// conditional branches are fixed, and also verify that the
+// instruction's pc can be correctly aligned even when branches
+// need to be fixed.
 func TestLarge(t *testing.T) {
 	if testing.Short() {
 		t.Skip("Skip in short mode")
@@ -41,10 +43,27 @@ func TestLarge(t *testing.T) {
 		t.Fatalf("can't write output: %v\n", err)
 	}
 
-	// build generated file
-	cmd := exec.Command(testenv.GoToolPath(t), "tool", "asm", "-o", filepath.Join(dir, "x.o"), tmpfile)
+	pattern := `0x0080\s00128\s\(.*\)\tMOVD\t\$3,\sR3`
+
+	// assemble generated file
+	cmd := exec.Command(testenv.GoToolPath(t), "tool", "asm", "-S", "-o", filepath.Join(dir, "test.o"), tmpfile)
 	cmd.Env = append(os.Environ(), "GOARCH=arm64", "GOOS=linux")
 	out, err := cmd.CombinedOutput()
+	if err != nil {
+		t.Errorf("Assemble failed: %v, output: %s", err, out)
+	}
+	matched, err := regexp.MatchString(pattern, string(out))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !matched {
+		t.Errorf("The alignment is not correct: %t, output:%s\n", matched, out)
+	}
+
+	// build generated file
+	cmd = exec.Command(testenv.GoToolPath(t), "tool", "asm", "-o", filepath.Join(dir, "x.o"), tmpfile)
+	cmd.Env = append(os.Environ(), "GOARCH=arm64", "GOOS=linux")
+	out, err = cmd.CombinedOutput()
 	if err != nil {
 		t.Errorf("Build failed: %v, output: %s", err, out)
 	}
@@ -56,6 +75,8 @@ func gen(buf *bytes.Buffer) {
 	fmt.Fprintln(buf, "TBZ $5, R0, label")
 	fmt.Fprintln(buf, "CBZ R0, label")
 	fmt.Fprintln(buf, "BEQ label")
+	fmt.Fprintln(buf, "PCALIGN $128")
+	fmt.Fprintln(buf, "MOVD $3, R3")
 	for i := 0; i < 1<<19; i++ {
 		fmt.Fprintln(buf, "MOVD R0, R1")
 	}
diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go
index d1cc536a8c..0879c611ba 100644
--- a/src/cmd/internal/obj/link.go
+++ b/src/cmd/internal/obj/link.go
@@ -398,6 +398,7 @@ type LSym struct {
 type FuncInfo struct {
 	Args   int32
 	Locals int32
+	Align  int32
 	Text   *Prog
 	Autot  map[*LSym]struct{}
 	Pcln   Pcln
diff --git a/src/cmd/internal/obj/objfile.go b/src/cmd/internal/obj/objfile.go
index 7fd97f7363..46e8a551ad 100644
--- a/src/cmd/internal/obj/objfile.go
+++ b/src/cmd/internal/obj/objfile.go
@@ -346,6 +346,7 @@ func (w *objWriter) writeSym(s *LSym) {
 
 	w.writeInt(int64(s.Func.Args))
 	w.writeInt(int64(s.Func.Locals))
+	w.writeInt(int64(s.Func.Align))
 	w.writeBool(s.NoSplit())
 	flags = int64(0)
 	if s.Leaf() {
diff --git a/src/cmd/link/internal/ld/data.go b/src/cmd/link/internal/ld/data.go
index 7ca01c8c25..31613e5cef 100644
--- a/src/cmd/link/internal/ld/data.go
+++ b/src/cmd/link/internal/ld/data.go
@@ -2119,6 +2119,10 @@ func assignAddress(ctxt *Link, sect *sym.Section, n int, s *sym.Symbol, va uint6
 		funcsize = uint64(s.Size)
 	}
 
+	if sect.Align < s.Align {
+		sect.Align = s.Align
+	}
+
 	// On ppc64x a text section should not be larger than 2^26 bytes due to the size of
 	// call target offset field in the bl instruction. Splitting into smaller text
 	// sections smaller than this limit allows the GNU linker to modify the long calls
diff --git a/src/cmd/link/internal/objfile/objfile.go b/src/cmd/link/internal/objfile/objfile.go
index a15d3c3e07..295acb2d29 100644
--- a/src/cmd/link/internal/objfile/objfile.go
+++ b/src/cmd/link/internal/objfile/objfile.go
@@ -312,6 +312,7 @@ overwrite:
 	pc.Args = r.readInt32()
 	pc.Locals = r.readInt32()
+	s.Align = r.readInt32()
 	if r.readUint8() != 0 {
 		s.Attr |= sym.AttrNoSplit
 	}
diff --git a/src/cmd/link/link_test.go b/src/cmd/link/link_test.go
index 4f792bd1f1..7d87093813 100644
--- a/src/cmd/link/link_test.go
+++ b/src/cmd/link/link_test.go
@@ -447,3 +447,73 @@ func TestStrictDup(t *testing.T) {
 		t.Errorf("unexpected output:\n%s", out)
 	}
 }
+
+const testFuncAlignSrc = `
+package main
+import (
+	"fmt"
+	"reflect"
+)
+func alignPc()
+
+func main() {
+	addr := reflect.ValueOf(alignPc).Pointer()
+	if (addr % 512) != 0 {
+		fmt.Printf("expected 512 bytes alignment, got %v\n", addr)
+	} else {
+		fmt.Printf("PASS")
+	}
+}
+`
+
+const testFuncAlignAsmSrc = `
+#include "textflag.h"
+
+TEXT ·alignPc(SB),NOSPLIT, $0-0
+	MOVD $2, R0
+	PCALIGN $512
+	MOVD $3, R1
+	RET
+`
+
+// TestFuncAlign verifies that the address of a function can be aligned
+// with a specific value on arm64.
+func TestFuncAlign(t *testing.T) {
+	if runtime.GOARCH != "arm64" || runtime.GOOS != "linux" {
+		t.Skip("skipping on non-linux/arm64 platform")
+	}
+	testenv.MustHaveGoBuild(t)
+
+	tmpdir, err := ioutil.TempDir("", "TestFuncAlign")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.RemoveAll(tmpdir)
+
+	src := filepath.Join(tmpdir, "falign.go")
+	err = ioutil.WriteFile(src, []byte(testFuncAlignSrc), 0666)
+	if err != nil {
+		t.Fatal(err)
+	}
+	src = filepath.Join(tmpdir, "falign.s")
+	err = ioutil.WriteFile(src, []byte(testFuncAlignAsmSrc), 0666)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Build and run with old object file format.
+	cmd := exec.Command(testenv.GoToolPath(t), "build", "-o", "falign")
+	cmd.Dir = tmpdir
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		t.Errorf("build failed: %v", err)
+	}
+	cmd = exec.Command(tmpdir + "/falign")
+	out, err = cmd.CombinedOutput()
+	if err != nil {
+		t.Errorf("failed to run with err %v, output: %s", err, out)
+	}
+	if string(out) != "PASS" {
+		t.Errorf("unexpected output: %s\n", out)
+	}
+}
--
cgit v1.2.3-54-g00ecf


From aa4d92b8aab63c847ab077417b809694a2a6ea81 Mon Sep 17 00:00:00 2001
From: Joel Sing
Date: Wed, 26 Feb 2020 02:48:24 +1100
Subject: cmd/link: skip symbol references when looking for missing symbols

ErrorUnresolved attempts to find the missing symbol in another ABI, in
order to provide friendlier error messages. However, in doing so it
checks the same ABI and can find the symbol reference for the very
symbol that it is currently reporting the unresolved error for. Avoid
this by ignoring SXREF symbols, which is the same behaviour used when
linking is performed.
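To see why the guard matters, here is a minimal, self-contained sketch (hypothetical types and helper, not the linker's actual code) of the ABI probe: an SXREF entry is itself just an unresolved reference, so without the extra check the probe can "find" the very symbol whose absence is being reported:

	// SymType loosely models sym.SymKind; only the cases that matter
	// here are included.
	type SymType int

	const (
		Sxxx  SymType = iota // placeholder: nothing known yet
		SXREF                // reference to a symbol defined elsewhere
		STEXT                // an actual definition
	)

	type Sym struct {
		Name string
		Type SymType
	}

	// definedABI reports which ABI, if any, actually defines the symbol.
	// Skipping Sxxx and SXREF entries mirrors the guard added to
	// ErrorUnresolved in link.go below.
	func definedABI(byABI map[int]*Sym) (int, bool) {
		for abi, s := range byABI {
			if s != nil && s.Type != Sxxx && s.Type != SXREF {
				return abi, true
			}
		}
		return 0, false
	}

With the guard in place, the "symbol defined under a different ABI" hint is only printed when some ABI holds a real definition.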
Fixes #33979

Change-Id: I9bfc40146dec2666d25e93d3bcd1984da5c71215
Reviewed-on: https://go-review.googlesource.com/c/go/+/220917
Run-TryBot: Than McIntosh
Run-TryBot: Austin Clements
Reviewed-by: Austin Clements
TryBot-Result: Gobot Gobot
---
 src/cmd/link/internal/ld/link.go |  2 +-
 src/cmd/link/link_test.go        | 87 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/src/cmd/link/internal/ld/link.go b/src/cmd/link/internal/ld/link.go
index 124f7d9001..df3845fac3 100644
--- a/src/cmd/link/internal/ld/link.go
+++ b/src/cmd/link/internal/ld/link.go
@@ -136,7 +136,7 @@ func (ctxt *Link) ErrorUnresolved(s *sym.Symbol, r *sym.Reloc) {
 			if v == -1 {
 				continue
 			}
-			if rs := ctxt.Syms.ROLookup(r.Sym.Name, v); rs != nil && rs.Type != sym.Sxxx {
+			if rs := ctxt.Syms.ROLookup(r.Sym.Name, v); rs != nil && rs.Type != sym.Sxxx && rs.Type != sym.SXREF {
 				haveABI = abi
 			}
 		}
diff --git a/src/cmd/link/link_test.go b/src/cmd/link/link_test.go
index 7d87093813..b1f0e8882c 100644
--- a/src/cmd/link/link_test.go
+++ b/src/cmd/link/link_test.go
@@ -172,6 +172,93 @@ main.x: relocation target main.zero not defined
 	}
 }
 
+func TestIssue33979(t *testing.T) {
+	testenv.MustHaveGoBuild(t)
+	testenv.MustHaveCGO(t)
+
+	// Skip test on platforms that do not support cgo internal linking.
+	switch runtime.GOARCH {
+	case "mips", "mipsle", "mips64", "mips64le":
+		t.Skipf("Skipping on %s/%s", runtime.GOOS, runtime.GOARCH)
+	}
+	if runtime.GOOS == "aix" {
+		t.Skipf("Skipping on %s/%s", runtime.GOOS, runtime.GOARCH)
+	}
+
+	tmpdir, err := ioutil.TempDir("", "unresolved-")
+	if err != nil {
+		t.Fatalf("failed to create temp dir: %v", err)
+	}
+	defer os.RemoveAll(tmpdir)
+
+	write := func(name, content string) {
+		err := ioutil.WriteFile(filepath.Join(tmpdir, name), []byte(content), 0666)
+		if err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	run := func(name string, args ...string) string {
+		cmd := exec.Command(name, args...)
+		cmd.Dir = tmpdir
+		out, err := cmd.CombinedOutput()
+		if err != nil {
+			t.Fatalf("'go %s' failed: %v, output: %s", strings.Join(args, " "), err, out)
+		}
+		return string(out)
+	}
+	runGo := func(args ...string) string {
+		return run(testenv.GoToolPath(t), args...)
+	}
+
+	// Test object with undefined reference that was not generated
+	// by Go, resulting in an SXREF symbol being loaded during linking.
+	// Because of issue #33979, the SXREF symbol would be found during
+	// error reporting, resulting in confusing error messages.
+
+	write("main.go", `package main
+func main() {
+	x()
+}
+func x()
+`)
+	// The following assembly must work on all architectures.
+	write("x.s", `
+TEXT ·x(SB),0,$0
+	CALL foo(SB)
+	RET
+`)
+	write("x.c", `
+void undefined();
+
+void foo() {
+	undefined();
+}
+`)
+
+	cc := strings.TrimSpace(runGo("env", "CC"))
+	cflags := strings.Fields(runGo("env", "GOGCCFLAGS"))
+
+	// Compile, assemble and pack the Go and C code.
+	runGo("tool", "asm", "-gensymabis", "-o", "symabis", "x.s")
+	runGo("tool", "compile", "-symabis", "symabis", "-p", "main", "-o", "x1.o", "main.go")
+	runGo("tool", "asm", "-o", "x2.o", "x.s")
+	run(cc, append(cflags, "-c", "-o", "x3.o", "x.c")...)
+	runGo("tool", "pack", "c", "x.a", "x1.o", "x2.o", "x3.o")
+
+	// Now attempt to link using the internal linker.
+	cmd := exec.Command(testenv.GoToolPath(t), "tool", "link", "-linkmode=internal", "x.a")
+	cmd.Dir = tmpdir
+	out, err := cmd.CombinedOutput()
+	if err == nil {
+		t.Fatalf("expected link to fail, but it succeeded")
+	}
+	re := regexp.MustCompile(`(?m)^main\(.*text\): relocation target undefined not defined$`)
+	if !re.Match(out) {
+		t.Fatalf("got:\n%q\nwant:\n%s", out, re)
+	}
+}
+
 func TestBuildForTvOS(t *testing.T) {
 	testenv.MustHaveCGO(t)
 	testenv.MustHaveGoBuild(t)
--
cgit v1.2.3-54-g00ecf