diff options
author | Joel Sing <joel@sing.id.au> | 2023-04-29 06:07:42 +1000 |
---|---|---|
committer | Joel Sing <joel@sing.id.au> | 2023-07-26 12:39:29 +0000 |
commit | af297c35555d75e43bcd58f868534bdd022767bd (patch) | |
tree | 1ed36d96bed4a3f48502afdb0e39a909779800b5 | |
parent | 2a1ba6ebb12994f4df15236f03eb63116b4a0c4d (diff) | |
download | go-af297c35555d75e43bcd58f868534bdd022767bd.tar.gz go-af297c35555d75e43bcd58f868534bdd022767bd.zip |
cmd/internal/obj/arm64: factor out splitting of 24 bit unsigned scaled immediates
Rather than duplicating this code, factor it out into a function and
add test coverage.
Change-Id: I37ce568ded4659d98a4ff1361520c5fb2207e947
Reviewed-on: https://go-review.googlesource.com/c/go/+/512537
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
-rw-r--r-- | src/cmd/asm/internal/asm/testdata/arm64.s | 72 | ||||
-rw-r--r-- | src/cmd/internal/obj/arm64/asm7.go | 53 | ||||
-rw-r--r-- | src/cmd/internal/obj/arm64/asm_arm64_test.go | 158 |
3 files changed, 231 insertions, 52 deletions
diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index 2c7d638319..54f4de76d8 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -558,36 +558,60 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 FMOVQ 64(RSP), F11 // eb13c03d // large aligned offset, use two instructions(add+ldr/store). - MOVB R1, 0x1001(R2) // MOVB R1, 4097(R2) // 5b04409161070039 - MOVH R1, 0x2002(R2) // MOVH R1, 8194(R2) // 5b08409161070079 - MOVW R1, 0x4004(R2) // MOVW R1, 16388(R2) // 5b104091610700b9 - MOVD R1, 0x8008(R2) // MOVD R1, 32776(R2) // 5b204091610700f9 - FMOVS F1, 0x4004(R2) // FMOVS F1, 16388(R2) // 5b104091610700bd - FMOVD F1, 0x8008(R2) // FMOVD F1, 32776(R2) // 5b204091610700fd + MOVB R1, 0x1001(R2) // MOVB R1, 4097(R2) // 5b04409161070039 + MOVB R1, 0xffffff(R2) // MOVB R1, 16777215(R2) // 5bfc7f9161ff3f39 + MOVH R1, 0x2002(R2) // MOVH R1, 8194(R2) // 5b08409161070079 + MOVH R1, 0xfffffe(R2) // MOVH R1, 16777214(R2) // 5bf87f9161ff3f79 + MOVW R1, 0x4004(R2) // MOVW R1, 16388(R2) // 5b104091610700b9 + MOVW R1, 0xfffffc(R2) // MOVW R1, 16777212(R2) // 5bf07f9161ff3fb9 + MOVD R1, 0x8008(R2) // MOVD R1, 32776(R2) // 5b204091610700f9 + MOVD R1, 0xfffff8(R2) // MOVD R1, 16777208(R2) // 5be07f9161ff3ff9 + FMOVS F1, 0x4004(R2) // FMOVS F1, 16388(R2) // 5b104091610700bd + FMOVS F1, 0xfffffc(R2) // FMOVS F1, 16777212(R2) // 5bf07f9161ff3fbd + FMOVD F1, 0x8008(R2) // FMOVD F1, 32776(R2) // 5b204091610700fd + FMOVD F1, 0xfffff8(R2) // FMOVD F1, 16777208(R2) // 5be07f9161ff3ffd - MOVB 0x1001(R1), R2 // MOVB 4097(R1), R2 // 3b04409162078039 - MOVH 0x2002(R1), R2 // MOVH 8194(R1), R2 // 3b08409162078079 - MOVW 0x4004(R1), R2 // MOVW 16388(R1), R2 // 3b104091620780b9 - MOVD 0x8008(R1), R2 // MOVD 32776(R1), R2 // 3b204091620740f9 - FMOVS 0x4004(R1), F2 // FMOVS 16388(R1), F2 // 3b104091620740bd - FMOVD 0x8008(R1), F2 // FMOVD 32776(R1), F2 // 3b204091620740fd + MOVB 0x1001(R1), R2 // MOVB 4097(R1), R2 // 3b04409162078039 + MOVB 0xffffff(R1), R2 // MOVB 16777215(R1), R2 // 3bfc7f9162ffbf39 + MOVH 0x2002(R1), R2 // MOVH 8194(R1), R2 // 3b08409162078079 + MOVH 0xfffffe(R1), R2 // MOVH 16777214(R1), R2 // 3bf87f9162ffbf79 + MOVW 0x4004(R1), R2 // MOVW 16388(R1), R2 // 3b104091620780b9 + MOVW 0xfffffc(R1), R2 // MOVW 16777212(R1), R2 // 3bf07f9162ffbfb9 + MOVD 0x8008(R1), R2 // MOVD 32776(R1), R2 // 3b204091620740f9 + MOVD 0xfffff8(R1), R2 // MOVD 16777208(R1), R2 // 3be07f9162ff7ff9 + FMOVS 0x4004(R1), F2 // FMOVS 16388(R1), F2 // 3b104091620740bd + FMOVS 0xfffffc(R1), F2 // FMOVS 16777212(R1), F2 // 3bf07f9162ff7fbd + FMOVD 0x8008(R1), F2 // FMOVD 32776(R1), F2 // 3b204091620740fd + FMOVD 0xfffff8(R1), F2 // FMOVD 16777208(R1), F2 // 3be07f9162ff7ffd // very large or unaligned offset uses constant pool. // the encoding cannot be checked as the address of the constant pool is unknown. // here we only test that they can be assembled. - MOVB R1, 0x44332211(R2) // MOVB R1, 1144201745(R2) - MOVH R1, 0x44332211(R2) // MOVH R1, 1144201745(R2) - MOVW R1, 0x44332211(R2) // MOVW R1, 1144201745(R2) - MOVD R1, 0x44332211(R2) // MOVD R1, 1144201745(R2) - FMOVS F1, 0x44332211(R2) // FMOVS F1, 1144201745(R2) - FMOVD F1, 0x44332211(R2) // FMOVD F1, 1144201745(R2) + MOVB R1, 0x1000000(R2) // MOVB R1, 16777216(R2) + MOVB R1, 0x44332211(R2) // MOVB R1, 1144201745(R2) + MOVH R1, 0x1000000(R2) // MOVH R1, 16777216(R2) + MOVH R1, 0x44332211(R2) // MOVH R1, 1144201745(R2) + MOVW R1, 0x1000000(R2) // MOVW R1, 16777216(R2) + MOVW R1, 0x44332211(R2) // MOVW R1, 1144201745(R2) + MOVD R1, 0x1000000(R2) // MOVD R1, 16777216(R2) + MOVD R1, 0x44332211(R2) // MOVD R1, 1144201745(R2) + FMOVS F1, 0x1000000(R2) // FMOVS F1, 16777216(R2) + FMOVS F1, 0x44332211(R2) // FMOVS F1, 1144201745(R2) + FMOVD F1, 0x1000000(R2) // FMOVD F1, 16777216(R2) + FMOVD F1, 0x44332211(R2) // FMOVD F1, 1144201745(R2) - MOVB 0x44332211(R1), R2 // MOVB 1144201745(R1), R2 - MOVH 0x44332211(R1), R2 // MOVH 1144201745(R1), R2 - MOVW 0x44332211(R1), R2 // MOVW 1144201745(R1), R2 - MOVD 0x44332211(R1), R2 // MOVD 1144201745(R1), R2 - FMOVS 0x44332211(R1), F2 // FMOVS 1144201745(R1), F2 - FMOVD 0x44332211(R1), F2 // FMOVD 1144201745(R1), F2 + MOVB 0x1000000(R1), R2 // MOVB 16777216(R1), R2 + MOVB 0x44332211(R1), R2 // MOVB 1144201745(R1), R2 + MOVH 0x1000000(R1), R2 // MOVH 16777216(R1), R2 + MOVH 0x44332211(R1), R2 // MOVH 1144201745(R1), R2 + MOVW 0x1000000(R1), R2 // MOVW 16777216(R1), R2 + MOVW 0x44332211(R1), R2 // MOVW 1144201745(R1), R2 + MOVD 0x1000000(R1), R2 // MOVD 16777216(R1), R2 + MOVD 0x44332211(R1), R2 // MOVD 1144201745(R1), R2 + FMOVS 0x1000000(R1), F2 // FMOVS 16777216(R1), F2 + FMOVS 0x44332211(R1), F2 // FMOVS 1144201745(R1), F2 + FMOVD 0x1000000(R1), F2 // FMOVD 16777216(R1), F2 + FMOVD 0x44332211(R1), F2 // FMOVD 1144201745(R1), F2 // shifted or extended register offset. MOVD (R2)(R6.SXTW), R4 // 44c866f8 diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index d5f3f20410..88c46a80c5 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -1384,6 +1384,25 @@ func roundUp(x, to uint32) uint32 { return (x + to - 1) &^ (to - 1) } +// splitImm24uScaled splits an immediate into a scaled 12 bit unsigned lo value +// and an unscaled shifted 12 bit unsigned hi value. These are typically used +// by adding or subtracting the hi value and using the lo value as the offset +// for a load or store. +func splitImm24uScaled(v int32, shift int) (int32, int32, error) { + if v < 0 { + return 0, 0, fmt.Errorf("%d is not a 24 bit unsigned immediate", v) + } + if v&((1<<shift)-1) != 0 { + return 0, 0, fmt.Errorf("%d is not a multiple of %d", v, 1<<shift) + } + lo := (v >> shift) & 0xfff + hi := v - (lo << shift) + if hi&^0xfff000 != 0 { + return 0, 0, fmt.Errorf("%d is too large for a scaled 24 bit unsigned immediate %x %x", v, lo, hi) + } + return hi, lo, nil +} + func (c *ctxt7) regoff(a *obj.Addr) int32 { c.instoffset = 0 c.aclass(a) @@ -3908,23 +3927,12 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { } v := c.regoff(&p.To) - var hi int32 - if v < 0 || (v&((1<<uint(s))-1)) != 0 { - // negative or unaligned offset, use constant pool - goto storeusepool - } - - hi = v - (v & (0xFFF << uint(s))) - if hi&0xFFF != 0 { - c.ctxt.Diag("internal: miscalculated offset %d [%d]\n%v", v, s, p) - } - if hi&^0xFFF000 != 0 { - // hi doesn't fit into an ADD instruction + hi, lo, err := splitImm24uScaled(v, s) + if err != nil { goto storeusepool } - o1 = c.oaddi(p, AADD, hi, REGTMP, r) - o2 = c.olsr12u(p, c.opstr(p, p.As), ((v-hi)>>uint(s))&0xFFF, REGTMP, p.From.Reg) + o2 = c.olsr12u(p, c.opstr(p, p.As), lo, REGTMP, p.From.Reg) break storeusepool: @@ -3952,23 +3960,12 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { } v := c.regoff(&p.From) - var hi int32 - if v < 0 || (v&((1<<uint(s))-1)) != 0 { - // negative or unaligned offset, use constant pool - goto loadusepool - } - - hi = v - (v & (0xFFF << uint(s))) - if (hi & 0xFFF) != 0 { - c.ctxt.Diag("internal: miscalculated offset %d [%d]\n%v", v, s, p) - } - if hi&^0xFFF000 != 0 { - // hi doesn't fit into an ADD instruction + hi, lo, err := splitImm24uScaled(v, s) + if err != nil { goto loadusepool } - o1 = c.oaddi(p, AADD, hi, REGTMP, r) - o2 = c.olsr12u(p, c.opldr(p, p.As), ((v-hi)>>uint(s))&0xFFF, REGTMP, p.To.Reg) + o2 = c.olsr12u(p, c.opldr(p, p.As), lo, REGTMP, p.To.Reg) break loadusepool: diff --git a/src/cmd/internal/obj/arm64/asm_arm64_test.go b/src/cmd/internal/obj/arm64/asm_arm64_test.go index c52717dc19..d13946f7eb 100644 --- a/src/cmd/internal/obj/arm64/asm_arm64_test.go +++ b/src/cmd/internal/obj/arm64/asm_arm64_test.go @@ -14,6 +14,164 @@ import ( "testing" ) +func TestSplitImm24uScaled(t *testing.T) { + tests := []struct { + v int32 + shift int + wantErr bool + wantHi int32 + wantLo int32 + }{ + { + v: 0, + shift: 0, + wantHi: 0, + wantLo: 0, + }, + { + v: 0x1001, + shift: 0, + wantHi: 0x1000, + wantLo: 0x1, + }, + { + v: 0xffffff, + shift: 0, + wantHi: 0xfff000, + wantLo: 0xfff, + }, + { + v: 0xffffff, + shift: 1, + wantErr: true, + }, + { + v: 0xfe, + shift: 1, + wantHi: 0x0, + wantLo: 0x7f, + }, + { + v: 0x10fe, + shift: 1, + wantHi: 0x0, + wantLo: 0x87f, + }, + { + v: 0x2002, + shift: 1, + wantHi: 0x2000, + wantLo: 0x1, + }, + { + v: 0xfffffe, + shift: 1, + wantHi: 0xffe000, + wantLo: 0xfff, + }, + { + // TODO(jsing): Fix splitting to make this fit. + v: 0x1000ffe, + shift: 1, + wantErr: true, + wantHi: 0xfff000, + wantLo: 0xfff, + }, + { + v: 0x1001000, + shift: 1, + wantErr: true, + }, + { + v: 0xfffffe, + shift: 2, + wantErr: true, + }, + { + v: 0x4004, + shift: 2, + wantHi: 0x4000, + wantLo: 0x1, + }, + { + v: 0xfffffc, + shift: 2, + wantHi: 0xffc000, + wantLo: 0xfff, + }, + { + // TODO(jsing): Fix splitting to make this fit. + v: 0x1002ffc, + shift: 2, + wantErr: true, + wantHi: 0xfff000, + wantLo: 0xfff, + }, + { + v: 0x1003000, + shift: 2, + wantErr: true, + }, + { + v: 0xfffffe, + shift: 3, + wantErr: true, + }, + { + v: 0x8008, + shift: 3, + wantHi: 0x8000, + wantLo: 0x1, + }, + { + v: 0xfffff8, + shift: 3, + wantHi: 0xff8000, + wantLo: 0xfff, + }, + { + // TODO(jsing): Fix splitting to make this fit. + v: 0x1006ff8, + shift: 3, + wantErr: true, + wantHi: 0xfff000, + wantLo: 0xfff, + }, + { + v: 0x1007000, + shift: 3, + wantErr: true, + }, + } + for _, test := range tests { + hi, lo, err := splitImm24uScaled(test.v, test.shift) + switch { + case err == nil && test.wantErr: + t.Errorf("splitImm24uScaled(%v, %v) succeeded, want error", test.v, test.shift) + case err != nil && !test.wantErr: + t.Errorf("splitImm24uScaled(%v, %v) failed: %v", test.v, test.shift, err) + case !test.wantErr: + if got, want := hi, test.wantHi; got != want { + t.Errorf("splitImm24uScaled(%x, %x) - got hi %x, want %x", test.v, test.shift, got, want) + } + if got, want := lo, test.wantLo; got != want { + t.Errorf("splitImm24uScaled(%x, %x) - got lo %x, want %x", test.v, test.shift, got, want) + } + } + } + for shift := 0; shift <= 3; shift++ { + for v := int32(0); v < 0xfff000|0xfff<<shift; v = v + 1<<shift { + hi, lo, err := splitImm24uScaled(v, shift) + if err != nil { + t.Fatalf("splitImm24uScaled(%x, %x) failed: %v", v, shift, err) + } + if hi+lo<<shift != v { + t.Fatalf("splitImm24uScaled(%x, %x) = (%x, %x) is incorrect", v, shift, hi, lo) + } + } + } +} + // TestLarge generates a very large file to verify that large // program builds successfully, in particular, too-far // conditional branches are fixed, and also verify that the |