diff options
author | fanzha02 <fannie.zhang@arm.com> | 2020-08-20 17:02:18 +0800 |
---|---|---|
committer | fannie zhang <Fannie.Zhang@arm.com> | 2020-09-10 02:22:19 +0000 |
commit | dfdc3880b01d46d1d8125ab9eea0606b2fa5b819 (patch) | |
tree | 04c5845faacf93507fa0d24556d1dc80742e57bf | |
parent | aa476ba6f43ebc4e7ddb6599a7ad35d9fbf1ec6d (diff) | |
download | go-dfdc3880b01d46d1d8125ab9eea0606b2fa5b819.tar.gz go-dfdc3880b01d46d1d8125ab9eea0606b2fa5b819.zip |
cmd/internal/obj/arm64: enable some SIMD instructions
Enable the VBSL, VBIT, VCMTST, VUXTL, VUXTL2 and FMOVQ SIMD
instructions required by issue #40725. The FMOVQ
instruction is used to move a large constant to a Vn
register.
Add test cases.
Fixes #40725
Change-Id: I1cac1922a0a0165d698a4b73a41f7a5f0a0ad549
Reviewed-on: https://go-review.googlesource.com/c/go/+/249758
Reviewed-by: Cherry Zhang <cherryyz@google.com>
-rw-r--r-- | src/cmd/asm/internal/asm/testdata/arm64.s | 15 | ||||
-rw-r--r-- | src/cmd/asm/internal/asm/testdata/arm64error.s | 5 | ||||
-rw-r--r-- | src/cmd/internal/obj/arm64/a.out.go | 6 | ||||
-rw-r--r-- | src/cmd/internal/obj/arm64/anames.go | 6 | ||||
-rw-r--r-- | src/cmd/internal/obj/arm64/asm7.go | 121 |
5 files changed, 139 insertions, 14 deletions
diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index f0c716a2b5..451ca749ba 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -145,6 +145,17 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 VZIP2 V10.D2, V13.D2, V3.D2 // a379ca4e VZIP1 V17.S2, V4.S2, V26.S2 // 9a38910e VZIP2 V25.S2, V14.S2, V25.S2 // d979990e + VUXTL V30.B8, V30.H8 // dea7082f + VUXTL V30.H4, V29.S4 // dda7102f + VUXTL V29.S2, V2.D2 // a2a7202f + VUXTL2 V30.H8, V30.S4 // dea7106f + VUXTL2 V29.S4, V2.D2 // a2a7206f + VUXTL2 V30.B16, V2.H8 // c2a7086f + VBIT V21.B16, V25.B16, V4.B16 // 241fb56e + VBSL V23.B16, V3.B16, V7.B16 // 671c776e + VCMTST V2.B8, V29.B8, V2.B8 // a28f220e + VCMTST V2.D2, V23.D2, V3.D2 // e38ee24e + VSUB V2.B8, V30.B8, V30.B8 // de87222e MOVD (R2)(R6.SXTW), R4 // 44c866f8 MOVD (R3)(R6), R5 // MOVD (R3)(R6*1), R5 // 656866f8 MOVD (R2)(R6), R4 // MOVD (R2)(R6*1), R4 // 446866f8 @@ -186,6 +197,10 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 FMOVS $(0.96875), F3 // 03f02d1e FMOVD $(28.0), F4 // 0490671e +// move a large constant to a Vd. 
+ FMOVD $0x8040201008040201, V20 // FMOVD $-9205322385119247871, V20 + FMOVQ $0x8040201008040202, V29 // FMOVQ $-9205322385119247870, V29 + FMOVS (R2)(R6), F4 // FMOVS (R2)(R6*1), F4 // 446866bc FMOVS (R2)(R6<<2), F4 // 447866bc FMOVD (R2)(R6), F4 // FMOVD (R2)(R6*1), F4 // 446866fc diff --git a/src/cmd/asm/internal/asm/testdata/arm64error.s b/src/cmd/asm/internal/asm/testdata/arm64error.s index 9f377817a9..2a911b4cce 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64error.s +++ b/src/cmd/asm/internal/asm/testdata/arm64error.s @@ -340,4 +340,9 @@ TEXT errors(SB),$0 MRS PMSWINC_EL0, R3 // ERROR "system register is not readable" MRS OSLAR_EL1, R3 // ERROR "system register is not readable" VLD3R.P 24(R15), [V15.H4,V16.H4,V17.H4] // ERROR "invalid post-increment offset" + VBIT V1.H4, V12.H4, V3.H4 // ERROR "invalid arrangement" + VBSL V1.D2, V12.D2, V3.D2 // ERROR "invalid arrangement" + VUXTL V30.D2, V30.H8 // ERROR "operand mismatch" + VUXTL2 V20.B8, V21.H8 // ERROR "operand mismatch" + VUXTL V3.D2, V4.B8 // ERROR "operand mismatch" RET diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index 03e0278a33..ab065e07e5 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -874,6 +874,7 @@ const ( AFLDPS AFMOVD AFMOVS + AFMOVQ AFMULD AFMULS AFNEGD @@ -987,9 +988,14 @@ const ( AVUSHR AVSHL AVSRI + AVBSL + AVBIT AVTBL AVZIP1 AVZIP2 + AVCMTST + AVUXTL + AVUXTL2 ALAST AB = obj.AJMP ABL = obj.ACALL diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go index 65ecd007ea..8961f04b0c 100644 --- a/src/cmd/internal/obj/arm64/anames.go +++ b/src/cmd/internal/obj/arm64/anames.go @@ -381,6 +381,7 @@ var Anames = []string{ "FLDPS", "FMOVD", "FMOVS", + "FMOVQ", "FMULD", "FMULS", "FNEGD", @@ -494,8 +495,13 @@ var Anames = []string{ "VUSHR", "VSHL", "VSRI", + "VBSL", + "VBIT", "VTBL", "VZIP1", "VZIP2", + "VCMTST", + "VUXTL", + "VUXTL2", "LAST", } diff --git 
a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 0b90e31392..7ce18d0f13 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -393,6 +393,11 @@ var optab = []Optab{ {AMOVK, C_VCON, C_NONE, C_NONE, C_REG, 33, 4, 0, 0, 0}, {AMOVD, C_AACON, C_NONE, C_NONE, C_REG, 4, 4, REGFROM, 0, 0}, + // Move a large constant to a Vn. + {AFMOVQ, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, + {AFMOVD, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, + {AFMOVS, C_LCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, + /* jump operations */ {AB, C_NONE, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0}, {ABL, C_NONE, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0}, @@ -403,12 +408,14 @@ var optab = []Optab{ {obj.ARET, C_NONE, C_NONE, C_NONE, C_REG, 6, 4, 0, 0, 0}, {obj.ARET, C_NONE, C_NONE, C_NONE, C_ZOREG, 6, 4, 0, 0, 0}, {ABEQ, C_NONE, C_NONE, C_NONE, C_SBRA, 7, 4, 0, 0, 0}, - {AADRP, C_SBRA, C_NONE, C_NONE, C_REG, 60, 4, 0, 0, 0}, - {AADR, C_SBRA, C_NONE, C_NONE, C_REG, 61, 4, 0, 0, 0}, {ACBZ, C_REG, C_NONE, C_NONE, C_SBRA, 39, 4, 0, 0, 0}, {ATBZ, C_VCON, C_REG, C_NONE, C_SBRA, 40, 4, 0, 0, 0}, {AERET, C_NONE, C_NONE, C_NONE, C_NONE, 41, 4, 0, 0, 0}, + // get a PC-relative address + {AADRP, C_SBRA, C_NONE, C_NONE, C_REG, 60, 4, 0, 0, 0}, + {AADR, C_SBRA, C_NONE, C_NONE, C_REG, 61, 4, 0, 0, 0}, + {ACLREX, C_NONE, C_NONE, C_NONE, C_VCON, 38, 4, 0, 0, 0}, {ACLREX, C_NONE, C_NONE, C_NONE, C_NONE, 38, 4, 0, 0, 0}, {ABFM, C_VCON, C_REG, C_VCON, C_REG, 42, 4, 0, 0, 0}, @@ -473,6 +480,7 @@ var optab = []Optab{ {AVTBL, C_ARNG, C_NONE, C_LIST, C_ARNG, 100, 4, 0, 0, 0}, {AVUSHR, C_VCON, C_ARNG, C_NONE, C_ARNG, 95, 4, 0, 0, 0}, {AVZIP1, C_ARNG, C_ARNG, C_NONE, C_ARNG, 72, 4, 0, 0, 0}, + {AVUXTL, C_ARNG, C_NONE, C_NONE, C_ARNG, 102, 4, 0, 0, 0}, /* conditional operations */ {ACSEL, C_COND, C_REG, C_REG, C_REG, 18, 4, 0, 0, 0}, @@ -2657,7 +2665,7 @@ func buildop(ctxt *obj.Link) { case AFCSELD: oprangeset(AFCSELS, t) - case AFMOVS, 
AFMOVD: + case AFMOVS, AFMOVD, AFMOVQ: break case AFCVTZSD: @@ -2740,6 +2748,9 @@ func buildop(ctxt *obj.Link) { oprangeset(AVCMEQ, t) oprangeset(AVORR, t) oprangeset(AVEOR, t) + oprangeset(AVBSL, t) + oprangeset(AVBIT, t) + oprangeset(AVCMTST, t) case AVADD: oprangeset(AVSUB, t) @@ -2787,6 +2798,9 @@ func buildop(ctxt *obj.Link) { case AVZIP1: oprangeset(AVZIP2, t) + case AVUXTL: + oprangeset(AVUXTL2, t) + case AVLD1R: oprangeset(AVLD2, t) oprangeset(AVLD2R, t) @@ -4163,7 +4177,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { rel.Add = 0 rel.Type = objabi.R_ARM64_GOTPCREL - case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls Vm.<T>, Vn.<T>, Vd.<T> */ + case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls/vbit/vbsl/vcmtst/vsub Vm.<T>, Vn.<T>, Vd.<T> */ af := int((p.From.Reg >> 5) & 15) af3 := int((p.Reg >> 5) & 15) at := int((p.To.Reg >> 5) & 15) @@ -4204,17 +4218,24 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { c.ctxt.Diag("invalid arrangement: %v", p) } - if (p.As == AVORR || p.As == AVAND || p.As == AVEOR) && - (af != ARNG_16B && af != ARNG_8B) { - c.ctxt.Diag("invalid arrangement: %v", p) - } else if (p.As == AVFMLA || p.As == AVFMLS) && - (af != ARNG_2D && af != ARNG_2S && af != ARNG_4S) { - c.ctxt.Diag("invalid arrangement: %v", p) - } else if p.As == AVORR { - size = 2 - } else if p.As == AVAND || p.As == AVEOR { + switch p.As { + case AVORR, AVAND, AVEOR, AVBIT, AVBSL: + if af != ARNG_16B && af != ARNG_8B { + c.ctxt.Diag("invalid arrangement: %v", p) + } + case AVFMLA, AVFMLS: + if af != ARNG_2D && af != ARNG_2S && af != ARNG_4S { + c.ctxt.Diag("invalid arrangement: %v", p) + } + } + switch p.As { + case AVAND, AVEOR: size = 0 - } else if p.As == AVFMLA || p.As == AVFMLS { + case AVBSL: + size = 1 + case AVORR, AVBIT: + size = 2 + case AVFMLA, AVFMLS: if af == ARNG_2D { size = 1 } else { @@ -5096,6 +5117,59 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { o1 = q<<30 | 0xe<<24 | len<<13 o1 |= 
(uint32(rf&31) << 16) | uint32(offset&31)<<5 | uint32(rt&31) + case 101: // FOMVQ/FMOVD $vcon, Vd -> load from constant pool. + o1 = c.omovlit(p.As, p, &p.From, int(p.To.Reg)) + + case 102: // VUXTL{2} Vn.<Tb>, Vd.<Ta> + af := int((p.From.Reg >> 5) & 15) + at := int((p.To.Reg >> 5) & 15) + var Q, immh uint32 + switch at { + case ARNG_8H: + if af == ARNG_8B { + immh = 1 + Q = 0 + } else if af == ARNG_16B { + immh = 1 + Q = 1 + } else { + c.ctxt.Diag("operand mismatch: %v\n", p) + } + case ARNG_4S: + if af == ARNG_4H { + immh = 2 + Q = 0 + } else if af == ARNG_8H { + immh = 2 + Q = 1 + } else { + c.ctxt.Diag("operand mismatch: %v\n", p) + } + case ARNG_2D: + if af == ARNG_2S { + immh = 4 + Q = 0 + } else if af == ARNG_4S { + immh = 4 + Q = 1 + } else { + c.ctxt.Diag("operand mismatch: %v\n", p) + } + default: + c.ctxt.Diag("operand mismatch: %v\n", p) + } + + if p.As == AVUXTL && Q == 1 { + c.ctxt.Diag("operand mismatch: %v\n", p) + } + if p.As == AVUXTL2 && Q == 0 { + c.ctxt.Diag("operand mismatch: %v\n", p) + } + + o1 = c.oprrr(p, p.As) + rf := int((p.From.Reg) & 31) + rt := int((p.To.Reg) & 31) + o1 |= Q<<30 | immh<<19 | uint32((rf&31)<<5) | uint32(rt&31) } out[0] = o1 out[1] = o2 @@ -5662,6 +5736,9 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 { case AVADD: return 7<<25 | 1<<21 | 1<<15 | 1<<10 + case AVSUB: + return 0x17<<25 | 1<<21 | 1<<15 | 1<<10 + case AVADDP: return 7<<25 | 1<<21 | 1<<15 | 15<<10 @@ -5724,6 +5801,18 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 { case AVLD2R, AVLD4R: return 0xD<<24 | 3<<21 + + case AVBIT: + return 1<<29 | 0x75<<21 | 7<<10 + + case AVBSL: + return 1<<29 | 0x73<<21 | 7<<10 + + case AVCMTST: + return 0xE<<24 | 1<<21 | 0x23<<10 + + case AVUXTL, AVUXTL2: + return 0x5e<<23 | 0x29<<10 } c.ctxt.Diag("%v: bad rrr %d %v", p, a, a) @@ -6566,6 +6655,10 @@ func (c *ctxt7) omovlit(as obj.As, p *obj.Prog, a *obj.Addr, dr int) uint32 { fp = 1 w = 1 /* 64-bit SIMD/FP */ + case AFMOVQ: + fp = 1 + w = 2 /* 128-bit SIMD/FP */ + 
case AMOVD: if p.Pool.As == ADWORD { w = 1 /* 64-bit */ |