aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorfanzha02 <fannie.zhang@arm.com>2020-08-20 17:02:18 +0800
committerfannie zhang <Fannie.Zhang@arm.com>2020-09-10 02:22:19 +0000
commitdfdc3880b01d46d1d8125ab9eea0606b2fa5b819 (patch)
tree04c5845faacf93507fa0d24556d1dc80742e57bf
parentaa476ba6f43ebc4e7ddb6599a7ad35d9fbf1ec6d (diff)
downloadgo-dfdc3880b01d46d1d8125ab9eea0606b2fa5b819.tar.gz
go-dfdc3880b01d46d1d8125ab9eea0606b2fa5b819.zip
cmd/internal/obj/arm64: enable some SIMD instructions
Enable VBSL, VBIT, VCMTST, VUXTL VUXTL2 and FMOVQ SIMD instructions required by the issue #40725. And FMOVQ instrucion is used to move a large constant to a Vn register. Add test cases. Fixes #40725 Change-Id: I1cac1922a0a0165d698a4b73a41f7a5f0a0ad549 Reviewed-on: https://go-review.googlesource.com/c/go/+/249758 Reviewed-by: Cherry Zhang <cherryyz@google.com>
-rw-r--r--src/cmd/asm/internal/asm/testdata/arm64.s15
-rw-r--r--src/cmd/asm/internal/asm/testdata/arm64error.s5
-rw-r--r--src/cmd/internal/obj/arm64/a.out.go6
-rw-r--r--src/cmd/internal/obj/arm64/anames.go6
-rw-r--r--src/cmd/internal/obj/arm64/asm7.go121
5 files changed, 139 insertions, 14 deletions
diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s
index f0c716a2b5..451ca749ba 100644
--- a/src/cmd/asm/internal/asm/testdata/arm64.s
+++ b/src/cmd/asm/internal/asm/testdata/arm64.s
@@ -145,6 +145,17 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
VZIP2 V10.D2, V13.D2, V3.D2 // a379ca4e
VZIP1 V17.S2, V4.S2, V26.S2 // 9a38910e
VZIP2 V25.S2, V14.S2, V25.S2 // d979990e
+ VUXTL V30.B8, V30.H8 // dea7082f
+ VUXTL V30.H4, V29.S4 // dda7102f
+ VUXTL V29.S2, V2.D2 // a2a7202f
+ VUXTL2 V30.H8, V30.S4 // dea7106f
+ VUXTL2 V29.S4, V2.D2 // a2a7206f
+ VUXTL2 V30.B16, V2.H8 // c2a7086f
+ VBIT V21.B16, V25.B16, V4.B16 // 241fb56e
+ VBSL V23.B16, V3.B16, V7.B16 // 671c776e
+ VCMTST V2.B8, V29.B8, V2.B8 // a28f220e
+ VCMTST V2.D2, V23.D2, V3.D2 // e38ee24e
+ VSUB V2.B8, V30.B8, V30.B8 // de87222e
MOVD (R2)(R6.SXTW), R4 // 44c866f8
MOVD (R3)(R6), R5 // MOVD (R3)(R6*1), R5 // 656866f8
MOVD (R2)(R6), R4 // MOVD (R2)(R6*1), R4 // 446866f8
@@ -186,6 +197,10 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
FMOVS $(0.96875), F3 // 03f02d1e
FMOVD $(28.0), F4 // 0490671e
+// move a large constant to a Vd.
+ FMOVD $0x8040201008040201, V20 // FMOVD $-9205322385119247871, V20
+ FMOVQ $0x8040201008040202, V29 // FMOVQ $-9205322385119247870, V29
+
FMOVS (R2)(R6), F4 // FMOVS (R2)(R6*1), F4 // 446866bc
FMOVS (R2)(R6<<2), F4 // 447866bc
FMOVD (R2)(R6), F4 // FMOVD (R2)(R6*1), F4 // 446866fc
diff --git a/src/cmd/asm/internal/asm/testdata/arm64error.s b/src/cmd/asm/internal/asm/testdata/arm64error.s
index 9f377817a9..2a911b4cce 100644
--- a/src/cmd/asm/internal/asm/testdata/arm64error.s
+++ b/src/cmd/asm/internal/asm/testdata/arm64error.s
@@ -340,4 +340,9 @@ TEXT errors(SB),$0
MRS PMSWINC_EL0, R3 // ERROR "system register is not readable"
MRS OSLAR_EL1, R3 // ERROR "system register is not readable"
VLD3R.P 24(R15), [V15.H4,V16.H4,V17.H4] // ERROR "invalid post-increment offset"
+ VBIT V1.H4, V12.H4, V3.H4 // ERROR "invalid arrangement"
+ VBSL V1.D2, V12.D2, V3.D2 // ERROR "invalid arrangement"
+ VUXTL V30.D2, V30.H8 // ERROR "operand mismatch"
+ VUXTL2 V20.B8, V21.H8 // ERROR "operand mismatch"
+ VUXTL V3.D2, V4.B8 // ERROR "operand mismatch"
RET
diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go
index 03e0278a33..ab065e07e5 100644
--- a/src/cmd/internal/obj/arm64/a.out.go
+++ b/src/cmd/internal/obj/arm64/a.out.go
@@ -874,6 +874,7 @@ const (
AFLDPS
AFMOVD
AFMOVS
+ AFMOVQ
AFMULD
AFMULS
AFNEGD
@@ -987,9 +988,14 @@ const (
AVUSHR
AVSHL
AVSRI
+ AVBSL
+ AVBIT
AVTBL
AVZIP1
AVZIP2
+ AVCMTST
+ AVUXTL
+ AVUXTL2
ALAST
AB = obj.AJMP
ABL = obj.ACALL
diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go
index 65ecd007ea..8961f04b0c 100644
--- a/src/cmd/internal/obj/arm64/anames.go
+++ b/src/cmd/internal/obj/arm64/anames.go
@@ -381,6 +381,7 @@ var Anames = []string{
"FLDPS",
"FMOVD",
"FMOVS",
+ "FMOVQ",
"FMULD",
"FMULS",
"FNEGD",
@@ -494,8 +495,13 @@ var Anames = []string{
"VUSHR",
"VSHL",
"VSRI",
+ "VBSL",
+ "VBIT",
"VTBL",
"VZIP1",
"VZIP2",
+ "VCMTST",
+ "VUXTL",
+ "VUXTL2",
"LAST",
}
diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go
index 0b90e31392..7ce18d0f13 100644
--- a/src/cmd/internal/obj/arm64/asm7.go
+++ b/src/cmd/internal/obj/arm64/asm7.go
@@ -393,6 +393,11 @@ var optab = []Optab{
{AMOVK, C_VCON, C_NONE, C_NONE, C_REG, 33, 4, 0, 0, 0},
{AMOVD, C_AACON, C_NONE, C_NONE, C_REG, 4, 4, REGFROM, 0, 0},
+ // Move a large constant to a Vn.
+ {AFMOVQ, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0},
+ {AFMOVD, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0},
+ {AFMOVS, C_LCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0},
+
/* jump operations */
{AB, C_NONE, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0},
{ABL, C_NONE, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0},
@@ -403,12 +408,14 @@ var optab = []Optab{
{obj.ARET, C_NONE, C_NONE, C_NONE, C_REG, 6, 4, 0, 0, 0},
{obj.ARET, C_NONE, C_NONE, C_NONE, C_ZOREG, 6, 4, 0, 0, 0},
{ABEQ, C_NONE, C_NONE, C_NONE, C_SBRA, 7, 4, 0, 0, 0},
- {AADRP, C_SBRA, C_NONE, C_NONE, C_REG, 60, 4, 0, 0, 0},
- {AADR, C_SBRA, C_NONE, C_NONE, C_REG, 61, 4, 0, 0, 0},
{ACBZ, C_REG, C_NONE, C_NONE, C_SBRA, 39, 4, 0, 0, 0},
{ATBZ, C_VCON, C_REG, C_NONE, C_SBRA, 40, 4, 0, 0, 0},
{AERET, C_NONE, C_NONE, C_NONE, C_NONE, 41, 4, 0, 0, 0},
+ // get a PC-relative address
+ {AADRP, C_SBRA, C_NONE, C_NONE, C_REG, 60, 4, 0, 0, 0},
+ {AADR, C_SBRA, C_NONE, C_NONE, C_REG, 61, 4, 0, 0, 0},
+
{ACLREX, C_NONE, C_NONE, C_NONE, C_VCON, 38, 4, 0, 0, 0},
{ACLREX, C_NONE, C_NONE, C_NONE, C_NONE, 38, 4, 0, 0, 0},
{ABFM, C_VCON, C_REG, C_VCON, C_REG, 42, 4, 0, 0, 0},
@@ -473,6 +480,7 @@ var optab = []Optab{
{AVTBL, C_ARNG, C_NONE, C_LIST, C_ARNG, 100, 4, 0, 0, 0},
{AVUSHR, C_VCON, C_ARNG, C_NONE, C_ARNG, 95, 4, 0, 0, 0},
{AVZIP1, C_ARNG, C_ARNG, C_NONE, C_ARNG, 72, 4, 0, 0, 0},
+ {AVUXTL, C_ARNG, C_NONE, C_NONE, C_ARNG, 102, 4, 0, 0, 0},
/* conditional operations */
{ACSEL, C_COND, C_REG, C_REG, C_REG, 18, 4, 0, 0, 0},
@@ -2657,7 +2665,7 @@ func buildop(ctxt *obj.Link) {
case AFCSELD:
oprangeset(AFCSELS, t)
- case AFMOVS, AFMOVD:
+ case AFMOVS, AFMOVD, AFMOVQ:
break
case AFCVTZSD:
@@ -2740,6 +2748,9 @@ func buildop(ctxt *obj.Link) {
oprangeset(AVCMEQ, t)
oprangeset(AVORR, t)
oprangeset(AVEOR, t)
+ oprangeset(AVBSL, t)
+ oprangeset(AVBIT, t)
+ oprangeset(AVCMTST, t)
case AVADD:
oprangeset(AVSUB, t)
@@ -2787,6 +2798,9 @@ func buildop(ctxt *obj.Link) {
case AVZIP1:
oprangeset(AVZIP2, t)
+ case AVUXTL:
+ oprangeset(AVUXTL2, t)
+
case AVLD1R:
oprangeset(AVLD2, t)
oprangeset(AVLD2R, t)
@@ -4163,7 +4177,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
rel.Add = 0
rel.Type = objabi.R_ARM64_GOTPCREL
- case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls Vm.<T>, Vn.<T>, Vd.<T> */
+ case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls/vbit/vbsl/vcmtst/vsub Vm.<T>, Vn.<T>, Vd.<T> */
af := int((p.From.Reg >> 5) & 15)
af3 := int((p.Reg >> 5) & 15)
at := int((p.To.Reg >> 5) & 15)
@@ -4204,17 +4218,24 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
c.ctxt.Diag("invalid arrangement: %v", p)
}
- if (p.As == AVORR || p.As == AVAND || p.As == AVEOR) &&
- (af != ARNG_16B && af != ARNG_8B) {
- c.ctxt.Diag("invalid arrangement: %v", p)
- } else if (p.As == AVFMLA || p.As == AVFMLS) &&
- (af != ARNG_2D && af != ARNG_2S && af != ARNG_4S) {
- c.ctxt.Diag("invalid arrangement: %v", p)
- } else if p.As == AVORR {
- size = 2
- } else if p.As == AVAND || p.As == AVEOR {
+ switch p.As {
+ case AVORR, AVAND, AVEOR, AVBIT, AVBSL:
+ if af != ARNG_16B && af != ARNG_8B {
+ c.ctxt.Diag("invalid arrangement: %v", p)
+ }
+ case AVFMLA, AVFMLS:
+ if af != ARNG_2D && af != ARNG_2S && af != ARNG_4S {
+ c.ctxt.Diag("invalid arrangement: %v", p)
+ }
+ }
+ switch p.As {
+ case AVAND, AVEOR:
size = 0
- } else if p.As == AVFMLA || p.As == AVFMLS {
+ case AVBSL:
+ size = 1
+ case AVORR, AVBIT:
+ size = 2
+ case AVFMLA, AVFMLS:
if af == ARNG_2D {
size = 1
} else {
@@ -5096,6 +5117,59 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
o1 = q<<30 | 0xe<<24 | len<<13
o1 |= (uint32(rf&31) << 16) | uint32(offset&31)<<5 | uint32(rt&31)
+ case 101: // FOMVQ/FMOVD $vcon, Vd -> load from constant pool.
+ o1 = c.omovlit(p.As, p, &p.From, int(p.To.Reg))
+
+ case 102: // VUXTL{2} Vn.<Tb>, Vd.<Ta>
+ af := int((p.From.Reg >> 5) & 15)
+ at := int((p.To.Reg >> 5) & 15)
+ var Q, immh uint32
+ switch at {
+ case ARNG_8H:
+ if af == ARNG_8B {
+ immh = 1
+ Q = 0
+ } else if af == ARNG_16B {
+ immh = 1
+ Q = 1
+ } else {
+ c.ctxt.Diag("operand mismatch: %v\n", p)
+ }
+ case ARNG_4S:
+ if af == ARNG_4H {
+ immh = 2
+ Q = 0
+ } else if af == ARNG_8H {
+ immh = 2
+ Q = 1
+ } else {
+ c.ctxt.Diag("operand mismatch: %v\n", p)
+ }
+ case ARNG_2D:
+ if af == ARNG_2S {
+ immh = 4
+ Q = 0
+ } else if af == ARNG_4S {
+ immh = 4
+ Q = 1
+ } else {
+ c.ctxt.Diag("operand mismatch: %v\n", p)
+ }
+ default:
+ c.ctxt.Diag("operand mismatch: %v\n", p)
+ }
+
+ if p.As == AVUXTL && Q == 1 {
+ c.ctxt.Diag("operand mismatch: %v\n", p)
+ }
+ if p.As == AVUXTL2 && Q == 0 {
+ c.ctxt.Diag("operand mismatch: %v\n", p)
+ }
+
+ o1 = c.oprrr(p, p.As)
+ rf := int((p.From.Reg) & 31)
+ rt := int((p.To.Reg) & 31)
+ o1 |= Q<<30 | immh<<19 | uint32((rf&31)<<5) | uint32(rt&31)
}
out[0] = o1
out[1] = o2
@@ -5662,6 +5736,9 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 {
case AVADD:
return 7<<25 | 1<<21 | 1<<15 | 1<<10
+ case AVSUB:
+ return 0x17<<25 | 1<<21 | 1<<15 | 1<<10
+
case AVADDP:
return 7<<25 | 1<<21 | 1<<15 | 15<<10
@@ -5724,6 +5801,18 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 {
case AVLD2R, AVLD4R:
return 0xD<<24 | 3<<21
+
+ case AVBIT:
+ return 1<<29 | 0x75<<21 | 7<<10
+
+ case AVBSL:
+ return 1<<29 | 0x73<<21 | 7<<10
+
+ case AVCMTST:
+ return 0xE<<24 | 1<<21 | 0x23<<10
+
+ case AVUXTL, AVUXTL2:
+ return 0x5e<<23 | 0x29<<10
}
c.ctxt.Diag("%v: bad rrr %d %v", p, a, a)
@@ -6566,6 +6655,10 @@ func (c *ctxt7) omovlit(as obj.As, p *obj.Prog, a *obj.Addr, dr int) uint32 {
fp = 1
w = 1 /* 64-bit SIMD/FP */
+ case AFMOVQ:
+ fp = 1
+ w = 2 /* 128-bit SIMD/FP */
+
case AMOVD:
if p.Pool.As == ADWORD {
w = 1 /* 64-bit */