diff options
author | fanzha02 <fannie.zhang@arm.com> | 2020-09-16 14:05:18 +0800 |
---|---|---|
committer | fannie zhang <Fannie.Zhang@arm.com> | 2020-09-25 01:47:40 +0000 |
commit | fa04d488bd54b8fdd78cc9bcc6d90de4bf5f8efb (patch) | |
tree | ef0b7d4890d7d108565c388868765d3cbdd4009f | |
parent | ea106cc07ac73110a8a25fcc5aef07b283159db0 (diff) | |
download | go-fa04d488bd54b8fdd78cc9bcc6d90de4bf5f8efb.tar.gz go-fa04d488bd54b8fdd78cc9bcc6d90de4bf5f8efb.zip |
cmd/asm: fix the issue of moving 128-bit integers to vector registers on arm64
The CL 249758 added `FMOVQ $vcon, Vd` instruction and assembler used
128-bit simd literal-loading to load `$vcon` from pool into 128-bit vector
register `Vd`. Because Go does not have 128-bit integers for now, the
assembler will report an error of `immediate out of range` when
assembleing `FMOVQ $0x123456789abcdef0123456789abcdef, V0` instruction.
This patch lets 128-bit integers take two 64-bit operands, for the high
and low parts separately and adds `VMOVQ $hi, $lo, Vd` instruction to
move `$hi<<64+$lo' into 128-bit register `Vd`.
In addition, this patch renames `FMOVQ/FMOVD/FMOVS` ops to 'VMOVQ/VMOVD/VMOVS'
and uses them to move 128-bit, 64-bit and 32-bit constants into vector
registers, respectively
Update the go doc.
Fixes #40725
Change-Id: Ia3c83bb6463f104d2bee960905053a97299e0a3a
Reviewed-on: https://go-review.googlesource.com/c/go/+/255900
Trust: fannie zhang <Fannie.Zhang@arm.com>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
-rw-r--r-- | src/cmd/asm/internal/arch/arm64.go | 18 | ||||
-rw-r--r-- | src/cmd/asm/internal/asm/asm.go | 21 | ||||
-rw-r--r-- | src/cmd/asm/internal/asm/testdata/arm64.s | 6 | ||||
-rw-r--r-- | src/cmd/internal/obj/arm64/a.out.go | 4 | ||||
-rw-r--r-- | src/cmd/internal/obj/arm64/anames.go | 4 | ||||
-rw-r--r-- | src/cmd/internal/obj/arm64/asm7.go | 38 | ||||
-rw-r--r-- | src/cmd/internal/obj/arm64/doc.go | 10 |
7 files changed, 60 insertions, 41 deletions
diff --git a/src/cmd/asm/internal/arch/arm64.go b/src/cmd/asm/internal/arch/arm64.go index 3817fcd5c2..e643889aef 100644 --- a/src/cmd/asm/internal/arch/arm64.go +++ b/src/cmd/asm/internal/arch/arm64.go @@ -82,6 +82,17 @@ func IsARM64STLXR(op obj.As) bool { return false } +// IsARM64TBL reports whether the op (as defined by an arm64.A* +// constant) is one of the TBL-like instructions and one of its +// inputs does not fit into prog.Reg, so require special handling. +func IsARM64TBL(op obj.As) bool { + switch op { + case arm64.AVTBL, arm64.AVMOVQ: + return true + } + return false +} + // ARM64Suffix handles the special suffix for the ARM64. // It returns a boolean to indicate success; failure means // cond was unrecognized. @@ -125,13 +136,6 @@ func arm64RegisterNumber(name string, n int16) (int16, bool) { return 0, false } -// IsARM64TBL reports whether the op (as defined by an arm64.A* -// constant) is one of the table lookup instructions that require special -// handling. -func IsARM64TBL(op obj.As) bool { - return op == arm64.AVTBL -} - // ARM64RegisterExtension parses an ARM64 register with extension or arrangement. func ARM64RegisterExtension(a *obj.Addr, ext string, reg, num int16, isAmount, isIndex bool) error { Rnum := (reg & 31) + int16(num<<5) diff --git a/src/cmd/asm/internal/asm/asm.go b/src/cmd/asm/internal/asm/asm.go index 42e217dc23..7878d74549 100644 --- a/src/cmd/asm/internal/asm/asm.go +++ b/src/cmd/asm/internal/asm/asm.go @@ -622,8 +622,9 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { prog.SetFrom3(a[1]) prog.To = a[2] case sys.ARM64: - // ARM64 instructions with one input and two outputs. - if arch.IsARM64STLXR(op) { + switch { + case arch.IsARM64STLXR(op): + // ARM64 instructions with one input and two outputs. prog.From = a[0] prog.To = a[1] if a[2].Type != obj.TYPE_REG { @@ -631,20 +632,16 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { return } prog.RegTo2 = a[2].Reg - break - } - if arch.IsARM64TBL(op) { + case arch.IsARM64TBL(op): + // one of its inputs does not fit into prog.Reg. prog.From = a[0] - if a[1].Type != obj.TYPE_REGLIST { - p.errorf("%s: expected list; found %s", op, obj.Dconv(prog, &a[1])) - } prog.SetFrom3(a[1]) prog.To = a[2] - break + default: + prog.From = a[0] + prog.Reg = p.getRegister(prog, op, &a[1]) + prog.To = a[2] } - prog.From = a[0] - prog.Reg = p.getRegister(prog, op, &a[1]) - prog.To = a[2] case sys.I386: prog.From = a[0] prog.SetFrom3(a[1]) diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index acfb16b096..e277c04b7c 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -218,8 +218,10 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 FMOVD $(28.0), F4 // 0490671e // move a large constant to a Vd. - FMOVD $0x8040201008040201, V20 // FMOVD $-9205322385119247871, V20 - FMOVQ $0x8040201008040202, V29 // FMOVQ $-9205322385119247870, V29 + VMOVS $0x80402010, V11 // VMOVS $2151686160, V11 + VMOVD $0x8040201008040201, V20 // VMOVD $-9205322385119247871, V20 + VMOVQ $0x7040201008040201, $0x8040201008040201, V10 // VMOVQ $8088500183983456769, $-9205322385119247871, V10 + VMOVQ $0x8040201008040202, $0x7040201008040201, V20 // VMOVQ $-9205322385119247870, $8088500183983456769, V20 FMOVS (R2)(R6), F4 // FMOVS (R2)(R6*1), F4 // 446866bc FMOVS (R2)(R6<<2), F4 // 447866bc diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index b3c9e9a18e..1ca41c15ba 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -875,7 +875,9 @@ const ( AFLDPS AFMOVD AFMOVS - AFMOVQ + AVMOVQ + AVMOVD + AVMOVS AFMULD AFMULS AFNEGD diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go index 48c066abfd..900cdba817 100644 --- a/src/cmd/internal/obj/arm64/anames.go +++ b/src/cmd/internal/obj/arm64/anames.go @@ -381,7 +381,9 @@ var Anames = []string{ "FLDPS", "FMOVD", "FMOVS", - "FMOVQ", + "VMOVQ", + "VMOVD", + "VMOVS", "FMULD", "FMULS", "FNEGD", diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index fc2033d689..ee4a33eef4 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -260,8 +260,9 @@ func MOVCONST(d int64, s int, rt int) uint32 { const ( // Optab.flag LFROM = 1 << 0 // p.From uses constant pool - LTO = 1 << 1 // p.To uses constant pool - NOTUSETMP = 1 << 2 // p expands to multiple instructions, but does NOT use REGTMP + LFROM3 = 1 << 1 // p.From3 uses constant pool + LTO = 1 << 2 // p.To uses constant pool + NOTUSETMP = 1 << 3 // p expands to multiple instructions, but does NOT use REGTMP ) var optab = []Optab{ @@ -397,10 +398,10 @@ var optab = []Optab{ /* load long effective stack address (load int32 offset and add) */ {AMOVD, C_LACON, C_NONE, C_NONE, C_RSP, 34, 8, REGSP, LFROM, 0}, - // Move a large constant to a Vn. - {AFMOVQ, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, - {AFMOVD, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, - {AFMOVS, C_LCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, + // Move a large constant to a vector register. + {AVMOVQ, C_VCON, C_NONE, C_VCON, C_VREG, 101, 4, 0, LFROM | LFROM3, 0}, + {AVMOVD, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, + {AVMOVS, C_LCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, /* jump operations */ {AB, C_NONE, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0}, @@ -950,13 +951,14 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { c.ctxt.Diag("zero-width instruction\n%v", p) } } - switch o.flag & (LFROM | LTO) { - case LFROM: + if o.flag&LFROM != 0 { c.addpool(p, &p.From) - - case LTO: + } + if o.flag&LFROM3 != 0 { + c.addpool(p, p.GetFrom3()) + } + if o.flag<O != 0 { c.addpool(p, &p.To) - break } if p.As == AB || p.As == obj.ARET || p.As == AERET { /* TODO: other unconditional operations */ @@ -1174,8 +1176,8 @@ func (c *ctxt7) addpool(p *obj.Prog, a *obj.Addr) { sz := 4 if a.Type == obj.TYPE_CONST { - if lit != int64(int32(lit)) && uint64(lit) != uint64(uint32(lit)) { - // out of range -0x80000000 ~ 0xffffffff, must store 64-bit + if (lit != int64(int32(lit)) && uint64(lit) != uint64(uint32(lit))) || p.As == AVMOVQ || p.As == AVMOVD { + // out of range -0x80000000 ~ 0xffffffff or VMOVQ or VMOVD operand, must store 64-bit. t.As = ADWORD sz = 8 } // else store 32-bit @@ -2675,7 +2677,7 @@ func buildop(ctxt *obj.Link) { case AFCSELD: oprangeset(AFCSELS, t) - case AFMOVS, AFMOVD, AFMOVQ: + case AFMOVS, AFMOVD, AVMOVQ, AVMOVD, AVMOVS: break case AFCVTZSD: @@ -5142,7 +5144,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { o1 = q<<30 | 0xe<<24 | len<<13 o1 |= (uint32(rf&31) << 16) | uint32(offset&31)<<5 | uint32(rt&31) - case 101: // FOMVQ/FMOVD $vcon, Vd -> load from constant pool. + case 101: // VMOVQ $vcon1, $vcon2, Vd or VMOVD|VMOVS $vcon, Vd -> FMOVQ/FMOVD/FMOVS pool(PC), Vd: load from constant pool. o1 = c.omovlit(p.As, p, &p.From, int(p.To.Reg)) case 102: /* vushll, vushll2, vuxtl, vuxtl2 */ @@ -6672,15 +6674,15 @@ func (c *ctxt7) omovlit(as obj.As, p *obj.Prog, a *obj.Addr, dr int) uint32 { } else { fp, w := 0, 0 switch as { - case AFMOVS: + case AFMOVS, AVMOVS: fp = 1 w = 0 /* 32-bit SIMD/FP */ - case AFMOVD: + case AFMOVD, AVMOVD: fp = 1 w = 1 /* 64-bit SIMD/FP */ - case AFMOVQ: + case AVMOVQ: fp = 1 w = 2 /* 128-bit SIMD/FP */ diff --git a/src/cmd/internal/obj/arm64/doc.go b/src/cmd/internal/obj/arm64/doc.go index 7515217544..efd4577f56 100644 --- a/src/cmd/internal/obj/arm64/doc.go +++ b/src/cmd/internal/obj/arm64/doc.go @@ -86,6 +86,16 @@ In the following example, PCALIGN at the entry of the function Add will align it MOVD $1, R1 RET +7. Move large constants to vector registers. + +Go asm uses VMOVQ/VMOVD/VMOVS to move 128-bit, 64-bit and 32-bit constants into vector registers, respectively. +And for a 128-bit interger, it take two 64-bit operands, for the high and low parts separately. + + Examples: + VMOVS $0x11223344, V0 + VMOVD $0x1122334455667788, V1 + VMOVQ $0x1122334455667788, $8877665544332211, V2 // V2=0x11223344556677888877665544332211 + Special Cases. (1) umov is written as VMOV. |