aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/compile/internal/x86/387.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/cmd/compile/internal/x86/387.go')
-rw-r--r--src/cmd/compile/internal/x86/387.go395
1 files changed, 395 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/x86/387.go b/src/cmd/compile/internal/x86/387.go
new file mode 100644
index 0000000000..96a7d63082
--- /dev/null
+++ b/src/cmd/compile/internal/x86/387.go
@@ -0,0 +1,395 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package x86
+
+import (
+ "cmd/compile/internal/gc"
+ "cmd/compile/internal/ssa"
+ "cmd/internal/obj"
+ "cmd/internal/obj/x86"
+ "math"
+)
+
+// Generates code for v using 387 instructions. Reports whether
+// the instruction was handled by this routine.
+func ssaGenValue387(s *gc.SSAGenState, v *ssa.Value) bool {
+ // The SSA compiler pretends that it has an SSE backend.
+ // If we don't have one of those, we need to translate
+ // all the SSE ops to equivalent 387 ops. That's what this
+ // function does.
+
+ switch v.Op {
+ case ssa.Op386MOVSSconst, ssa.Op386MOVSDconst:
+ p := gc.Prog(loadPush(v.Type))
+ p.From.Type = obj.TYPE_FCONST
+ p.From.Val = math.Float64frombits(uint64(v.AuxInt))
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = x86.REG_F0
+ popAndSave(s, v)
+ return true
+
+ case ssa.Op386MOVSSload, ssa.Op386MOVSDload, ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1, ssa.Op386MOVSSloadidx4, ssa.Op386MOVSDloadidx8:
+ p := gc.Prog(loadPush(v.Type))
+ p.From.Type = obj.TYPE_MEM
+ p.From.Reg = gc.SSARegNum(v.Args[0])
+ gc.AddAux(&p.From, v)
+ switch v.Op {
+ case ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1:
+ p.From.Scale = 1
+ p.From.Index = gc.SSARegNum(v.Args[1])
+ case ssa.Op386MOVSSloadidx4:
+ p.From.Scale = 4
+ p.From.Index = gc.SSARegNum(v.Args[1])
+ case ssa.Op386MOVSDloadidx8:
+ p.From.Scale = 8
+ p.From.Index = gc.SSARegNum(v.Args[1])
+ }
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = x86.REG_F0
+ popAndSave(s, v)
+ return true
+
+ case ssa.Op386MOVSSstore, ssa.Op386MOVSDstore:
+ // Push to-be-stored value on top of stack.
+ push(s, v.Args[1])
+
+ // Pop and store value.
+ var op obj.As
+ switch v.Op {
+ case ssa.Op386MOVSSstore:
+ op = x86.AFMOVFP
+ case ssa.Op386MOVSDstore:
+ op = x86.AFMOVDP
+ }
+ p := gc.Prog(op)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = x86.REG_F0
+ p.To.Type = obj.TYPE_MEM
+ p.To.Reg = gc.SSARegNum(v.Args[0])
+ gc.AddAux(&p.To, v)
+ return true
+
+ case ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1, ssa.Op386MOVSSstoreidx4, ssa.Op386MOVSDstoreidx8:
+ push(s, v.Args[2])
+ var op obj.As
+ switch v.Op {
+ case ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSSstoreidx4:
+ op = x86.AFMOVFP
+ case ssa.Op386MOVSDstoreidx1, ssa.Op386MOVSDstoreidx8:
+ op = x86.AFMOVDP
+ }
+ p := gc.Prog(op)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = x86.REG_F0
+ p.To.Type = obj.TYPE_MEM
+ p.To.Reg = gc.SSARegNum(v.Args[0])
+ gc.AddAux(&p.To, v)
+ switch v.Op {
+ case ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1:
+ p.To.Scale = 1
+ p.To.Index = gc.SSARegNum(v.Args[1])
+ case ssa.Op386MOVSSstoreidx4:
+ p.To.Scale = 4
+ p.To.Index = gc.SSARegNum(v.Args[1])
+ case ssa.Op386MOVSDstoreidx8:
+ p.To.Scale = 8
+ p.To.Index = gc.SSARegNum(v.Args[1])
+ }
+ return true
+
+ case ssa.Op386ADDSS, ssa.Op386ADDSD, ssa.Op386SUBSS, ssa.Op386SUBSD,
+ ssa.Op386MULSS, ssa.Op386MULSD, ssa.Op386DIVSS, ssa.Op386DIVSD:
+ if gc.SSARegNum(v) != gc.SSARegNum(v.Args[0]) {
+ v.Fatalf("input[0] and output not in same register %s", v.LongString())
+ }
+
+ // Push arg1 on top of stack
+ push(s, v.Args[1])
+
+ // Set precision if needed. 64 bits is the default.
+ switch v.Op {
+ case ssa.Op386ADDSS, ssa.Op386SUBSS, ssa.Op386MULSS, ssa.Op386DIVSS:
+ p := gc.Prog(x86.AFSTCW)
+ scratch387(s, &p.To)
+ p = gc.Prog(x86.AFLDCW)
+ p.From.Type = obj.TYPE_MEM
+ p.From.Name = obj.NAME_EXTERN
+ p.From.Sym = gc.Linksym(gc.Pkglookup("controlWord32", gc.Runtimepkg))
+ }
+
+ var op obj.As
+ switch v.Op {
+ case ssa.Op386ADDSS, ssa.Op386ADDSD:
+ op = x86.AFADDDP
+ case ssa.Op386SUBSS, ssa.Op386SUBSD:
+ op = x86.AFSUBDP
+ case ssa.Op386MULSS, ssa.Op386MULSD:
+ op = x86.AFMULDP
+ case ssa.Op386DIVSS, ssa.Op386DIVSD:
+ op = x86.AFDIVDP
+ }
+ p := gc.Prog(op)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = x86.REG_F0
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = s.SSEto387[gc.SSARegNum(v)] + 1
+
+ // Restore precision if needed.
+ switch v.Op {
+ case ssa.Op386ADDSS, ssa.Op386SUBSS, ssa.Op386MULSS, ssa.Op386DIVSS:
+ p := gc.Prog(x86.AFLDCW)
+ scratch387(s, &p.From)
+ }
+
+ return true
+
+ case ssa.Op386UCOMISS, ssa.Op386UCOMISD:
+ push(s, v.Args[0])
+
+ // Compare.
+ p := gc.Prog(x86.AFUCOMP)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = x86.REG_F0
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = s.SSEto387[gc.SSARegNum(v.Args[1])] + 1
+
+ // Save AX.
+ p = gc.Prog(x86.AMOVL)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = x86.REG_AX
+ scratch387(s, &p.To)
+
+ // Move status word into AX.
+ p = gc.Prog(x86.AFSTSW)
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = x86.REG_AX
+
+ // Then move the flags we need to the integer flags.
+ gc.Prog(x86.ASAHF)
+
+ // Restore AX.
+ p = gc.Prog(x86.AMOVL)
+ scratch387(s, &p.From)
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = x86.REG_AX
+
+ return true
+
+ case ssa.Op386SQRTSD:
+ push(s, v.Args[0])
+ gc.Prog(x86.AFSQRT)
+ popAndSave(s, v)
+ return true
+
+ case ssa.Op386PXOR:
+ a0 := v.Args[0]
+ a1 := v.Args[1]
+ for a0.Op == ssa.OpCopy {
+ a0 = a0.Args[0]
+ }
+ for a1.Op == ssa.OpCopy {
+ a1 = a1.Args[0]
+ }
+ if (a0.Op == ssa.Op386MOVSSconst || a0.Op == ssa.Op386MOVSDconst) && a0.AuxInt == -0x8000000000000000 {
+ push(s, v.Args[1])
+ gc.Prog(x86.AFCHS)
+ popAndSave(s, v)
+ return true
+ }
+ if (a1.Op == ssa.Op386MOVSSconst || a1.Op == ssa.Op386MOVSDconst) && a1.AuxInt == -0x8000000000000000 {
+ push(s, v.Args[0])
+ gc.Prog(x86.AFCHS)
+ popAndSave(s, v)
+ return true
+ }
+ v.Fatalf("PXOR not used to change sign %s", v.LongString())
+
+ case ssa.Op386CVTSL2SS, ssa.Op386CVTSL2SD:
+ p := gc.Prog(x86.AMOVL)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = gc.SSARegNum(v.Args[0])
+ scratch387(s, &p.To)
+ p = gc.Prog(x86.AFMOVL)
+ scratch387(s, &p.From)
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = x86.REG_F0
+ popAndSave(s, v)
+ return true
+
+ case ssa.Op386CVTTSD2SL, ssa.Op386CVTTSS2SL:
+ push(s, v.Args[0])
+
+ // Save control word.
+ p := gc.Prog(x86.AFSTCW)
+ scratch387(s, &p.To)
+ p.To.Offset += 4
+
+ // Load control word which truncates (rounds towards zero).
+ p = gc.Prog(x86.AFLDCW)
+ p.From.Type = obj.TYPE_MEM
+ p.From.Name = obj.NAME_EXTERN
+ p.From.Sym = gc.Linksym(gc.Pkglookup("controlWord64trunc", gc.Runtimepkg))
+
+ // Now do the conversion.
+ p = gc.Prog(x86.AFMOVLP)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = x86.REG_F0
+ scratch387(s, &p.To)
+ p = gc.Prog(x86.AMOVL)
+ scratch387(s, &p.From)
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = gc.SSARegNum(v)
+
+ // Restore control word.
+ p = gc.Prog(x86.AFLDCW)
+ scratch387(s, &p.From)
+ p.From.Offset += 4
+ return true
+
+ case ssa.Op386CVTSS2SD:
+ // float32 -> float64 is a nop
+ push(s, v.Args[0])
+ popAndSave(s, v)
+ return true
+
+ case ssa.Op386CVTSD2SS:
+ // Round to nearest float32.
+ push(s, v.Args[0])
+ p := gc.Prog(x86.AFMOVFP)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = x86.REG_F0
+ scratch387(s, &p.To)
+ p = gc.Prog(x86.AFMOVF)
+ scratch387(s, &p.From)
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = x86.REG_F0
+ popAndSave(s, v)
+ return true
+
+ case ssa.OpLoadReg:
+ if !v.Type.IsFloat() {
+ return false
+ }
+ // Load+push the value we need.
+ p := gc.Prog(loadPush(v.Type))
+ n, off := gc.AutoVar(v.Args[0])
+ p.From.Type = obj.TYPE_MEM
+ p.From.Node = n
+ p.From.Sym = gc.Linksym(n.Sym)
+ p.From.Offset = off
+ if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT {
+ p.From.Name = obj.NAME_PARAM
+ p.From.Offset += n.Xoffset
+ } else {
+ p.From.Name = obj.NAME_AUTO
+ }
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = x86.REG_F0
+ // Move the value to its assigned register.
+ popAndSave(s, v)
+ return true
+
+ case ssa.OpStoreReg:
+ if !v.Type.IsFloat() {
+ return false
+ }
+ push(s, v.Args[0])
+ var op obj.As
+ switch v.Type.Size() {
+ case 4:
+ op = x86.AFMOVFP
+ case 8:
+ op = x86.AFMOVDP
+ }
+ p := gc.Prog(op)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = x86.REG_F0
+ n, off := gc.AutoVar(v)
+ p.To.Type = obj.TYPE_MEM
+ p.To.Node = n
+ p.To.Sym = gc.Linksym(n.Sym)
+ p.To.Offset = off
+ if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT {
+ p.To.Name = obj.NAME_PARAM
+ p.To.Offset += n.Xoffset
+ } else {
+ p.To.Name = obj.NAME_AUTO
+ }
+ return true
+
+ case ssa.OpCopy:
+ if !v.Type.IsFloat() {
+ return false
+ }
+ push(s, v.Args[0])
+ popAndSave(s, v)
+ return true
+
+ case ssa.Op386CALLstatic, ssa.Op386CALLclosure, ssa.Op386CALLdefer, ssa.Op386CALLgo, ssa.Op386CALLinter:
+ flush387(s) // Calls must empty the the FP stack.
+ return false // then issue the call as normal
+ }
+ return false
+}
+
+// push pushes v onto the floating-point stack. v must be in a register.
+func push(s *gc.SSAGenState, v *ssa.Value) {
+ p := gc.Prog(x86.AFMOVD)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = s.SSEto387[gc.SSARegNum(v)]
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = x86.REG_F0
+}
+
+// popAndSave pops a value off of the floating-point stack and stores
+// it in the reigster assigned to v.
+func popAndSave(s *gc.SSAGenState, v *ssa.Value) {
+ r := gc.SSARegNum(v)
+ if _, ok := s.SSEto387[r]; ok {
+ // Pop value, write to correct register.
+ p := gc.Prog(x86.AFMOVDP)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = x86.REG_F0
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = s.SSEto387[gc.SSARegNum(v)] + 1
+ } else {
+ // Don't actually pop value. This 387 register is now the
+ // new home for the not-yet-assigned-a-home SSE register.
+ // Increase the register mapping of all other registers by one.
+ for rSSE, r387 := range s.SSEto387 {
+ s.SSEto387[rSSE] = r387 + 1
+ }
+ s.SSEto387[r] = x86.REG_F0
+ }
+}
+
+// loadPush returns the opcode for load+push of the given type.
+func loadPush(t ssa.Type) obj.As {
+ if t.Size() == 4 {
+ return x86.AFMOVF
+ }
+ return x86.AFMOVD
+}
+
+// flush387 removes all entries from the 387 floating-point stack.
+func flush387(s *gc.SSAGenState) {
+ for k := range s.SSEto387 {
+ p := gc.Prog(x86.AFMOVDP)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = x86.REG_F0
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = x86.REG_F0
+ delete(s.SSEto387, k)
+ }
+}
+
+// scratch387 initializes a to the scratch location used by some 387 rewrites.
+func scratch387(s *gc.SSAGenState, a *obj.Addr) {
+ a.Type = obj.TYPE_MEM
+ a.Name = obj.NAME_AUTO
+ a.Node = s.Scratch387
+ a.Sym = gc.Linksym(s.Scratch387.Sym)
+ a.Reg = x86.REG_SP
+}