Diffstat (limited to 'src/cmd/8g/ggen.go')
 src/cmd/8g/ggen.go | 1297 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1297 insertions(+), 0 deletions(-)
diff --git a/src/cmd/8g/ggen.go b/src/cmd/8g/ggen.go
new file mode 100644
index 0000000000..f72beda21a
--- /dev/null
+++ b/src/cmd/8g/ggen.go
@@ -0,0 +1,1297 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+ "cmd/internal/gc"
+ "cmd/internal/obj"
+ "cmd/internal/obj/i386"
+)
+
+func defframe(ptxt *obj.Prog) {
+ var frame uint32
+ var ax uint32
+ var p *obj.Prog
+ var lo int64
+ var hi int64
+ var l *gc.NodeList
+ var n *gc.Node
+
+ // fill in argument size, stack size
+ ptxt.To.Type = obj.TYPE_TEXTSIZE
+
+ ptxt.To.U.Argsize = int32(gc.Rnd(gc.Curfn.Type.Argwid, int64(gc.Widthptr)))
+ frame = uint32(gc.Rnd(gc.Stksize+gc.Maxarg, int64(gc.Widthreg)))
+ ptxt.To.Offset = int64(frame)
+
+ // insert code to zero ambiguously live variables
+ // so that the garbage collector only sees initialized values
+ // when it looks for pointers.
+ p = ptxt
+
+ hi = 0
+ lo = hi
+ ax = 0
+ for l = gc.Curfn.Dcl; l != nil; l = l.Next {
+ n = l.N
+ if n.Needzero == 0 {
+ continue
+ }
+ if n.Class != gc.PAUTO {
+ gc.Fatal("needzero class %d", n.Class)
+ }
+ if n.Type.Width%int64(gc.Widthptr) != 0 || n.Xoffset%int64(gc.Widthptr) != 0 || n.Type.Width == 0 {
+ gc.Fatal("var %v has size %d offset %d", gc.Nconv(n, obj.FmtLong), int(n.Type.Width), int(n.Xoffset))
+ }
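+ // Extend the pending range downward when this var ends two
+ // pointer words below it; the small gap is simply zeroed along
+ // with the rest, which beats starting a new range.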
+ if lo != hi && n.Xoffset+n.Type.Width == lo-int64(2*gc.Widthptr) {
+ // merge with range we already have
+ lo = n.Xoffset
+
+ continue
+ }
+
+ // zero old range
+ p = zerorange(p, int64(frame), lo, hi, &ax)
+
+ // set new range
+ hi = n.Xoffset + n.Type.Width
+
+ lo = n.Xoffset
+ }
+
+ // zero final range
+ zerorange(p, int64(frame), lo, hi, &ax)
+}
+
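+// zerorange zeroes the stack range [frame+lo, frame+hi).
+// *ax tracks whether AX has been zeroed yet, so a run of calls
+// clears it at most once.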
+func zerorange(p *obj.Prog, frame int64, lo int64, hi int64, ax *uint32) *obj.Prog {
+ var cnt int64
+ var i int64
+
+ cnt = hi - lo
+ if cnt == 0 {
+ return p
+ }
+ if *ax == 0 {
+ p = appendpp(p, i386.AMOVL, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, i386.REG_AX, 0)
+ *ax = 1
+ }
+
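+ // Pick a strategy by size: unrolled MOVLs for small ranges,
+ // a jump into duffzero for medium ones (unavailable under NaCl),
+ // and REP STOSL for everything larger.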
+ if cnt <= int64(4*gc.Widthreg) {
+ for i = 0; i < cnt; i += int64(gc.Widthreg) {
+ p = appendpp(p, i386.AMOVL, obj.TYPE_REG, i386.REG_AX, 0, obj.TYPE_MEM, i386.REG_SP, frame+lo+i)
+ }
+ } else if !gc.Nacl && cnt <= int64(128*gc.Widthreg) {
+ p = appendpp(p, i386.ALEAL, obj.TYPE_MEM, i386.REG_SP, frame+lo, obj.TYPE_REG, i386.REG_DI, 0)
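+ // 1 and 128 = magic constants: see ../../runtime/asm_386.s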
+ p = appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_ADDR, 0, 1*(128-cnt/int64(gc.Widthreg)))
+ p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
+ } else {
+ p = appendpp(p, i386.AMOVL, obj.TYPE_CONST, 0, cnt/int64(gc.Widthreg), obj.TYPE_REG, i386.REG_CX, 0)
+ p = appendpp(p, i386.ALEAL, obj.TYPE_MEM, i386.REG_SP, frame+lo, obj.TYPE_REG, i386.REG_DI, 0)
+ p = appendpp(p, i386.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
+ p = appendpp(p, i386.ASTOSL, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
+ }
+
+ return p
+}
+
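+// appendpp allocates a new instruction with the given opcode and
+// operands, links it after p, and returns it.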
+func appendpp(p *obj.Prog, as int, ftype int, freg int, foffset int64, ttype int, treg int, toffset int64) *obj.Prog {
+ var q *obj.Prog
+ q = gc.Ctxt.NewProg()
+ gc.Clearp(q)
+ q.As = int16(as)
+ q.Lineno = p.Lineno
+ q.From.Type = int16(ftype)
+ q.From.Reg = int16(freg)
+ q.From.Offset = foffset
+ q.To.Type = int16(ttype)
+ q.To.Reg = int16(treg)
+ q.To.Offset = toffset
+ q.Link = p.Link
+ p.Link = q
+ return q
+}
+
+func clearfat(nl *gc.Node) {
+ var w uint32
+ var c uint32
+ var q uint32
+ var n1 gc.Node
+ var z gc.Node
+ var p *obj.Prog
+
+ /* clear a fat object */
+ if gc.Debug['g'] != 0 {
+ gc.Dump("\nclearfat", nl)
+ }
+
+ w = uint32(nl.Type.Width)
+
+ // Avoid taking the address for simple enough types.
+ if componentgen(nil, nl) {
+ return
+ }
+
+ c = w % 4 // trailing bytes
+ q = w / 4 // 4-byte words
+
+ if q < 4 {
+ // Write sequence of MOV 0, off(base) instead of using STOSL.
+ // The hope is that although the code will be slightly longer,
+ // the MOVs will have no dependencies and pipeline better
+ // than the unrolled STOSL loop.
+ // NOTE: Must use agen, not igen, so that optimizer sees address
+ // being taken. We are not writing on field boundaries.
+ regalloc(&n1, gc.Types[gc.Tptr], nil)
+
+ agen(nl, &n1)
+ n1.Op = gc.OINDREG
+ gc.Nodconst(&z, gc.Types[gc.TUINT64], 0)
+ for ; q > 0; q-- {
+ n1.Type = z.Type
+ gins(i386.AMOVL, &z, &n1)
+ n1.Xoffset += 4
+ }
+
+ gc.Nodconst(&z, gc.Types[gc.TUINT8], 0)
+ for ; c > 0; c-- {
+ n1.Type = z.Type
+ gins(i386.AMOVB, &z, &n1)
+ n1.Xoffset++
+ }
+
+ regfree(&n1)
+ return
+ }
+
+ gc.Nodreg(&n1, gc.Types[gc.Tptr], i386.REG_DI)
+ agen(nl, &n1)
+ gconreg(i386.AMOVL, 0, i386.REG_AX)
+
+ if q > 128 || (q >= 4 && gc.Nacl) {
+ gconreg(i386.AMOVL, int64(q), i386.REG_CX)
+ gins(i386.AREP, nil, nil) // repeat
+ gins(i386.ASTOSL, nil, nil) // STOSL EAX,*(DI)+
+ } else if q >= 4 {
+ p = gins(obj.ADUFFZERO, nil, nil)
+ p.To.Type = obj.TYPE_ADDR
+ p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
+
+ // 1 and 128 = magic constants: see ../../runtime/asm_386.s
+ p.To.Offset = 1 * (128 - int64(q))
+ } else {
+ for q > 0 {
+ gins(i386.ASTOSL, nil, nil) // STOSL EAX,*(DI)+
+ q--
+ }
+ }
+
+ for c > 0 {
+ gins(i386.ASTOSB, nil, nil) // STOSB AL,*(DI)+
+ c--
+ }
+}
+
+/*
+ * generate:
+ * call f
+ * proc=-1 normal call but no return
+ * proc=0 normal call
+ * proc=1 goroutine run in new proc
+ * proc=2 defer call, save away stack
+ * proc=3 normal call to C pointer (not Go func value)
+ */
+func ginscall(f *gc.Node, proc int) {
+ var p *obj.Prog
+ var reg gc.Node
+ var r1 gc.Node
+ var con gc.Node
+ var stk gc.Node
+ var extra int32
+
+ if f.Type != nil {
+ extra = 0
+ if proc == 1 || proc == 2 {
+ extra = 2 * int32(gc.Widthptr)
+ }
+ gc.Setmaxarg(f.Type, extra)
+ }
+
+ switch proc {
+ default:
+ gc.Fatal("ginscall: bad proc %d", proc)
+
+ case 0, // normal call
+ -1: // normal call but no return
+ if f.Op == gc.ONAME && f.Class == gc.PFUNC {
+ if f == gc.Deferreturn {
+ // Deferred calls will appear to be returning to
+ // the CALL deferreturn(SB) that we are about to emit.
+ // However, the stack trace code will show the line
+ // of the instruction byte before the return PC.
+ // To avoid that being an unrelated instruction,
+ // insert an x86 NOP so that we will have the right line number.
+ // x86 NOP 0x90 is really XCHG AX, AX; use that description
+ // because the NOP pseudo-instruction will be removed by
+ // the linker.
+ gc.Nodreg(&reg, gc.Types[gc.TINT], i386.REG_AX)
+
+ gins(i386.AXCHGL, &reg, &reg)
+ }
+
+ p = gins(obj.ACALL, nil, f)
+ gc.Afunclit(&p.To, f)
+ if proc == -1 || gc.Noreturn(p) {
+ gins(obj.AUNDEF, nil, nil)
+ }
+ break
+ }
+
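+ // Go func value: DX (the closure context register) holds the
+ // func value; BX gets the code pointer loaded from it, and the
+ // call goes through BX with DX still live.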
+ gc.Nodreg(&reg, gc.Types[gc.Tptr], i386.REG_DX)
+ gc.Nodreg(&r1, gc.Types[gc.Tptr], i386.REG_BX)
+ gmove(f, &reg)
+ reg.Op = gc.OINDREG
+ gmove(&reg, &r1)
+ reg.Op = gc.OREGISTER
+ gins(obj.ACALL, &reg, &r1)
+
+ case 3: // normal call of c function pointer
+ gins(obj.ACALL, nil, f)
+
+ case 1, // call in new proc (go)
+ 2: // deferred call (defer)
+ stk = gc.Node{}
+
+ stk.Op = gc.OINDREG
+ stk.Val.U.Reg = i386.REG_SP
+ stk.Xoffset = 0
+
+ // size of arguments at 0(SP)
+ gc.Nodconst(&con, gc.Types[gc.TINT32], int64(gc.Argsize(f.Type)))
+
+ gins(i386.AMOVL, &con, &stk)
+
+ // FuncVal* at 4(SP)
+ stk.Xoffset = int64(gc.Widthptr)
+
+ gins(i386.AMOVL, f, &stk)
+
+ if proc == 1 {
+ ginscall(gc.Newproc, 0)
+ } else {
+ ginscall(gc.Deferproc, 0)
+ }
+ if proc == 2 {
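+ // deferproc returns 0 normally; a nonzero return (after a
+ // recover) means this function must return immediately.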
+ gc.Nodreg(&reg, gc.Types[gc.TINT32], i386.REG_AX)
+ gins(i386.ATESTL, &reg, &reg)
+ p = gc.Gbranch(i386.AJEQ, nil, +1)
+ cgen_ret(nil)
+ gc.Patch(p, gc.Pc)
+ }
+ }
+}
+
+/*
+ * n is call to interface method.
+ * generate res = n.
+ */
+func cgen_callinter(n *gc.Node, res *gc.Node, proc int) {
+ var i *gc.Node
+ var f *gc.Node
+ var tmpi gc.Node
+ var nodi gc.Node
+ var nodo gc.Node
+ var nodr gc.Node
+ var nodsp gc.Node
+
+ i = n.Left
+ if i.Op != gc.ODOTINTER {
+ gc.Fatal("cgen_callinter: not ODOTINTER %v", gc.Oconv(int(i.Op), 0))
+ }
+
+ f = i.Right // field
+ if f.Op != gc.ONAME {
+ gc.Fatal("cgen_callinter: not ONAME %v", gc.Oconv(int(f.Op), 0))
+ }
+
+ i = i.Left // interface
+
+ if i.Addable == 0 {
+ gc.Tempname(&tmpi, i.Type)
+ cgen(i, &tmpi)
+ i = &tmpi
+ }
+
+ gc.Genlist(n.List) // assign the args
+
+ // i is now addable, prepare an indirected
+ // register to hold its address.
+ igen(i, &nodi, res) // REG = &inter
+
+ gc.Nodindreg(&nodsp, gc.Types[gc.Tptr], i386.REG_SP)
+
+ nodsp.Xoffset = 0
+ if proc != 0 {
+ nodsp.Xoffset += 2 * int64(gc.Widthptr) // leave room for size & fn
+ }
+ nodi.Type = gc.Types[gc.Tptr]
+ nodi.Xoffset += int64(gc.Widthptr)
+ cgen(&nodi, &nodsp) // {0 or 8}(SP) = 4(REG) -- i.data
+
+ regalloc(&nodo, gc.Types[gc.Tptr], res)
+
+ nodi.Type = gc.Types[gc.Tptr]
+ nodi.Xoffset -= int64(gc.Widthptr)
+ cgen(&nodi, &nodo) // REG = 0(REG) -- i.tab
+ regfree(&nodi)
+
+ regalloc(&nodr, gc.Types[gc.Tptr], &nodo)
+ if n.Left.Xoffset == gc.BADWIDTH {
+ gc.Fatal("cgen_callinter: badwidth")
+ }
+ gc.Cgen_checknil(&nodo)
+ nodo.Op = gc.OINDREG
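+ // fun[f] sits past the itab header: three pointers (inter,
+ // type, link) plus two int32 fields, hence 3*Widthptr+8.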
+ nodo.Xoffset = n.Left.Xoffset + 3*int64(gc.Widthptr) + 8
+
+ if proc == 0 {
+ // plain call: use direct c function pointer - more efficient
+ cgen(&nodo, &nodr) // REG = 20+offset(REG) -- i.tab->fun[f]
+ proc = 3
+ } else {
+ // go/defer: generate Go func value.
+ gins(i386.ALEAL, &nodo, &nodr) // REG = &(20+offset(REG)) -- i.tab->fun[f]
+ }
+
+ nodr.Type = n.Left.Type
+ ginscall(&nodr, proc)
+
+ regfree(&nodr)
+ regfree(&nodo)
+}
+
+/*
+ * generate function call;
+ * proc=0 normal call
+ * proc=1 goroutine run in new proc
+ * proc=2 defer call, save away stack
+ */
+func cgen_call(n *gc.Node, proc int) {
+ var t *gc.Type
+ var nod gc.Node
+ var afun gc.Node
+
+ if n == nil {
+ return
+ }
+
+ if n.Left.Ullman >= gc.UINF {
+ // if name involves a fn call
+ // precompute the address of the fn
+ gc.Tempname(&afun, gc.Types[gc.Tptr])
+
+ cgen(n.Left, &afun)
+ }
+
+ gc.Genlist(n.List) // assign the args
+ t = n.Left.Type
+
+ // call tempname pointer
+ if n.Left.Ullman >= gc.UINF {
+ regalloc(&nod, gc.Types[gc.Tptr], nil)
+ gc.Cgen_as(&nod, &afun)
+ nod.Type = t
+ ginscall(&nod, proc)
+ regfree(&nod)
+ return
+ }
+
+ // call pointer
+ if n.Left.Op != gc.ONAME || n.Left.Class != gc.PFUNC {
+ regalloc(&nod, gc.Types[gc.Tptr], nil)
+ gc.Cgen_as(&nod, n.Left)
+ nod.Type = t
+ ginscall(&nod, proc)
+ regfree(&nod)
+ return
+ }
+
+ // call direct
+ n.Left.Method = 1
+
+ ginscall(n.Left, proc)
+}
+
+/*
+ * call to n has already been generated.
+ * generate:
+ * res = return value from call.
+ */
+func cgen_callret(n *gc.Node, res *gc.Node) {
+ var nod gc.Node
+ var fp *gc.Type
+ var t *gc.Type
+ var flist gc.Iter
+
+ t = n.Left.Type
+ if t.Etype == gc.TPTR32 || t.Etype == gc.TPTR64 {
+ t = t.Type
+ }
+
+ fp = gc.Structfirst(&flist, gc.Getoutarg(t))
+ if fp == nil {
+ gc.Fatal("cgen_callret: nil")
+ }
+
+ nod = gc.Node{}
+ nod.Op = gc.OINDREG
+ nod.Val.U.Reg = i386.REG_SP
+ nod.Addable = 1
+
+ nod.Xoffset = fp.Width
+ nod.Type = fp.Type
+ gc.Cgen_as(res, &nod)
+}
+
+/*
+ * call to n has already been generated.
+ * generate:
+ * res = &return value from call.
+ */
+func cgen_aret(n *gc.Node, res *gc.Node) {
+ var nod1 gc.Node
+ var nod2 gc.Node
+ var fp *gc.Type
+ var t *gc.Type
+ var flist gc.Iter
+
+ t = n.Left.Type
+ if gc.Isptr[t.Etype] != 0 {
+ t = t.Type
+ }
+
+ fp = gc.Structfirst(&flist, gc.Getoutarg(t))
+ if fp == nil {
+ gc.Fatal("cgen_aret: nil")
+ }
+
+ nod1 = gc.Node{}
+ nod1.Op = gc.OINDREG
+ nod1.Val.U.Reg = i386.REG_SP
+ nod1.Addable = 1
+
+ nod1.Xoffset = fp.Width
+ nod1.Type = fp.Type
+
+ if res.Op != gc.OREGISTER {
+ regalloc(&nod2, gc.Types[gc.Tptr], res)
+ gins(i386.ALEAL, &nod1, &nod2)
+ gins(i386.AMOVL, &nod2, res)
+ regfree(&nod2)
+ } else {
+ gins(i386.ALEAL, &nod1, res)
+ }
+}
+
+/*
+ * generate return.
+ * n->left is assignments to return values.
+ */
+func cgen_ret(n *gc.Node) {
+ var p *obj.Prog
+
+ if n != nil {
+ gc.Genlist(n.List) // copy out args
+ }
+ if gc.Hasdefer != 0 {
+ ginscall(gc.Deferreturn, 0)
+ }
+ gc.Genlist(gc.Curfn.Exit)
+ p = gins(obj.ARET, nil, nil)
+ if n != nil && n.Op == gc.ORETJMP {
+ p.To.Type = obj.TYPE_MEM
+ p.To.Name = obj.NAME_EXTERN
+ p.To.Sym = gc.Linksym(n.Left.Sym)
+ }
+}
+
+/*
+ * generate division.
+ * caller must set:
+ * ax = allocated AX register
+ * dx = allocated DX register
+ * generates one of:
+ * res = nl / nr
+ * res = nl % nr
+ * according to op.
+ */
+func dodiv(op int, nl *gc.Node, nr *gc.Node, res *gc.Node, ax *gc.Node, dx *gc.Node) {
+ var check int
+ var n1 gc.Node
+ var t1 gc.Node
+ var t2 gc.Node
+ var t3 gc.Node
+ var t4 gc.Node
+ var n4 gc.Node
+ var nz gc.Node
+ var t *gc.Type
+ var t0 *gc.Type
+ var p1 *obj.Prog
+ var p2 *obj.Prog
+
+ // Have to be careful about handling
+ // most negative int divided by -1 correctly.
+ // The hardware will trap.
+ // Also the byte divide instruction needs AH,
+ // which we otherwise don't have to deal with.
+ // Easiest way to avoid for int8, int16: use int32.
+ // For int32 and int64, use explicit test.
+ // Could use int64 hw for int32.
+ t = nl.Type
+
+ t0 = t
+ check = 0
+ if gc.Issigned[t.Etype] != 0 {
+ check = 1
+ if gc.Isconst(nl, gc.CTINT) && gc.Mpgetfix(nl.Val.U.Xval) != -1<<uint64(t.Width*8-1) {
+ check = 0
+ } else if gc.Isconst(nr, gc.CTINT) && gc.Mpgetfix(nr.Val.U.Xval) != -1 {
+ check = 0
+ }
+ }
+
+ if t.Width < 4 {
+ if gc.Issigned[t.Etype] != 0 {
+ t = gc.Types[gc.TINT32]
+ } else {
+ t = gc.Types[gc.TUINT32]
+ }
+ check = 0
+ }
+
+ gc.Tempname(&t1, t)
+ gc.Tempname(&t2, t)
+ if t0 != t {
+ gc.Tempname(&t3, t0)
+ gc.Tempname(&t4, t0)
+ cgen(nl, &t3)
+ cgen(nr, &t4)
+
+ // Convert.
+ gmove(&t3, &t1)
+
+ gmove(&t4, &t2)
+ } else {
+ cgen(nl, &t1)
+ cgen(nr, &t2)
+ }
+
+ if !gc.Samereg(ax, res) && !gc.Samereg(dx, res) {
+ regalloc(&n1, t, res)
+ } else {
+ regalloc(&n1, t, nil)
+ }
+ gmove(&t2, &n1)
+ gmove(&t1, ax)
+ p2 = nil
+ if gc.Nacl {
+ // Native Client does not relay the divide-by-zero trap
+ // to the executing program, so we must insert a check
+ // for ourselves.
+ gc.Nodconst(&n4, t, 0)
+
+ gins(optoas(gc.OCMP, t), &n1, &n4)
+ p1 = gc.Gbranch(optoas(gc.ONE, t), nil, +1)
+ if panicdiv == nil {
+ panicdiv = gc.Sysfunc("panicdivide")
+ }
+ ginscall(panicdiv, -1)
+ gc.Patch(p1, gc.Pc)
+ }
+
+ if check != 0 {
+ gc.Nodconst(&n4, t, -1)
+ gins(optoas(gc.OCMP, t), &n1, &n4)
+ p1 = gc.Gbranch(optoas(gc.ONE, t), nil, +1)
+ if op == gc.ODIV {
+ // a / (-1) is -a.
+ gins(optoas(gc.OMINUS, t), nil, ax)
+
+ gmove(ax, res)
+ } else {
+ // a % (-1) is 0.
+ gc.Nodconst(&n4, t, 0)
+
+ gmove(&n4, res)
+ }
+
+ p2 = gc.Gbranch(obj.AJMP, nil, 0)
+ gc.Patch(p1, gc.Pc)
+ }
+
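+ // Set up DX:AX for the divide: zero DX for unsigned,
+ // sign-extend AX into DX (CDQ) for signed.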
+ if gc.Issigned[t.Etype] == 0 {
+ gc.Nodconst(&nz, t, 0)
+ gmove(&nz, dx)
+ } else {
+ gins(optoas(gc.OEXTEND, t), nil, nil)
+ }
+ gins(optoas(op, t), &n1, nil)
+ regfree(&n1)
+
+ if op == gc.ODIV {
+ gmove(ax, res)
+ } else {
+ gmove(dx, res)
+ }
+ if check != 0 {
+ gc.Patch(p2, gc.Pc)
+ }
+}
+
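+// savex saves the register dr to a stack temporary when it is live
+// and is not the destination, then allocates it into *x with type t.
+// restx releases the register and restores the saved value.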
+func savex(dr int, x *gc.Node, oldx *gc.Node, res *gc.Node, t *gc.Type) {
+ var r int
+
+ r = int(reg[dr])
+ gc.Nodreg(x, gc.Types[gc.TINT32], dr)
+
+ // save current ax and dx if they are live
+ // and not the destination
+ *oldx = gc.Node{}
+
+ if r > 0 && !gc.Samereg(x, res) {
+ gc.Tempname(oldx, gc.Types[gc.TINT32])
+ gmove(x, oldx)
+ }
+
+ regalloc(x, t, x)
+}
+
+func restx(x *gc.Node, oldx *gc.Node) {
+ regfree(x)
+
+ if oldx.Op != 0 {
+ x.Type = gc.Types[gc.TINT32]
+ gmove(oldx, x)
+ }
+}
+
+/*
+ * generate division according to op, one of:
+ * res = nl / nr
+ * res = nl % nr
+ */
+func cgen_div(op int, nl *gc.Node, nr *gc.Node, res *gc.Node) {
+ var ax gc.Node
+ var dx gc.Node
+ var oldax gc.Node
+ var olddx gc.Node
+ var t *gc.Type
+
+ if gc.Is64(nl.Type) {
+ gc.Fatal("cgen_div %v", gc.Tconv(nl.Type, 0))
+ }
+
+ if gc.Issigned[nl.Type.Etype] != 0 {
+ t = gc.Types[gc.TINT32]
+ } else {
+ t = gc.Types[gc.TUINT32]
+ }
+ savex(i386.REG_AX, &ax, &oldax, res, t)
+ savex(i386.REG_DX, &dx, &olddx, res, t)
+ dodiv(op, nl, nr, res, &ax, &dx)
+ restx(&dx, &olddx)
+ restx(&ax, &oldax)
+}
+
+/*
+ * generate shift according to op, one of:
+ * res = nl << nr
+ * res = nl >> nr
+ */
+func cgen_shift(op int, bounded bool, nl *gc.Node, nr *gc.Node, res *gc.Node) {
+ var n1 gc.Node
+ var n2 gc.Node
+ var nt gc.Node
+ var cx gc.Node
+ var oldcx gc.Node
+ var hi gc.Node
+ var lo gc.Node
+ var a int
+ var w int
+ var p1 *obj.Prog
+ var p2 *obj.Prog
+ var sc uint64
+
+ if nl.Type.Width > 4 {
+ gc.Fatal("cgen_shift %v", gc.Tconv(nl.Type, 0))
+ }
+
+ w = int(nl.Type.Width * 8)
+
+ a = optoas(op, nl.Type)
+
+ if nr.Op == gc.OLITERAL {
+ gc.Tempname(&n2, nl.Type)
+ cgen(nl, &n2)
+ regalloc(&n1, nl.Type, res)
+ gmove(&n2, &n1)
+ sc = uint64(gc.Mpgetfix(nr.Val.U.Xval))
+ if sc >= uint64(nl.Type.Width*8) {
+ // large shift gets 2 shifts by width-1
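+ // (two shifts by w-1 yield 0, or all sign bits for a signed
+ // right shift, matching Go's semantics for oversized shifts)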
+ gins(a, ncon(uint32(w)-1), &n1)
+
+ gins(a, ncon(uint32(w)-1), &n1)
+ } else {
+ gins(a, nr, &n1)
+ }
+ gmove(&n1, res)
+ regfree(&n1)
+ return
+ }
+
+ oldcx = gc.Node{}
+ gc.Nodreg(&cx, gc.Types[gc.TUINT32], i386.REG_CX)
+ if reg[i386.REG_CX] > 1 && !gc.Samereg(&cx, res) {
+ gc.Tempname(&oldcx, gc.Types[gc.TUINT32])
+ gmove(&cx, &oldcx)
+ }
+
+ if nr.Type.Width > 4 {
+ gc.Tempname(&nt, nr.Type)
+ n1 = nt
+ } else {
+ gc.Nodreg(&n1, gc.Types[gc.TUINT32], i386.REG_CX)
+ regalloc(&n1, nr.Type, &n1) // to hold the shift count in CX
+ }
+
+ if gc.Samereg(&cx, res) {
+ regalloc(&n2, nl.Type, nil)
+ } else {
+ regalloc(&n2, nl.Type, res)
+ }
+ if nl.Ullman >= nr.Ullman {
+ cgen(nl, &n2)
+ cgen(nr, &n1)
+ } else {
+ cgen(nr, &n1)
+ cgen(nl, &n2)
+ }
+
+ // test and fix up large shifts
+ if bounded {
+ if nr.Type.Width > 4 {
+ // delayed reg alloc
+ gc.Nodreg(&n1, gc.Types[gc.TUINT32], i386.REG_CX)
+
+ regalloc(&n1, gc.Types[gc.TUINT32], &n1) // to hold the shift count in CX
+ split64(&nt, &lo, &hi)
+ gmove(&lo, &n1)
+ splitclean()
+ }
+ } else {
+ if nr.Type.Width > 4 {
+ // delayed reg alloc
+ gc.Nodreg(&n1, gc.Types[gc.TUINT32], i386.REG_CX)
+
+ regalloc(&n1, gc.Types[gc.TUINT32], &n1) // to hold the shift count in CX
+ split64(&nt, &lo, &hi)
+ gmove(&lo, &n1)
+ gins(optoas(gc.OCMP, gc.Types[gc.TUINT32]), &hi, ncon(0))
+ p2 = gc.Gbranch(optoas(gc.ONE, gc.Types[gc.TUINT32]), nil, +1)
+ gins(optoas(gc.OCMP, gc.Types[gc.TUINT32]), &n1, ncon(uint32(w)))
+ p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
+ splitclean()
+ gc.Patch(p2, gc.Pc)
+ } else {
+ gins(optoas(gc.OCMP, nr.Type), &n1, ncon(uint32(w)))
+ p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
+ }
+
+ if op == gc.ORSH && gc.Issigned[nl.Type.Etype] != 0 {
+ gins(a, ncon(uint32(w)-1), &n2)
+ } else {
+ gmove(ncon(0), &n2)
+ }
+
+ gc.Patch(p1, gc.Pc)
+ }
+
+ gins(a, &n1, &n2)
+
+ if oldcx.Op != 0 {
+ gmove(&oldcx, &cx)
+ }
+
+ gmove(&n2, res)
+
+ regfree(&n1)
+ regfree(&n2)
+}
+
+/*
+ * generate byte multiply:
+ * res = nl * nr
+ * there is no 2-operand byte multiply instruction so
+ * we do a full-width multiplication and truncate afterwards.
+ */
+func cgen_bmul(op int, nl *gc.Node, nr *gc.Node, res *gc.Node) {
+ var n1 gc.Node
+ var n2 gc.Node
+ var nt gc.Node
+ var t *gc.Type
+ var a int
+
+ // copy from byte to full registers
+ t = gc.Types[gc.TUINT32]
+
+ if gc.Issigned[nl.Type.Etype] != 0 {
+ t = gc.Types[gc.TINT32]
+ }
+
+ // largest ullman on left.
+ if nl.Ullman < nr.Ullman {
+ nl, nr = nr, nl
+ }
+
+ gc.Tempname(&nt, nl.Type)
+ cgen(nl, &nt)
+ regalloc(&n1, t, res)
+ cgen(nr, &n1)
+ regalloc(&n2, t, nil)
+ gmove(&nt, &n2)
+ a = optoas(op, t)
+ gins(a, &n2, &n1)
+ regfree(&n2)
+ gmove(&n1, res)
+ regfree(&n1)
+}
+
+/*
+ * generate high multiply:
+ * res = (nl*nr) >> width
+ */
+func cgen_hmul(nl *gc.Node, nr *gc.Node, res *gc.Node) {
+ var t *gc.Type
+ var a int
+ var n1 gc.Node
+ var n2 gc.Node
+ var ax gc.Node
+ var dx gc.Node
+
+ t = nl.Type
+ a = optoas(gc.OHMUL, t)
+
+ // gen nl in n1.
+ gc.Tempname(&n1, t)
+
+ cgen(nl, &n1)
+
+ // gen nr in n2.
+ regalloc(&n2, t, res)
+
+ cgen(nr, &n2)
+
+ // multiply.
+ gc.Nodreg(&ax, t, i386.REG_AX)
+
+ gmove(&n2, &ax)
+ gins(a, &n1, nil)
+ regfree(&n2)
+
+ if t.Width == 1 {
+ // byte multiply behaves differently.
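+ // the 8-bit multiply leaves the high byte in AH, not DX,
+ // so copy it over before storing the result.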
+ gc.Nodreg(&ax, t, i386.REG_AH)
+
+ gc.Nodreg(&dx, t, i386.REG_DX)
+ gmove(&ax, &dx)
+ }
+
+ gc.Nodreg(&dx, t, i386.REG_DX)
+ gmove(&dx, res)
+}
+
+/*
+ * generate floating-point operation.
+ */
+func cgen_float(n *gc.Node, res *gc.Node) {
+ var nl *gc.Node
+ var n1 gc.Node
+ var n2 gc.Node
+ var p1 *obj.Prog
+ var p2 *obj.Prog
+ var p3 *obj.Prog
+
+ nl = n.Left
+ switch n.Op {
+ case gc.OEQ,
+ gc.ONE,
+ gc.OLT,
+ gc.OLE,
+ gc.OGE:
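+ // Materialize the bool: jump past the "true" store, let bgen
+ // branch back to it when the comparison holds, and fall
+ // through to the "false" store otherwise.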
+ p1 = gc.Gbranch(obj.AJMP, nil, 0)
+ p2 = gc.Pc
+ gmove(gc.Nodbool(true), res)
+ p3 = gc.Gbranch(obj.AJMP, nil, 0)
+ gc.Patch(p1, gc.Pc)
+ bgen(n, true, 0, p2)
+ gmove(gc.Nodbool(false), res)
+ gc.Patch(p3, gc.Pc)
+ return
+
+ case gc.OPLUS:
+ cgen(nl, res)
+ return
+
+ case gc.OCONV:
+ if gc.Eqtype(n.Type, nl.Type) || gc.Noconv(n.Type, nl.Type) {
+ cgen(nl, res)
+ return
+ }
+
+ gc.Tempname(&n2, n.Type)
+ mgen(nl, &n1, res)
+ gmove(&n1, &n2)
+ gmove(&n2, res)
+ mfree(&n1)
+ return
+ }
+
+ if gc.Use_sse != 0 {
+ cgen_floatsse(n, res)
+ } else {
+ cgen_float387(n, res)
+ }
+}
+
+// floating-point. 387 (not SSE2)
+func cgen_float387(n *gc.Node, res *gc.Node) {
+ var f0 gc.Node
+ var f1 gc.Node
+ var nl *gc.Node
+ var nr *gc.Node
+
+ nl = n.Left
+ nr = n.Right
+ gc.Nodreg(&f0, nl.Type, i386.REG_F0)
+ gc.Nodreg(&f1, n.Type, i386.REG_F0+1)
+ if nr != nil {
+ goto flt2
+ }
+
+ // unary
+ cgen(nl, &f0)
+
+ if n.Op != gc.OCONV && n.Op != gc.OPLUS {
+ gins(foptoas(int(n.Op), n.Type, 0), nil, nil)
+ }
+ gmove(&f0, res)
+ return
+
+flt2: // binary
+ if nl.Ullman >= nr.Ullman {
+ cgen(nl, &f0)
+ if nr.Addable != 0 {
+ gins(foptoas(int(n.Op), n.Type, 0), nr, &f0)
+ } else {
+ cgen(nr, &f0)
+ gins(foptoas(int(n.Op), n.Type, Fpop), &f0, &f1)
+ }
+ } else {
+ cgen(nr, &f0)
+ if nl.Addable != 0 {
+ gins(foptoas(int(n.Op), n.Type, Frev), nl, &f0)
+ } else {
+ cgen(nl, &f0)
+ gins(foptoas(int(n.Op), n.Type, Frev|Fpop), &f0, &f1)
+ }
+ }
+
+ gmove(&f0, res)
+ return
+}
+
+func cgen_floatsse(n *gc.Node, res *gc.Node) {
+ var nl *gc.Node
+ var nr *gc.Node
+ var n1 gc.Node
+ var n2 gc.Node
+ var nt gc.Node
+ var a int
+
+ nl = n.Left
+ nr = n.Right
+ switch n.Op {
+ default:
+ gc.Dump("cgen_floatsse", n)
+ gc.Fatal("cgen_floatsse %v", gc.Oconv(int(n.Op), 0))
+ return
+
+ case gc.OMINUS,
+ gc.OCOM:
+ nr = gc.Nodintconst(-1)
+ gc.Convlit(&nr, n.Type)
+ a = foptoas(gc.OMUL, nl.Type, 0)
+ goto sbop
+
+ // symmetric binary
+ case gc.OADD,
+ gc.OMUL:
+ a = foptoas(int(n.Op), nl.Type, 0)
+
+ goto sbop
+
+ // asymmetric binary
+ case gc.OSUB,
+ gc.OMOD,
+ gc.ODIV:
+ a = foptoas(int(n.Op), nl.Type, 0)
+
+ goto abop
+ }
+
+sbop: // symmetric binary
+ if nl.Ullman < nr.Ullman || nl.Op == gc.OLITERAL {
+ nl, nr = nr, nl
+ }
+
+abop: // asymmetric binary
+ if nl.Ullman >= nr.Ullman {
+ gc.Tempname(&nt, nl.Type)
+ cgen(nl, &nt)
+ mgen(nr, &n2, nil)
+ regalloc(&n1, nl.Type, res)
+ gmove(&nt, &n1)
+ gins(a, &n2, &n1)
+ gmove(&n1, res)
+ regfree(&n1)
+ mfree(&n2)
+ } else {
+ regalloc(&n2, nr.Type, res)
+ cgen(nr, &n2)
+ regalloc(&n1, nl.Type, nil)
+ cgen(nl, &n1)
+ gins(a, &n2, &n1)
+ regfree(&n2)
+ gmove(&n1, res)
+ regfree(&n1)
+ }
+
+ return
+}
+
+func bgen_float(n *gc.Node, true_ int, likely int, to *obj.Prog) {
+ var et int
+ var a int
+ var nl *gc.Node
+ var nr *gc.Node
+ var n1 gc.Node
+ var n2 gc.Node
+ var n3 gc.Node
+ var tmp gc.Node
+ var t1 gc.Node
+ var t2 gc.Node
+ var ax gc.Node
+ var p1 *obj.Prog
+ var p2 *obj.Prog
+
+ nl = n.Left
+ nr = n.Right
+ a = int(n.Op)
+ if true_ == 0 {
+ // brcom is not valid on floats when NaN is involved.
+ p1 = gc.Gbranch(obj.AJMP, nil, 0)
+
+ p2 = gc.Gbranch(obj.AJMP, nil, 0)
+ gc.Patch(p1, gc.Pc)
+
+ // No need to avoid re-genning ninit.
+ bgen_float(n, 1, -likely, p2)
+
+ gc.Patch(gc.Gbranch(obj.AJMP, nil, 0), to)
+ gc.Patch(p2, gc.Pc)
+ return
+ }
+
+ if gc.Use_sse != 0 {
+ goto sse
+ } else {
+ goto x87
+ }
+
+x87:
+ a = gc.Brrev(a) // because the args are stacked
+ if a == gc.OGE || a == gc.OGT {
+ // only < and <= work right with NaN; reverse if needed
+ nl, nr = nr, nl
+ a = gc.Brrev(a)
+ }
+
+ gc.Nodreg(&tmp, nr.Type, i386.REG_F0)
+ gc.Nodreg(&n2, nr.Type, i386.REG_F0+1)
+ gc.Nodreg(&ax, gc.Types[gc.TUINT16], i386.REG_AX)
+ et = gc.Simsimtype(nr.Type)
+ if et == gc.TFLOAT64 {
+ if nl.Ullman > nr.Ullman {
+ cgen(nl, &tmp)
+ cgen(nr, &tmp)
+ gins(i386.AFXCHD, &tmp, &n2)
+ } else {
+ cgen(nr, &tmp)
+ cgen(nl, &tmp)
+ }
+
+ gins(i386.AFUCOMIP, &tmp, &n2)
+ gins(i386.AFMOVDP, &tmp, &tmp) // annoying pop but still better than STSW+SAHF
+ } else {
+ // TODO(rsc): The moves back and forth to memory
+ // here are for truncating the value to 32 bits.
+ // This handles 32-bit comparison but presumably
+ // all the other ops have the same problem.
+ // We need to figure out what the right general
+ // solution is, besides telling people to use float64.
+ gc.Tempname(&t1, gc.Types[gc.TFLOAT32])
+
+ gc.Tempname(&t2, gc.Types[gc.TFLOAT32])
+ cgen(nr, &t1)
+ cgen(nl, &t2)
+ gmove(&t2, &tmp)
+ gins(i386.AFCOMFP, &t1, &tmp)
+ gins(i386.AFSTSW, nil, &ax)
+ gins(i386.ASAHF, nil, nil)
+ }
+
+ goto ret
+
+sse:
+ if nl.Addable == 0 {
+ gc.Tempname(&n1, nl.Type)
+ cgen(nl, &n1)
+ nl = &n1
+ }
+
+ if nr.Addable == 0 {
+ gc.Tempname(&tmp, nr.Type)
+ cgen(nr, &tmp)
+ nr = &tmp
+ }
+
+ regalloc(&n2, nr.Type, nil)
+ gmove(nr, &n2)
+ nr = &n2
+
+ if nl.Op != gc.OREGISTER {
+ regalloc(&n3, nl.Type, nil)
+ gmove(nl, &n3)
+ nl = &n3
+ }
+
+ if a == gc.OGE || a == gc.OGT {
+ // only < and <= work right with NaN; reverse if needed
+ nl, nr = nr, nl
+ a = gc.Brrev(a)
+ }
+
+ gins(foptoas(gc.OCMP, nr.Type, 0), nl, nr)
+ if nl.Op == gc.OREGISTER {
+ regfree(nl)
+ }
+ regfree(nr)
+
+ret:
+ if a == gc.OEQ {
+ // neither NE nor P
+ p1 = gc.Gbranch(i386.AJNE, nil, -likely)
+
+ p2 = gc.Gbranch(i386.AJPS, nil, -likely)
+ gc.Patch(gc.Gbranch(obj.AJMP, nil, 0), to)
+ gc.Patch(p1, gc.Pc)
+ gc.Patch(p2, gc.Pc)
+ } else if a == gc.ONE {
+ // either NE or P
+ gc.Patch(gc.Gbranch(i386.AJNE, nil, likely), to)
+
+ gc.Patch(gc.Gbranch(i386.AJPS, nil, likely), to)
+ } else {
+ gc.Patch(gc.Gbranch(optoas(a, nr.Type), nil, likely), to)
+ }
+}
+
+// Called after regopt and peep have run.
+// Expand CHECKNIL pseudo-op into actual nil pointer check.
+func expandchecks(firstp *obj.Prog) {
+ var p *obj.Prog
+ var p1 *obj.Prog
+ var p2 *obj.Prog
+
+ for p = firstp; p != nil; p = p.Link {
+ if p.As != obj.ACHECKNIL {
+ continue
+ }
+ if gc.Debug_checknil != 0 && p.Lineno > 1 { // p.Lineno == 1 in generated wrappers
+ gc.Warnl(int(p.Lineno), "generated nil check")
+ }
+
+ // check is
+ // CMP arg, $0
+ // JNE 2(PC) (likely)
+ // MOV AX, 0
+ p1 = gc.Ctxt.NewProg()
+
+ p2 = gc.Ctxt.NewProg()
+ gc.Clearp(p1)
+ gc.Clearp(p2)
+ p1.Link = p2
+ p2.Link = p.Link
+ p.Link = p1
+ p1.Lineno = p.Lineno
+ p2.Lineno = p.Lineno
+ p1.Pc = 9999
+ p2.Pc = 9999
+ p.As = i386.ACMPL
+ p.To.Type = obj.TYPE_CONST
+ p.To.Offset = 0
+ p1.As = i386.AJNE
+ p1.From.Type = obj.TYPE_CONST
+ p1.From.Offset = 1 // likely
+ p1.To.Type = obj.TYPE_BRANCH
+ p1.To.U.Branch = p2.Link
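+ // the JNE skips the faulting store below when the pointer is non-nil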
+
+ // crash by write to memory address 0.
+ // if possible, since we know arg is 0, use 0(arg),
+ // which will be shorter to encode than plain 0.
+ p2.As = i386.AMOVL
+
+ p2.From.Type = obj.TYPE_REG
+ p2.From.Reg = i386.REG_AX
+ if regtyp(&p.From) {
+ p2.To.Type = obj.TYPE_MEM
+ p2.To.Reg = p.From.Reg
+ } else {
+ p2.To.Type = obj.TYPE_MEM
+ }
+ p2.To.Offset = 0
+ }
+}