Diffstat (limited to 'src/cmd/8g/ggen.go')
-rw-r--r-- | src/cmd/8g/ggen.go | 1297
1 file changed, 1297 insertions, 0 deletions
diff --git a/src/cmd/8g/ggen.go b/src/cmd/8g/ggen.go
new file mode 100644
index 0000000000..f72beda21a
--- /dev/null
+++ b/src/cmd/8g/ggen.go
@@ -0,0 +1,1297 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+	"cmd/internal/obj"
+	"cmd/internal/obj/i386"
+)
+import "cmd/internal/gc"
+
+func defframe(ptxt *obj.Prog) {
+	var frame uint32
+	var ax uint32
+	var p *obj.Prog
+	var lo int64
+	var hi int64
+	var l *gc.NodeList
+	var n *gc.Node
+
+	// fill in argument size, stack size
+	ptxt.To.Type = obj.TYPE_TEXTSIZE
+
+	ptxt.To.U.Argsize = int32(gc.Rnd(gc.Curfn.Type.Argwid, int64(gc.Widthptr)))
+	frame = uint32(gc.Rnd(gc.Stksize+gc.Maxarg, int64(gc.Widthreg)))
+	ptxt.To.Offset = int64(frame)
+
+	// insert code to zero ambiguously live variables
+	// so that the garbage collector only sees initialized values
+	// when it looks for pointers.
+	p = ptxt
+
+	hi = 0
+	lo = hi
+	ax = 0
+	for l = gc.Curfn.Dcl; l != nil; l = l.Next {
+		n = l.N
+		if n.Needzero == 0 {
+			continue
+		}
+		if n.Class != gc.PAUTO {
+			gc.Fatal("needzero class %d", n.Class)
+		}
+		if n.Type.Width%int64(gc.Widthptr) != 0 || n.Xoffset%int64(gc.Widthptr) != 0 || n.Type.Width == 0 {
+			gc.Fatal("var %v has size %d offset %d", gc.Nconv(n, obj.FmtLong), int(n.Type.Width), int(n.Xoffset))
+		}
+		if lo != hi && n.Xoffset+n.Type.Width == lo-int64(2*gc.Widthptr) {
+			// merge with range we already have
+			lo = n.Xoffset
+
+			continue
+		}
+
+		// zero old range
+		p = zerorange(p, int64(frame), lo, hi, &ax)
+
+		// set new range
+		hi = n.Xoffset + n.Type.Width
+
+		lo = n.Xoffset
+	}
+
+	// zero final range
+	zerorange(p, int64(frame), lo, hi, &ax)
+}
+
+func zerorange(p *obj.Prog, frame int64, lo int64, hi int64, ax *uint32) *obj.Prog {
+	var cnt int64
+	var i int64
+
+	cnt = hi - lo
+	if cnt == 0 {
+		return p
+	}
+	if *ax == 0 {
+		p = appendpp(p, i386.AMOVL, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, i386.REG_AX, 0)
+		*ax = 1
+	}
+
+	if cnt <= int64(4*gc.Widthreg) {
+		for i = 0; i < cnt; i += int64(gc.Widthreg) {
+			p = appendpp(p, i386.AMOVL, obj.TYPE_REG, i386.REG_AX, 0, obj.TYPE_MEM, i386.REG_SP, frame+lo+i)
+		}
+	} else if !gc.Nacl && cnt <= int64(128*gc.Widthreg) {
+		p = appendpp(p, i386.ALEAL, obj.TYPE_MEM, i386.REG_SP, frame+lo, obj.TYPE_REG, i386.REG_DI, 0)
+		p = appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_ADDR, 0, 1*(128-cnt/int64(gc.Widthreg)))
+		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
+	} else {
+		p = appendpp(p, i386.AMOVL, obj.TYPE_CONST, 0, cnt/int64(gc.Widthreg), obj.TYPE_REG, i386.REG_CX, 0)
+		p = appendpp(p, i386.ALEAL, obj.TYPE_MEM, i386.REG_SP, frame+lo, obj.TYPE_REG, i386.REG_DI, 0)
+		p = appendpp(p, i386.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
+		p = appendpp(p, i386.ASTOSL, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
+	}
+
+	return p
+}
+
+func appendpp(p *obj.Prog, as int, ftype int, freg int, foffset int64, ttype int, treg int, toffset int64) *obj.Prog {
+	var q *obj.Prog
+	q = gc.Ctxt.NewProg()
+	gc.Clearp(q)
+	q.As = int16(as)
+	q.Lineno = p.Lineno
+	q.From.Type = int16(ftype)
+	q.From.Reg = int16(freg)
+	q.From.Offset = foffset
+	q.To.Type = int16(ttype)
+	q.To.Reg = int16(treg)
+	q.To.Offset = toffset
+	q.Link = p.Link
+	p.Link = q
+	return q
+}
+
+func clearfat(nl *gc.Node) {
+	var w uint32
+	var c uint32
+	var q uint32
+	var n1 gc.Node
+	var z gc.Node
+	var p *obj.Prog
+
+	/* clear a fat object */
+	if gc.Debug['g'] != 0 {
+		gc.Dump("\nclearfat", nl)
+	}
+
+	w = uint32(nl.Type.Width)
+
+	// Avoid taking the address for simple enough types.
+	if componentgen(nil, nl) {
+		return
+	}
+
+	c = w % 4 // bytes
+	q = w / 4 // quads
+
+	if q < 4 {
+		// Write sequence of MOV 0, off(base) instead of using STOSL.
+		// The hope is that although the code will be slightly longer,
+		// the MOVs will have no dependencies and pipeline better
+		// than the unrolled STOSL loop.
+		// NOTE: Must use agen, not igen, so that optimizer sees address
+		// being taken. We are not writing on field boundaries.
+		regalloc(&n1, gc.Types[gc.Tptr], nil)
+
+		agen(nl, &n1)
+		n1.Op = gc.OINDREG
+		gc.Nodconst(&z, gc.Types[gc.TUINT64], 0)
+		for {
+			tmp14 := q
+			q--
+			if tmp14 <= 0 {
+				break
+			}
+			n1.Type = z.Type
+			gins(i386.AMOVL, &z, &n1)
+			n1.Xoffset += 4
+		}
+
+		gc.Nodconst(&z, gc.Types[gc.TUINT8], 0)
+		for {
+			tmp15 := c
+			c--
+			if tmp15 <= 0 {
+				break
+			}
+			n1.Type = z.Type
+			gins(i386.AMOVB, &z, &n1)
+			n1.Xoffset++
+		}
+
+		regfree(&n1)
+		return
+	}
+
+	gc.Nodreg(&n1, gc.Types[gc.Tptr], i386.REG_DI)
+	agen(nl, &n1)
+	gconreg(i386.AMOVL, 0, i386.REG_AX)
+
+	if q > 128 || (q >= 4 && gc.Nacl) {
+		gconreg(i386.AMOVL, int64(q), i386.REG_CX)
+		gins(i386.AREP, nil, nil)   // repeat
+		gins(i386.ASTOSL, nil, nil) // STOL AL,*(DI)+
+	} else if q >= 4 {
+		p = gins(obj.ADUFFZERO, nil, nil)
+		p.To.Type = obj.TYPE_ADDR
+		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
+
+		// 1 and 128 = magic constants: see ../../runtime/asm_386.s
+		p.To.Offset = 1 * (128 - int64(q))
+	} else {
+		for q > 0 {
+			gins(i386.ASTOSL, nil, nil) // STOL AL,*(DI)+
+			q--
+		}
+	}
+
+	for c > 0 {
+		gins(i386.ASTOSB, nil, nil) // STOB AL,*(DI)+
+		c--
+	}
+}
+
+/*
+ * generate:
+ *	call f
+ *	proc=-1	normal call but no return
+ *	proc=0	normal call
+ *	proc=1	goroutine run in new proc
+ *	proc=2	defer call save away stack
+ *	proc=3	normal call to C pointer (not Go func value)
+*/
+func ginscall(f *gc.Node, proc int) {
+	var p *obj.Prog
+	var reg gc.Node
+	var r1 gc.Node
+	var con gc.Node
+	var stk gc.Node
+	var extra int32
+
+	if f.Type != nil {
+		extra = 0
+		if proc == 1 || proc == 2 {
+			extra = 2 * int32(gc.Widthptr)
+		}
+		gc.Setmaxarg(f.Type, extra)
+	}
+
+	switch proc {
+	default:
+		gc.Fatal("ginscall: bad proc %d", proc)
+
+	case 0, // normal call
+		-1: // normal call but no return
+		if f.Op == gc.ONAME && f.Class == gc.PFUNC {
+			if f == gc.Deferreturn {
+				// Deferred calls will appear to be returning to
+				// the CALL deferreturn(SB) that we are about to emit.
+				// However, the stack trace code will show the line
+				// of the instruction byte before the return PC.
+				// To avoid that being an unrelated instruction,
+				// insert an x86 NOP that we will have the right line number.
+				// x86 NOP 0x90 is really XCHG AX, AX; use that description
+				// because the NOP pseudo-instruction will be removed by
+				// the linker.
+				gc.Nodreg(&reg, gc.Types[gc.TINT], i386.REG_AX)
+
+				gins(i386.AXCHGL, &reg, &reg)
+			}
+
+			p = gins(obj.ACALL, nil, f)
+			gc.Afunclit(&p.To, f)
+			if proc == -1 || gc.Noreturn(p) {
+				gins(obj.AUNDEF, nil, nil)
+			}
+			break
+		}
+
+		gc.Nodreg(&reg, gc.Types[gc.Tptr], i386.REG_DX)
+		gc.Nodreg(&r1, gc.Types[gc.Tptr], i386.REG_BX)
+		gmove(f, &reg)
+		reg.Op = gc.OINDREG
+		gmove(&reg, &r1)
+		reg.Op = gc.OREGISTER
+		gins(obj.ACALL, &reg, &r1)
+
+	case 3: // normal call of c function pointer
+		gins(obj.ACALL, nil, f)
+
+	case 1, // call in new proc (go)
+		2: // deferred call (defer)
+		stk = gc.Node{}
+
+		stk.Op = gc.OINDREG
+		stk.Val.U.Reg = i386.REG_SP
+		stk.Xoffset = 0
+
+		// size of arguments at 0(SP)
+		gc.Nodconst(&con, gc.Types[gc.TINT32], int64(gc.Argsize(f.Type)))
+
+		gins(i386.AMOVL, &con, &stk)
+
+		// FuncVal* at 4(SP)
+		stk.Xoffset = int64(gc.Widthptr)
+
+		gins(i386.AMOVL, f, &stk)
+
+		if proc == 1 {
+			ginscall(gc.Newproc, 0)
+		} else {
+			ginscall(gc.Deferproc, 0)
+		}
+		if proc == 2 {
+			gc.Nodreg(&reg, gc.Types[gc.TINT32], i386.REG_AX)
+			gins(i386.ATESTL, &reg, &reg)
+			p = gc.Gbranch(i386.AJEQ, nil, +1)
+			cgen_ret(nil)
+			gc.Patch(p, gc.Pc)
+		}
+	}
+}
+
+/*
+ * n is call to interface method.
+ * generate res = n.
+ */
+func cgen_callinter(n *gc.Node, res *gc.Node, proc int) {
+	var i *gc.Node
+	var f *gc.Node
+	var tmpi gc.Node
+	var nodi gc.Node
+	var nodo gc.Node
+	var nodr gc.Node
+	var nodsp gc.Node
+
+	i = n.Left
+	if i.Op != gc.ODOTINTER {
+		gc.Fatal("cgen_callinter: not ODOTINTER %v", gc.Oconv(int(i.Op), 0))
+	}
+
+	f = i.Right // field
+	if f.Op != gc.ONAME {
+		gc.Fatal("cgen_callinter: not ONAME %v", gc.Oconv(int(f.Op), 0))
+	}
+
+	i = i.Left // interface
+
+	if i.Addable == 0 {
+		gc.Tempname(&tmpi, i.Type)
+		cgen(i, &tmpi)
+		i = &tmpi
+	}
+
+	gc.Genlist(n.List) // assign the args
+
+	// i is now addable, prepare an indirected
+	// register to hold its address.
+	igen(i, &nodi, res) // REG = &inter
+
+	gc.Nodindreg(&nodsp, gc.Types[gc.Tptr], i386.REG_SP)
+
+	nodsp.Xoffset = 0
+	if proc != 0 {
+		nodsp.Xoffset += 2 * int64(gc.Widthptr) // leave room for size & fn
+	}
+	nodi.Type = gc.Types[gc.Tptr]
+	nodi.Xoffset += int64(gc.Widthptr)
+	cgen(&nodi, &nodsp) // {0 or 8}(SP) = 4(REG) -- i.data
+
+	regalloc(&nodo, gc.Types[gc.Tptr], res)
+
+	nodi.Type = gc.Types[gc.Tptr]
+	nodi.Xoffset -= int64(gc.Widthptr)
+	cgen(&nodi, &nodo) // REG = 0(REG) -- i.tab
+	regfree(&nodi)
+
+	regalloc(&nodr, gc.Types[gc.Tptr], &nodo)
+	if n.Left.Xoffset == gc.BADWIDTH {
+		gc.Fatal("cgen_callinter: badwidth")
+	}
+	gc.Cgen_checknil(&nodo)
+	nodo.Op = gc.OINDREG
+	nodo.Xoffset = n.Left.Xoffset + 3*int64(gc.Widthptr) + 8
+
+	if proc == 0 {
+		// plain call: use direct c function pointer - more efficient
+		cgen(&nodo, &nodr) // REG = 20+offset(REG) -- i.tab->fun[f]
+		proc = 3
+	} else {
+		// go/defer. generate go func value.
+		gins(i386.ALEAL, &nodo, &nodr) // REG = &(20+offset(REG)) -- i.tab->fun[f]
+	}
+
+	nodr.Type = n.Left.Type
+	ginscall(&nodr, proc)
+
+	regfree(&nodr)
+	regfree(&nodo)
+}
+
+/*
+ * generate function call;
+ *	proc=0	normal call
+ *	proc=1	goroutine run in new proc
+ *	proc=2	defer call save away stack
+ */
+func cgen_call(n *gc.Node, proc int) {
+	var t *gc.Type
+	var nod gc.Node
+	var afun gc.Node
+
+	if n == nil {
+		return
+	}
+
+	if n.Left.Ullman >= gc.UINF {
+		// if name involves a fn call
+		// precompute the address of the fn
+		gc.Tempname(&afun, gc.Types[gc.Tptr])
+
+		cgen(n.Left, &afun)
+	}
+
+	gc.Genlist(n.List) // assign the args
+	t = n.Left.Type
+
+	// call tempname pointer
+	if n.Left.Ullman >= gc.UINF {
+		regalloc(&nod, gc.Types[gc.Tptr], nil)
+		gc.Cgen_as(&nod, &afun)
+		nod.Type = t
+		ginscall(&nod, proc)
+		regfree(&nod)
+		return
+	}
+
+	// call pointer
+	if n.Left.Op != gc.ONAME || n.Left.Class != gc.PFUNC {
+		regalloc(&nod, gc.Types[gc.Tptr], nil)
+		gc.Cgen_as(&nod, n.Left)
+		nod.Type = t
+		ginscall(&nod, proc)
+		regfree(&nod)
+		return
+	}
+
+	// call direct
+	n.Left.Method = 1
+
+	ginscall(n.Left, proc)
+}
+
+/*
+ * call to n has already been generated.
+ * generate:
+ *	res = return value from call.
+ */
+func cgen_callret(n *gc.Node, res *gc.Node) {
+	var nod gc.Node
+	var fp *gc.Type
+	var t *gc.Type
+	var flist gc.Iter
+
+	t = n.Left.Type
+	if t.Etype == gc.TPTR32 || t.Etype == gc.TPTR64 {
+		t = t.Type
+	}
+
+	fp = gc.Structfirst(&flist, gc.Getoutarg(t))
+	if fp == nil {
+		gc.Fatal("cgen_callret: nil")
+	}
+
+	nod = gc.Node{}
+	nod.Op = gc.OINDREG
+	nod.Val.U.Reg = i386.REG_SP
+	nod.Addable = 1
+
+	nod.Xoffset = fp.Width
+	nod.Type = fp.Type
+	gc.Cgen_as(res, &nod)
+}
+
+/*
+ * call to n has already been generated.
+ * generate:
+ *	res = &return value from call.
+ */
+func cgen_aret(n *gc.Node, res *gc.Node) {
+	var nod1 gc.Node
+	var nod2 gc.Node
+	var fp *gc.Type
+	var t *gc.Type
+	var flist gc.Iter
+
+	t = n.Left.Type
+	if gc.Isptr[t.Etype] != 0 {
+		t = t.Type
+	}
+
+	fp = gc.Structfirst(&flist, gc.Getoutarg(t))
+	if fp == nil {
+		gc.Fatal("cgen_aret: nil")
+	}
+
+	nod1 = gc.Node{}
+	nod1.Op = gc.OINDREG
+	nod1.Val.U.Reg = i386.REG_SP
+	nod1.Addable = 1
+
+	nod1.Xoffset = fp.Width
+	nod1.Type = fp.Type
+
+	if res.Op != gc.OREGISTER {
+		regalloc(&nod2, gc.Types[gc.Tptr], res)
+		gins(i386.ALEAL, &nod1, &nod2)
+		gins(i386.AMOVL, &nod2, res)
+		regfree(&nod2)
+	} else {
+		gins(i386.ALEAL, &nod1, res)
+	}
+}
+
+/*
+ * generate return.
+ * n->left is assignments to return values.
+ */
+func cgen_ret(n *gc.Node) {
+	var p *obj.Prog
+
+	if n != nil {
+		gc.Genlist(n.List) // copy out args
+	}
+	if gc.Hasdefer != 0 {
+		ginscall(gc.Deferreturn, 0)
+	}
+	gc.Genlist(gc.Curfn.Exit)
+	p = gins(obj.ARET, nil, nil)
+	if n != nil && n.Op == gc.ORETJMP {
+		p.To.Type = obj.TYPE_MEM
+		p.To.Name = obj.NAME_EXTERN
+		p.To.Sym = gc.Linksym(n.Left.Sym)
+	}
+}
+
+/*
+ * generate division.
+ * caller must set:
+ *	ax = allocated AX register
+ *	dx = allocated DX register
+ * generates one of:
+ *	res = nl / nr
+ *	res = nl % nr
+ * according to op.
+ */
+func dodiv(op int, nl *gc.Node, nr *gc.Node, res *gc.Node, ax *gc.Node, dx *gc.Node) {
+	var check int
+	var n1 gc.Node
+	var t1 gc.Node
+	var t2 gc.Node
+	var t3 gc.Node
+	var t4 gc.Node
+	var n4 gc.Node
+	var nz gc.Node
+	var t *gc.Type
+	var t0 *gc.Type
+	var p1 *obj.Prog
+	var p2 *obj.Prog
+
+	// Have to be careful about handling
+	// most negative int divided by -1 correctly.
+	// The hardware will trap.
+	// Also the byte divide instruction needs AH,
+	// which we otherwise don't have to deal with.
+	// Easiest way to avoid for int8, int16: use int32.
+	// For int32 and int64, use explicit test.
+	// Could use int64 hw for int32.
+	t = nl.Type
+
+	t0 = t
+	check = 0
+	if gc.Issigned[t.Etype] != 0 {
+		check = 1
+		if gc.Isconst(nl, gc.CTINT) && gc.Mpgetfix(nl.Val.U.Xval) != -1<<uint64(t.Width*8-1) {
+			check = 0
+		} else if gc.Isconst(nr, gc.CTINT) && gc.Mpgetfix(nr.Val.U.Xval) != -1 {
+			check = 0
+		}
+	}
+
+	if t.Width < 4 {
+		if gc.Issigned[t.Etype] != 0 {
+			t = gc.Types[gc.TINT32]
+		} else {
+			t = gc.Types[gc.TUINT32]
+		}
+		check = 0
+	}
+
+	gc.Tempname(&t1, t)
+	gc.Tempname(&t2, t)
+	if t0 != t {
+		gc.Tempname(&t3, t0)
+		gc.Tempname(&t4, t0)
+		cgen(nl, &t3)
+		cgen(nr, &t4)
+
+		// Convert.
+		gmove(&t3, &t1)
+
+		gmove(&t4, &t2)
+	} else {
+		cgen(nl, &t1)
+		cgen(nr, &t2)
+	}
+
+	if !gc.Samereg(ax, res) && !gc.Samereg(dx, res) {
+		regalloc(&n1, t, res)
+	} else {
+		regalloc(&n1, t, nil)
+	}
+	gmove(&t2, &n1)
+	gmove(&t1, ax)
+	p2 = nil
+	if gc.Nacl {
+		// Native Client does not relay the divide-by-zero trap
+		// to the executing program, so we must insert a check
+		// for ourselves.
+		gc.Nodconst(&n4, t, 0)
+
+		gins(optoas(gc.OCMP, t), &n1, &n4)
+		p1 = gc.Gbranch(optoas(gc.ONE, t), nil, +1)
+		if panicdiv == nil {
+			panicdiv = gc.Sysfunc("panicdivide")
+		}
+		ginscall(panicdiv, -1)
+		gc.Patch(p1, gc.Pc)
+	}
+
+	if check != 0 {
+		gc.Nodconst(&n4, t, -1)
+		gins(optoas(gc.OCMP, t), &n1, &n4)
+		p1 = gc.Gbranch(optoas(gc.ONE, t), nil, +1)
+		if op == gc.ODIV {
+			// a / (-1) is -a.
+			gins(optoas(gc.OMINUS, t), nil, ax)
+
+			gmove(ax, res)
+		} else {
+			// a % (-1) is 0.
+			gc.Nodconst(&n4, t, 0)
+
+			gmove(&n4, res)
+		}
+
+		p2 = gc.Gbranch(obj.AJMP, nil, 0)
+		gc.Patch(p1, gc.Pc)
+	}
+
+	if gc.Issigned[t.Etype] == 0 {
+		gc.Nodconst(&nz, t, 0)
+		gmove(&nz, dx)
+	} else {
+		gins(optoas(gc.OEXTEND, t), nil, nil)
+	}
+	gins(optoas(op, t), &n1, nil)
+	regfree(&n1)
+
+	if op == gc.ODIV {
+		gmove(ax, res)
+	} else {
+		gmove(dx, res)
+	}
+	if check != 0 {
+		gc.Patch(p2, gc.Pc)
+	}
+}
+
+func savex(dr int, x *gc.Node, oldx *gc.Node, res *gc.Node, t *gc.Type) {
+	var r int
+
+	r = int(reg[dr])
+	gc.Nodreg(x, gc.Types[gc.TINT32], dr)
+
+	// save current ax and dx if they are live
+	// and not the destination
+	*oldx = gc.Node{}
+
+	if r > 0 && !gc.Samereg(x, res) {
+		gc.Tempname(oldx, gc.Types[gc.TINT32])
+		gmove(x, oldx)
+	}
+
+	regalloc(x, t, x)
+}
+
+func restx(x *gc.Node, oldx *gc.Node) {
+	regfree(x)
+
+	if oldx.Op != 0 {
+		x.Type = gc.Types[gc.TINT32]
+		gmove(oldx, x)
+	}
+}
+
+/*
+ * generate division according to op, one of:
+ *	res = nl / nr
+ *	res = nl % nr
+ */
+func cgen_div(op int, nl *gc.Node, nr *gc.Node, res *gc.Node) {
+	var ax gc.Node
+	var dx gc.Node
+	var oldax gc.Node
+	var olddx gc.Node
+	var t *gc.Type
+
+	if gc.Is64(nl.Type) {
+		gc.Fatal("cgen_div %v", gc.Tconv(nl.Type, 0))
+	}
+
+	if gc.Issigned[nl.Type.Etype] != 0 {
+		t = gc.Types[gc.TINT32]
+	} else {
+		t = gc.Types[gc.TUINT32]
+	}
+	savex(i386.REG_AX, &ax, &oldax, res, t)
+	savex(i386.REG_DX, &dx, &olddx, res, t)
+	dodiv(op, nl, nr, res, &ax, &dx)
+	restx(&dx, &olddx)
+	restx(&ax, &oldax)
+}
+
+/*
+ * generate shift according to op, one of:
+ *	res = nl << nr
+ *	res = nl >> nr
+ */
+func cgen_shift(op int, bounded bool, nl *gc.Node, nr *gc.Node, res *gc.Node) {
+	var n1 gc.Node
+	var n2 gc.Node
+	var nt gc.Node
+	var cx gc.Node
+	var oldcx gc.Node
+	var hi gc.Node
+	var lo gc.Node
+	var a int
+	var w int
+	var p1 *obj.Prog
+	var p2 *obj.Prog
+	var sc uint64
+
+	if nl.Type.Width > 4 {
+		gc.Fatal("cgen_shift %v", gc.Tconv(nl.Type, 0))
+	}
+
+	w = int(nl.Type.Width * 8)
+
+	a = optoas(op, nl.Type)
+
+	if nr.Op == gc.OLITERAL {
+		gc.Tempname(&n2, nl.Type)
+		cgen(nl, &n2)
+		regalloc(&n1, nl.Type, res)
+		gmove(&n2, &n1)
+		sc = uint64(gc.Mpgetfix(nr.Val.U.Xval))
+		if sc >= uint64(nl.Type.Width*8) {
+			// large shift gets 2 shifts by width-1
+			gins(a, ncon(uint32(w)-1), &n1)
+
+			gins(a, ncon(uint32(w)-1), &n1)
+		} else {
+			gins(a, nr, &n1)
+		}
+		gmove(&n1, res)
+		regfree(&n1)
+		return
+	}
+
+	oldcx = gc.Node{}
+	gc.Nodreg(&cx, gc.Types[gc.TUINT32], i386.REG_CX)
+	if reg[i386.REG_CX] > 1 && !gc.Samereg(&cx, res) {
+		gc.Tempname(&oldcx, gc.Types[gc.TUINT32])
+		gmove(&cx, &oldcx)
+	}
+
+	if nr.Type.Width > 4 {
+		gc.Tempname(&nt, nr.Type)
+		n1 = nt
+	} else {
+		gc.Nodreg(&n1, gc.Types[gc.TUINT32], i386.REG_CX)
+		regalloc(&n1, nr.Type, &n1) // to hold the shift type in CX
+	}
+
+	if gc.Samereg(&cx, res) {
+		regalloc(&n2, nl.Type, nil)
+	} else {
+		regalloc(&n2, nl.Type, res)
+	}
+	if nl.Ullman >= nr.Ullman {
+		cgen(nl, &n2)
+		cgen(nr, &n1)
+	} else {
+		cgen(nr, &n1)
+		cgen(nl, &n2)
+	}
+
+	// test and fix up large shifts
+	if bounded {
+		if nr.Type.Width > 4 {
+			// delayed reg alloc
+			gc.Nodreg(&n1, gc.Types[gc.TUINT32], i386.REG_CX)
+
+			regalloc(&n1, gc.Types[gc.TUINT32], &n1) // to hold the shift type in CX
+			split64(&nt, &lo, &hi)
+			gmove(&lo, &n1)
+			splitclean()
+		}
+	} else {
+		if nr.Type.Width > 4 {
+			// delayed reg alloc
+			gc.Nodreg(&n1, gc.Types[gc.TUINT32], i386.REG_CX)
+
+			regalloc(&n1, gc.Types[gc.TUINT32], &n1) // to hold the shift type in CX
+			split64(&nt, &lo, &hi)
+			gmove(&lo, &n1)
+			gins(optoas(gc.OCMP, gc.Types[gc.TUINT32]), &hi, ncon(0))
+			p2 = gc.Gbranch(optoas(gc.ONE, gc.Types[gc.TUINT32]), nil, +1)
+			gins(optoas(gc.OCMP, gc.Types[gc.TUINT32]), &n1, ncon(uint32(w)))
+			p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
+			splitclean()
+			gc.Patch(p2, gc.Pc)
+		} else {
+			gins(optoas(gc.OCMP, nr.Type), &n1, ncon(uint32(w)))
+			p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
+		}
+
+		if op == gc.ORSH && gc.Issigned[nl.Type.Etype] != 0 {
+			gins(a, ncon(uint32(w)-1), &n2)
+		} else {
+			gmove(ncon(0), &n2)
+		}
+
+		gc.Patch(p1, gc.Pc)
+	}
+
+	gins(a, &n1, &n2)
+
+	if oldcx.Op != 0 {
+		gmove(&oldcx, &cx)
+	}
+
+	gmove(&n2, res)
+
+	regfree(&n1)
+	regfree(&n2)
+}
+
+/*
+ * generate byte multiply:
+ *	res = nl * nr
+ * there is no 2-operand byte multiply instruction so
+ * we do a full-width multiplication and truncate afterwards.
+ */
+func cgen_bmul(op int, nl *gc.Node, nr *gc.Node, res *gc.Node) {
+	var n1 gc.Node
+	var n2 gc.Node
+	var nt gc.Node
+	var tmp *gc.Node
+	var t *gc.Type
+	var a int
+
+	// copy from byte to full registers
+	t = gc.Types[gc.TUINT32]
+
+	if gc.Issigned[nl.Type.Etype] != 0 {
+		t = gc.Types[gc.TINT32]
+	}
+
+	// largest ullman on left.
+	if nl.Ullman < nr.Ullman {
+		tmp = nl
+		nl = nr
+		nr = tmp
+	}
+
+	gc.Tempname(&nt, nl.Type)
+	cgen(nl, &nt)
+	regalloc(&n1, t, res)
+	cgen(nr, &n1)
+	regalloc(&n2, t, nil)
+	gmove(&nt, &n2)
+	a = optoas(op, t)
+	gins(a, &n2, &n1)
+	regfree(&n2)
+	gmove(&n1, res)
+	regfree(&n1)
+}
+
+/*
+ * generate high multiply:
+ *	res = (nl*nr) >> width
+ */
+func cgen_hmul(nl *gc.Node, nr *gc.Node, res *gc.Node) {
+	var t *gc.Type
+	var a int
+	var n1 gc.Node
+	var n2 gc.Node
+	var ax gc.Node
+	var dx gc.Node
+
+	t = nl.Type
+	a = optoas(gc.OHMUL, t)
+
+	// gen nl in n1.
+	gc.Tempname(&n1, t)
+
+	cgen(nl, &n1)
+
+	// gen nr in n2.
+	regalloc(&n2, t, res)
+
+	cgen(nr, &n2)
+
+	// multiply.
+	gc.Nodreg(&ax, t, i386.REG_AX)
+
+	gmove(&n2, &ax)
+	gins(a, &n1, nil)
+	regfree(&n2)
+
+	if t.Width == 1 {
+		// byte multiply behaves differently.
+		gc.Nodreg(&ax, t, i386.REG_AH)
+
+		gc.Nodreg(&dx, t, i386.REG_DX)
+		gmove(&ax, &dx)
+	}
+
+	gc.Nodreg(&dx, t, i386.REG_DX)
+	gmove(&dx, res)
+}
+
+/*
+ * generate floating-point operation.
+ */
+func cgen_float(n *gc.Node, res *gc.Node) {
+	var nl *gc.Node
+	var n1 gc.Node
+	var n2 gc.Node
+	var p1 *obj.Prog
+	var p2 *obj.Prog
+	var p3 *obj.Prog
+
+	nl = n.Left
+	switch n.Op {
+	case gc.OEQ,
+		gc.ONE,
+		gc.OLT,
+		gc.OLE,
+		gc.OGE:
+		p1 = gc.Gbranch(obj.AJMP, nil, 0)
+		p2 = gc.Pc
+		gmove(gc.Nodbool(true), res)
+		p3 = gc.Gbranch(obj.AJMP, nil, 0)
+		gc.Patch(p1, gc.Pc)
+		bgen(n, true, 0, p2)
+		gmove(gc.Nodbool(false), res)
+		gc.Patch(p3, gc.Pc)
+		return
+
+	case gc.OPLUS:
+		cgen(nl, res)
+		return
+
+	case gc.OCONV:
+		if gc.Eqtype(n.Type, nl.Type) || gc.Noconv(n.Type, nl.Type) {
+			cgen(nl, res)
+			return
+		}
+
+		gc.Tempname(&n2, n.Type)
+		mgen(nl, &n1, res)
+		gmove(&n1, &n2)
+		gmove(&n2, res)
+		mfree(&n1)
+		return
+	}
+
+	if gc.Use_sse != 0 {
+		cgen_floatsse(n, res)
+	} else {
+		cgen_float387(n, res)
+	}
+}
+
+// floating-point. 387 (not SSE2)
+func cgen_float387(n *gc.Node, res *gc.Node) {
+	var f0 gc.Node
+	var f1 gc.Node
+	var nl *gc.Node
+	var nr *gc.Node
+
+	nl = n.Left
+	nr = n.Right
+	gc.Nodreg(&f0, nl.Type, i386.REG_F0)
+	gc.Nodreg(&f1, n.Type, i386.REG_F0+1)
+	if nr != nil {
+		goto flt2
+	}
+
+	// unary
+	cgen(nl, &f0)
+
+	if n.Op != gc.OCONV && n.Op != gc.OPLUS {
+		gins(foptoas(int(n.Op), n.Type, 0), nil, nil)
+	}
+	gmove(&f0, res)
+	return
+
+flt2: // binary
+	if nl.Ullman >= nr.Ullman {
+		cgen(nl, &f0)
+		if nr.Addable != 0 {
+			gins(foptoas(int(n.Op), n.Type, 0), nr, &f0)
+		} else {
+			cgen(nr, &f0)
+			gins(foptoas(int(n.Op), n.Type, Fpop), &f0, &f1)
+		}
+	} else {
+		cgen(nr, &f0)
+		if nl.Addable != 0 {
+			gins(foptoas(int(n.Op), n.Type, Frev), nl, &f0)
+		} else {
+			cgen(nl, &f0)
+			gins(foptoas(int(n.Op), n.Type, Frev|Fpop), &f0, &f1)
+		}
+	}
+
+	gmove(&f0, res)
+	return
+}
+
+func cgen_floatsse(n *gc.Node, res *gc.Node) {
+	var nl *gc.Node
+	var nr *gc.Node
+	var r *gc.Node
+	var n1 gc.Node
+	var n2 gc.Node
+	var nt gc.Node
+	var a int
+
+	nl = n.Left
+	nr = n.Right
+	switch n.Op {
+	default:
+		gc.Dump("cgen_floatsse", n)
+		gc.Fatal("cgen_floatsse %v", gc.Oconv(int(n.Op), 0))
+		return
+
+	case gc.OMINUS,
+		gc.OCOM:
+		nr = gc.Nodintconst(-1)
+		gc.Convlit(&nr, n.Type)
+		a = foptoas(gc.OMUL, nl.Type, 0)
+		goto sbop
+
+	// symmetric binary
+	case gc.OADD,
+		gc.OMUL:
+		a = foptoas(int(n.Op), nl.Type, 0)
+
+		goto sbop
+
+	// asymmetric binary
+	case gc.OSUB,
+		gc.OMOD,
+		gc.ODIV:
+		a = foptoas(int(n.Op), nl.Type, 0)
+
+		goto abop
+	}
+
+sbop: // symmetric binary
+	if nl.Ullman < nr.Ullman || nl.Op == gc.OLITERAL {
+		r = nl
+		nl = nr
+		nr = r
+	}
+
+abop: // asymmetric binary
+	if nl.Ullman >= nr.Ullman {
+		gc.Tempname(&nt, nl.Type)
+		cgen(nl, &nt)
+		mgen(nr, &n2, nil)
+		regalloc(&n1, nl.Type, res)
+		gmove(&nt, &n1)
+		gins(a, &n2, &n1)
+		gmove(&n1, res)
+		regfree(&n1)
+		mfree(&n2)
+	} else {
+		regalloc(&n2, nr.Type, res)
+		cgen(nr, &n2)
+		regalloc(&n1, nl.Type, nil)
+		cgen(nl, &n1)
+		gins(a, &n2, &n1)
+		regfree(&n2)
+		gmove(&n1, res)
+		regfree(&n1)
+	}
+
+	return
+}
+
+func bgen_float(n *gc.Node, true_ int, likely int, to *obj.Prog) {
+	var et int
+	var a int
+	var nl *gc.Node
+	var nr *gc.Node
+	var r *gc.Node
+	var n1 gc.Node
+	var n2 gc.Node
+	var n3 gc.Node
+	var tmp gc.Node
+	var t1 gc.Node
+	var t2 gc.Node
+	var ax gc.Node
+	var p1 *obj.Prog
+	var p2 *obj.Prog
+
+	nl = n.Left
+	nr = n.Right
+	a = int(n.Op)
+	if true_ == 0 {
+		// brcom is not valid on floats when NaN is involved.
+		p1 = gc.Gbranch(obj.AJMP, nil, 0)
+
+		p2 = gc.Gbranch(obj.AJMP, nil, 0)
+		gc.Patch(p1, gc.Pc)
+
+		// No need to avoid re-genning ninit.
+		bgen_float(n, 1, -likely, p2)
+
+		gc.Patch(gc.Gbranch(obj.AJMP, nil, 0), to)
+		gc.Patch(p2, gc.Pc)
+		return
+	}
+
+	if gc.Use_sse != 0 {
+		goto sse
+	} else {
+		goto x87
+	}
+
+x87:
+	a = gc.Brrev(a) // because the args are stacked
+	if a == gc.OGE || a == gc.OGT {
+		// only < and <= work right with NaN; reverse if needed
+		r = nr
+
+		nr = nl
+		nl = r
+		a = gc.Brrev(a)
+	}
+
+	gc.Nodreg(&tmp, nr.Type, i386.REG_F0)
+	gc.Nodreg(&n2, nr.Type, i386.REG_F0+1)
+	gc.Nodreg(&ax, gc.Types[gc.TUINT16], i386.REG_AX)
+	et = gc.Simsimtype(nr.Type)
+	if et == gc.TFLOAT64 {
+		if nl.Ullman > nr.Ullman {
+			cgen(nl, &tmp)
+			cgen(nr, &tmp)
+			gins(i386.AFXCHD, &tmp, &n2)
+		} else {
+			cgen(nr, &tmp)
+			cgen(nl, &tmp)
+		}
+
+		gins(i386.AFUCOMIP, &tmp, &n2)
+		gins(i386.AFMOVDP, &tmp, &tmp) // annoying pop but still better than STSW+SAHF
+	} else {
+		// TODO(rsc): The moves back and forth to memory
+		// here are for truncating the value to 32 bits.
+		// This handles 32-bit comparison but presumably
+		// all the other ops have the same problem.
+		// We need to figure out what the right general
+		// solution is, besides telling people to use float64.
+		gc.Tempname(&t1, gc.Types[gc.TFLOAT32])
+
+		gc.Tempname(&t2, gc.Types[gc.TFLOAT32])
+		cgen(nr, &t1)
+		cgen(nl, &t2)
+		gmove(&t2, &tmp)
+		gins(i386.AFCOMFP, &t1, &tmp)
+		gins(i386.AFSTSW, nil, &ax)
+		gins(i386.ASAHF, nil, nil)
+	}
+
+	goto ret
+
+sse:
+	if nl.Addable == 0 {
+		gc.Tempname(&n1, nl.Type)
+		cgen(nl, &n1)
+		nl = &n1
+	}
+
+	if nr.Addable == 0 {
+		gc.Tempname(&tmp, nr.Type)
+		cgen(nr, &tmp)
+		nr = &tmp
+	}
+
+	regalloc(&n2, nr.Type, nil)
+	gmove(nr, &n2)
+	nr = &n2
+
+	if nl.Op != gc.OREGISTER {
+		regalloc(&n3, nl.Type, nil)
+		gmove(nl, &n3)
+		nl = &n3
+	}
+
+	if a == gc.OGE || a == gc.OGT {
+		// only < and <= work right with NaN; reverse if needed
+		r = nr
+
+		nr = nl
+		nl = r
+		a = gc.Brrev(a)
+	}
+
+	gins(foptoas(gc.OCMP, nr.Type, 0), nl, nr)
+	if nl.Op == gc.OREGISTER {
+		regfree(nl)
+	}
+	regfree(nr)
+
+ret:
+	if a == gc.OEQ {
+		// neither NE nor P
+		p1 = gc.Gbranch(i386.AJNE, nil, -likely)
+
+		p2 = gc.Gbranch(i386.AJPS, nil, -likely)
+		gc.Patch(gc.Gbranch(obj.AJMP, nil, 0), to)
+		gc.Patch(p1, gc.Pc)
+		gc.Patch(p2, gc.Pc)
+	} else if a == gc.ONE {
+		// either NE or P
+		gc.Patch(gc.Gbranch(i386.AJNE, nil, likely), to)
+
+		gc.Patch(gc.Gbranch(i386.AJPS, nil, likely), to)
+	} else {
+		gc.Patch(gc.Gbranch(optoas(a, nr.Type), nil, likely), to)
+	}
+}
+
+// Called after regopt and peep have run.
+// Expand CHECKNIL pseudo-op into actual nil pointer check.
+func expandchecks(firstp *obj.Prog) {
+	var p *obj.Prog
+	var p1 *obj.Prog
+	var p2 *obj.Prog
+
+	for p = firstp; p != nil; p = p.Link {
+		if p.As != obj.ACHECKNIL {
+			continue
+		}
+		if gc.Debug_checknil != 0 && p.Lineno > 1 { // p->lineno==1 in generated wrappers
+			gc.Warnl(int(p.Lineno), "generated nil check")
+		}
+
+		// check is
+		//	CMP arg, $0
+		//	JNE 2(PC) (likely)
+		//	MOV AX, 0
+		p1 = gc.Ctxt.NewProg()
+
+		p2 = gc.Ctxt.NewProg()
+		gc.Clearp(p1)
+		gc.Clearp(p2)
+		p1.Link = p2
+		p2.Link = p.Link
+		p.Link = p1
+		p1.Lineno = p.Lineno
+		p2.Lineno = p.Lineno
+		p1.Pc = 9999
+		p2.Pc = 9999
+		p.As = i386.ACMPL
+		p.To.Type = obj.TYPE_CONST
+		p.To.Offset = 0
+		p1.As = i386.AJNE
+		p1.From.Type = obj.TYPE_CONST
+		p1.From.Offset = 1 // likely
+		p1.To.Type = obj.TYPE_BRANCH
+		p1.To.U.Branch = p2.Link
+
+		// crash by write to memory address 0.
+		// if possible, since we know arg is 0, use 0(arg),
+		// which will be shorter to encode than plain 0.
+		p2.As = i386.AMOVL
+
+		p2.From.Type = obj.TYPE_REG
+		p2.From.Reg = i386.REG_AX
+		if regtyp(&p.From) {
+			p2.To.Type = obj.TYPE_MEM
+			p2.To.Reg = p.From.Reg
+		} else {
+			p2.To.Type = obj.TYPE_MEM
+		}
+		p2.To.Offset = 0
+	}
+}
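
A compact way to see what defframe's merging loop does: walking Curfn.Dcl from higher stack offsets to lower ones, it folds each ambiguously live variable into the pending [lo,hi) zero range when the variable ends where the range begins (the code above also tolerates a gap of 2*Widthptr). The sketch below models only that bookkeeping, with hypothetical offset/width pairs and exact adjacency; it is an illustration, not code from this CL:

package main

import "fmt"

// mergeRanges mimics defframe's scan: vars arrive in decreasing
// offset order as {offset, width} pairs, and each one either extends
// the pending [lo,hi) range downward or flushes it and starts a new one.
func mergeRanges(vars [][2]int64) (ranges [][2]int64) {
	var lo, hi int64
	for _, v := range vars {
		if lo != hi && v[0]+v[1] == lo {
			lo = v[0] // adjacent below the range: merge
			continue
		}
		if lo != hi {
			ranges = append(ranges, [2]int64{lo, hi}) // "zero old range"
		}
		lo, hi = v[0], v[0]+v[1] // "set new range"
	}
	if lo != hi {
		ranges = append(ranges, [2]int64{lo, hi}) // "zero final range"
	}
	return
}

func main() {
	fmt.Println(mergeRanges([][2]int64{{16, 8}, {8, 8}, {0, 4}}))
	// [[8 24] [0 4]]: {8,8} merges below {16,8}; {0,4} starts a new range
}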
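
The DUFFZERO offset 1*(128 - cnt/Widthreg) in zerorange relies on runtime·duffzero on 386 being a run of 128 one-byte STOSL instructions, so entering 128 - n bytes past the symbol executes exactly n stores; the "1 and 128 = magic constants" note in clearfat points at the same layout in runtime/asm_386.s. A sketch of the arithmetic only (hypothetical helper name):

package main

import "fmt"

// duffzeroEntry computes how far into duffzero to jump so that
// exactly cnt/widthreg STOSL instructions execute before the RET.
func duffzeroEntry(cnt, widthreg int64) int64 {
	return 1 * (128 - cnt/widthreg) // 1 byte per STOSL, 128 of them
}

func main() {
	fmt.Println(duffzeroEntry(64, 4)) // 112: skip 112 bytes, run 16 stores
}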
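
clearfat carves a fat object into q 4-byte quads plus c trailing bytes before choosing between unrolled MOVLs, DUFFZERO, or REP STOSL. A behavioral sketch of that q/c decomposition in plain Go, operating on a byte slice rather than on generated instructions (the helper name is mine):

package main

import "fmt"

// clearBytes zeroes b the way clearfat carves up a fat object:
// q 4-byte quads first, then c leftover bytes.
func clearBytes(b []byte) {
	q, c := len(b)/4, len(b)%4
	for i := 0; i < q; i++ { // the AMOVL / STOSL part
		b[4*i], b[4*i+1], b[4*i+2], b[4*i+3] = 0, 0, 0, 0
	}
	for i := len(b) - c; i < len(b); i++ { // the AMOVB / STOSB tail
		b[i] = 0
	}
}

func main() {
	b := []byte{1, 2, 3, 4, 5, 6, 7}
	clearBytes(b)
	fmt.Println(b) // [0 0 0 0 0 0 0]
}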
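
The check path in dodiv exists because x86 IDIV traps on the most negative value divided by -1, while Go defines the wrapped result: a / -1 is -a (which overflows back to a) and a % -1 is 0. This is observable from ordinary Go, using runtime values so the compiler cannot reject the overflow as a constant expression:

package main

import (
	"fmt"
	"math"
)

func main() {
	x, y := int32(math.MinInt32), int32(-1)
	fmt.Println(x / y) // -2147483648: a / -1 is -a, wrapping back to a
	fmt.Println(x % y) // 0: a % -1 is always 0
}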
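
cgen_shift's fix-ups compensate for the hardware: SHLL/SHRL use only the low 5 bits of the count in CL, but Go defines shifts by the operand width or more. "Large shift gets 2 shifts by width-1" handles the constant case; the CMP plus conditional fix-up handles the variable case. The defined results the generated code must produce:

package main

import "fmt"

func main() {
	var u uint32 = 0xFFFFFFFF
	var s int32 = -1
	n := uint(40)       // count >= width, known only at run time
	fmt.Println(u << n) // 0: Go defines oversized shifts, unlike SHLL
	fmt.Println(u >> n) // 0
	fmt.Println(s >> n) // -1: signed right shift fills with the sign bit
}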
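
cgen_bmul's comment says it all: with no 2-operand byte multiply, the operands are widened, multiplied at full width, and the result truncated. The same trick written in source form (helper name is mine):

package main

import "fmt"

// bmul8 mirrors cgen_bmul: widen to 32 bits, multiply, truncate.
func bmul8(a, b int8) int8 {
	return int8(int32(a) * int32(b))
}

func main() {
	fmt.Println(bmul8(100, 3)) // 44: 300 truncated to 8 bits
}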
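
cgen_hmul keeps only the high half of the product, which lands in DX after the one-operand multiply. Portable Go gets the identical value from a widening multiply; a sketch for the 32-bit case:

package main

import "fmt"

// hmul32 computes what OHMUL computes for 32-bit operands:
// the upper 32 bits of the 64-bit product.
func hmul32(a, b int32) int32 {
	return int32((int64(a) * int64(b)) >> 32)
}

func main() {
	fmt.Println(hmul32(1<<30, 6)) // 1: (2^30 * 6) >> 32
}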
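
The AJNE/AJPS pairs in bgen_float's ret block track IEEE semantics: a comparison involving NaN is unordered (the parity flag is set), every ordered comparison with NaN is false, and only != is true. Hence OEQ may fall through to the branch only when neither NE nor P is set, while ONE takes the branch on either flag. The Go-level truth values being implemented:

package main

import (
	"fmt"
	"math"
)

func main() {
	nan := math.NaN()
	fmt.Println(nan == nan, nan < 1, nan >= 1) // false false false
	fmt.Println(nan != nan)                    // true
}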
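
expandchecks rewrites the CHECKNIL instruction itself into CMPL and splices two freshly allocated Progs (the JNE and the faulting MOVL) into the singly linked instruction list. A toy version of just that splice, using a stand-in type since obj.Prog carries far more state:

package main

import "fmt"

// prog is a minimal stand-in for obj.Prog: an opcode and a Link pointer.
type prog struct {
	as   string
	link *prog
}

// spliceAfter inserts p1 and p2 after p, mirroring the
// p1.Link = p2; p2.Link = p.Link; p.Link = p1 sequence above.
func spliceAfter(p, p1, p2 *prog) {
	p2.link = p.link
	p1.link = p2
	p.link = p1
}

func main() {
	tail := &prog{as: "RET"}
	head := &prog{as: "CMPL", link: tail} // the rewritten CHECKNIL
	spliceAfter(head, &prog{as: "JNE"}, &prog{as: "MOVL"})
	for p := head; p != nil; p = p.link {
		fmt.Println(p.as) // CMPL JNE MOVL RET
	}
}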