diff options
Diffstat (limited to 'src/cmd/8g/ggen.c')
-rw-r--r-- | src/cmd/8g/ggen.c | 1165 |
1 files changed, 0 insertions, 1165 deletions
diff --git a/src/cmd/8g/ggen.c b/src/cmd/8g/ggen.c deleted file mode 100644 index 8188348282..0000000000 --- a/src/cmd/8g/ggen.c +++ /dev/null @@ -1,1165 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#undef EXTERN -#define EXTERN -#include <u.h> -#include <libc.h> -#include "gg.h" -#include "../gc/popt.h" - -static Prog *appendpp(Prog*, int, int, int, vlong, int, int, vlong); -static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax); - -void -defframe(Prog *ptxt) -{ - uint32 frame, ax; - Prog *p; - vlong lo, hi; - NodeList *l; - Node *n; - - // fill in argument size, stack size - ptxt->to.type = TYPE_TEXTSIZE; - ptxt->to.u.argsize = rnd(curfn->type->argwid, widthptr); - frame = rnd(stksize+maxarg, widthreg); - ptxt->to.offset = frame; - - // insert code to zero ambiguously live variables - // so that the garbage collector only sees initialized values - // when it looks for pointers. 
- p = ptxt; - hi = 0; - lo = hi; - ax = 0; - for(l=curfn->dcl; l != nil; l = l->next) { - n = l->n; - if(!n->needzero) - continue; - if(n->class != PAUTO) - fatal("needzero class %d", n->class); - if(n->type->width % widthptr != 0 || n->xoffset % widthptr != 0 || n->type->width == 0) - fatal("var %lN has size %d offset %d", n, (int)n->type->width, (int)n->xoffset); - if(lo != hi && n->xoffset + n->type->width == lo - 2*widthptr) { - // merge with range we already have - lo = n->xoffset; - continue; - } - // zero old range - p = zerorange(p, frame, lo, hi, &ax); - - // set new range - hi = n->xoffset + n->type->width; - lo = n->xoffset; - } - // zero final range - zerorange(p, frame, lo, hi, &ax); -} - -static Prog* -zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax) -{ - vlong cnt, i; - - cnt = hi - lo; - if(cnt == 0) - return p; - if(*ax == 0) { - p = appendpp(p, AMOVL, TYPE_CONST, 0, 0, TYPE_REG, REG_AX, 0); - *ax = 1; - } - if(cnt <= 4*widthreg) { - for(i = 0; i < cnt; i += widthreg) { - p = appendpp(p, AMOVL, TYPE_REG, REG_AX, 0, TYPE_MEM, REG_SP, frame+lo+i); - } - } else if(!nacl && cnt <= 128*widthreg) { - p = appendpp(p, ALEAL, TYPE_MEM, REG_SP, frame+lo, TYPE_REG, REG_DI, 0); - p = appendpp(p, ADUFFZERO, TYPE_NONE, 0, 0, TYPE_ADDR, 0, 1*(128-cnt/widthreg)); - p->to.sym = linksym(pkglookup("duffzero", runtimepkg)); - } else { - p = appendpp(p, AMOVL, TYPE_CONST, 0, cnt/widthreg, TYPE_REG, REG_CX, 0); - p = appendpp(p, ALEAL, TYPE_MEM, REG_SP, frame+lo, TYPE_REG, REG_DI, 0); - p = appendpp(p, AREP, TYPE_NONE, 0, 0, TYPE_NONE, 0, 0); - p = appendpp(p, ASTOSL, TYPE_NONE, 0, 0, TYPE_NONE, 0, 0); - } - return p; -} - -static Prog* -appendpp(Prog *p, int as, int ftype, int freg, vlong foffset, int ttype, int treg, vlong toffset) -{ - Prog *q; - q = mal(sizeof(*q)); - clearp(q); - q->as = as; - q->lineno = p->lineno; - q->from.type = ftype; - q->from.reg = freg; - q->from.offset = foffset; - q->to.type = ttype; - q->to.reg = treg; - q->to.offset = 
toffset; - q->link = p->link; - p->link = q; - return q; -} - -void -clearfat(Node *nl) -{ - uint32 w, c, q; - Node n1, z; - Prog *p; - - /* clear a fat object */ - if(debug['g']) - dump("\nclearfat", nl); - - w = nl->type->width; - // Avoid taking the address for simple enough types. - if(componentgen(N, nl)) - return; - - c = w % 4; // bytes - q = w / 4; // quads - - if(q < 4) { - // Write sequence of MOV 0, off(base) instead of using STOSL. - // The hope is that although the code will be slightly longer, - // the MOVs will have no dependencies and pipeline better - // than the unrolled STOSL loop. - // NOTE: Must use agen, not igen, so that optimizer sees address - // being taken. We are not writing on field boundaries. - regalloc(&n1, types[tptr], N); - agen(nl, &n1); - n1.op = OINDREG; - nodconst(&z, types[TUINT64], 0); - while(q-- > 0) { - n1.type = z.type; - gins(AMOVL, &z, &n1); - n1.xoffset += 4; - } - nodconst(&z, types[TUINT8], 0); - while(c-- > 0) { - n1.type = z.type; - gins(AMOVB, &z, &n1); - n1.xoffset++; - } - regfree(&n1); - return; - } - - nodreg(&n1, types[tptr], REG_DI); - agen(nl, &n1); - gconreg(AMOVL, 0, REG_AX); - - if(q > 128 || (q >= 4 && nacl)) { - gconreg(AMOVL, q, REG_CX); - gins(AREP, N, N); // repeat - gins(ASTOSL, N, N); // STOL AL,*(DI)+ - } else if(q >= 4) { - p = gins(ADUFFZERO, N, N); - p->to.type = TYPE_ADDR; - p->to.sym = linksym(pkglookup("duffzero", runtimepkg)); - // 1 and 128 = magic constants: see ../../runtime/asm_386.s - p->to.offset = 1*(128-q); - } else - while(q > 0) { - gins(ASTOSL, N, N); // STOL AL,*(DI)+ - q--; - } - - while(c > 0) { - gins(ASTOSB, N, N); // STOB AL,*(DI)+ - c--; - } -} - -/* - * generate: - * call f - * proc=-1 normal call but no return - * proc=0 normal call - * proc=1 goroutine run in new proc - * proc=2 defer call save away stack - * proc=3 normal call to C pointer (not Go func value) - */ -void -ginscall(Node *f, int proc) -{ - Prog *p; - Node reg, r1, con, stk; - int32 extra; - - if(f->type 
!= T) { - extra = 0; - if(proc == 1 || proc == 2) - extra = 2 * widthptr; - setmaxarg(f->type, extra); - } - - switch(proc) { - default: - fatal("ginscall: bad proc %d", proc); - break; - - case 0: // normal call - case -1: // normal call but no return - if(f->op == ONAME && f->class == PFUNC) { - if(f == deferreturn) { - // Deferred calls will appear to be returning to - // the CALL deferreturn(SB) that we are about to emit. - // However, the stack trace code will show the line - // of the instruction byte before the return PC. - // To avoid that being an unrelated instruction, - // insert an x86 NOP that we will have the right line number. - // x86 NOP 0x90 is really XCHG AX, AX; use that description - // because the NOP pseudo-instruction will be removed by - // the linker. - nodreg(®, types[TINT], REG_AX); - gins(AXCHGL, ®, ®); - } - p = gins(ACALL, N, f); - afunclit(&p->to, f); - if(proc == -1 || noreturn(p)) - gins(AUNDEF, N, N); - break; - } - nodreg(®, types[tptr], REG_DX); - nodreg(&r1, types[tptr], REG_BX); - gmove(f, ®); - reg.op = OINDREG; - gmove(®, &r1); - reg.op = OREGISTER; - gins(ACALL, ®, &r1); - break; - - case 3: // normal call of c function pointer - gins(ACALL, N, f); - break; - - case 1: // call in new proc (go) - case 2: // deferred call (defer) - memset(&stk, 0, sizeof(stk)); - stk.op = OINDREG; - stk.val.u.reg = REG_SP; - stk.xoffset = 0; - - // size of arguments at 0(SP) - nodconst(&con, types[TINT32], argsize(f->type)); - gins(AMOVL, &con, &stk); - - // FuncVal* at 4(SP) - stk.xoffset = widthptr; - gins(AMOVL, f, &stk); - - if(proc == 1) - ginscall(newproc, 0); - else - ginscall(deferproc, 0); - if(proc == 2) { - nodreg(®, types[TINT32], REG_AX); - gins(ATESTL, ®, ®); - p = gbranch(AJEQ, T, +1); - cgen_ret(N); - patch(p, pc); - } - break; - } -} - -/* - * n is call to interface method. - * generate res = n. 
 */
void
cgen_callinter(Node *n, Node *res, int proc)
{
	Node *i, *f;
	Node tmpi, nodi, nodo, nodr, nodsp;

	// n->left must be an ODOTINTER node: interface value on its left,
	// method name on its right.
	i = n->left;
	if(i->op != ODOTINTER)
		fatal("cgen_callinter: not ODOTINTER %O", i->op);

	f = i->right;		// field
	if(f->op != ONAME)
		fatal("cgen_callinter: not ONAME %O", f->op);

	i = i->left;		// interface

	// evaluate a non-addressable interface expression into a temporary
	if(!i->addable) {
		tempname(&tmpi, i->type);
		cgen(i, &tmpi);
		i = &tmpi;
	}

	genlist(n->list);		// assign the args

	// i is now addable, prepare an indirected
	// register to hold its address.
	igen(i, &nodi, res);		// REG = &inter

	// store the receiver (i.data) as the first argument on the stack
	nodindreg(&nodsp, types[tptr], REG_SP);
	nodsp.xoffset = 0;
	if(proc != 0)
		nodsp.xoffset += 2 * widthptr; // leave room for size & fn
	nodi.type = types[tptr];
	nodi.xoffset += widthptr;
	cgen(&nodi, &nodsp);	// {0 or 8}(SP) = 4(REG) -- i.data

	regalloc(&nodo, types[tptr], res);
	nodi.type = types[tptr];
	nodi.xoffset -= widthptr;
	cgen(&nodi, &nodo);	// REG = 0(REG) -- i.tab
	regfree(&nodi);

	regalloc(&nodr, types[tptr], &nodo);
	if(n->left->xoffset == BADWIDTH)
		fatal("cgen_callinter: badwidth");
	cgen_checknil(&nodo);
	nodo.op = OINDREG;
	// address of the method's slot inside the table pointed to by i.tab
	nodo.xoffset = n->left->xoffset + 3*widthptr + 8;
	
	if(proc == 0) {
		// plain call: use direct c function pointer - more efficient
		cgen(&nodo, &nodr);	// REG = 20+offset(REG) -- i.tab->fun[f]
		proc = 3;
	} else {
		// go/defer. generate go func value.
		gins(ALEAL, &nodo, &nodr);	// REG = &(20+offset(REG)) -- i.tab->fun[f]
	}

	nodr.type = n->left->type;
	ginscall(&nodr, proc);

	regfree(&nodr);
	regfree(&nodo);
}

/*
 * generate function call;
 *	proc=0	normal call
 *	proc=1	goroutine run in new proc
 *	proc=2	defer call save away stack
 *
 * the arguments in n->list are generated first; the callee is then
 * invoked through ginscall either directly (named function) or
 * through a register holding the function value.
 */
void
cgen_call(Node *n, int proc)
{
	Type *t;
	Node nod, afun;

	if(n == N)
		return;

	if(n->left->ullman >= UINF) {
		// if name involves a fn call
		// precompute the address of the fn
		tempname(&afun, types[tptr]);
		cgen(n->left, &afun);
	}

	genlist(n->list);		// assign the args
	t = n->left->type;

	// call tempname pointer
	if(n->left->ullman >= UINF) {
		regalloc(&nod, types[tptr], N);
		cgen_as(&nod, &afun);
		nod.type = t;
		ginscall(&nod, proc);
		regfree(&nod);
		return;
	}

	// call pointer
	if(n->left->op != ONAME || n->left->class != PFUNC) {
		regalloc(&nod, types[tptr], N);
		cgen_as(&nod, n->left);
		nod.type = t;
		ginscall(&nod, proc);
		regfree(&nod);
		return;
	}

	// call direct
	n->left->method = 1;
	ginscall(n->left, proc);
}

/*
 * call to n has already been generated.
 * generate:
 *	res = return value from call.
 *
 * only the first output parameter is copied; it is read from
 * the stack at offset fp->width from SP.
 */
void
cgen_callret(Node *n, Node *res)
{
	Node nod;
	Type *fp, *t;
	Iter flist;

	// look through a pointer to reach the function type
	t = n->left->type;
	if(t->etype == TPTR32 || t->etype == TPTR64)
		t = t->type;

	fp = structfirst(&flist, getoutarg(t));
	if(fp == T)
		fatal("cgen_callret: nil");

	// build an addressable SP-relative node for the result slot
	memset(&nod, 0, sizeof(nod));
	nod.op = OINDREG;
	nod.val.u.reg = REG_SP;
	nod.addable = 1;

	nod.xoffset = fp->width;
	nod.type = fp->type;
	cgen_as(res, &nod);
}

/*
 * call to n has already been generated.
 * generate:
 *	res = &return value from call.
- */ -void -cgen_aret(Node *n, Node *res) -{ - Node nod1, nod2; - Type *fp, *t; - Iter flist; - - t = n->left->type; - if(isptr[t->etype]) - t = t->type; - - fp = structfirst(&flist, getoutarg(t)); - if(fp == T) - fatal("cgen_aret: nil"); - - memset(&nod1, 0, sizeof(nod1)); - nod1.op = OINDREG; - nod1.val.u.reg = REG_SP; - nod1.addable = 1; - - nod1.xoffset = fp->width; - nod1.type = fp->type; - - if(res->op != OREGISTER) { - regalloc(&nod2, types[tptr], res); - gins(ALEAL, &nod1, &nod2); - gins(AMOVL, &nod2, res); - regfree(&nod2); - } else - gins(ALEAL, &nod1, res); -} - -/* - * generate return. - * n->left is assignments to return values. - */ -void -cgen_ret(Node *n) -{ - Prog *p; - - if(n != N) - genlist(n->list); // copy out args - if(hasdefer) - ginscall(deferreturn, 0); - genlist(curfn->exit); - p = gins(ARET, N, N); - if(n != N && n->op == ORETJMP) { - p->to.type = TYPE_MEM; - p->to.name = NAME_EXTERN; - p->to.sym = linksym(n->left->sym); - } -} - -/* - * generate division. - * caller must set: - * ax = allocated AX register - * dx = allocated DX register - * generates one of: - * res = nl / nr - * res = nl % nr - * according to op. - */ -void -dodiv(int op, Node *nl, Node *nr, Node *res, Node *ax, Node *dx) -{ - int check; - Node n1, t1, t2, t3, t4, n4, nz; - Type *t, *t0; - Prog *p1, *p2; - - // Have to be careful about handling - // most negative int divided by -1 correctly. - // The hardware will trap. - // Also the byte divide instruction needs AH, - // which we otherwise don't have to deal with. - // Easiest way to avoid for int8, int16: use int32. - // For int32 and int64, use explicit test. - // Could use int64 hw for int32. 
- t = nl->type; - t0 = t; - check = 0; - if(issigned[t->etype]) { - check = 1; - if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -1LL<<(t->width*8-1)) - check = 0; - else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1) - check = 0; - } - if(t->width < 4) { - if(issigned[t->etype]) - t = types[TINT32]; - else - t = types[TUINT32]; - check = 0; - } - - tempname(&t1, t); - tempname(&t2, t); - if(t0 != t) { - tempname(&t3, t0); - tempname(&t4, t0); - cgen(nl, &t3); - cgen(nr, &t4); - // Convert. - gmove(&t3, &t1); - gmove(&t4, &t2); - } else { - cgen(nl, &t1); - cgen(nr, &t2); - } - - if(!samereg(ax, res) && !samereg(dx, res)) - regalloc(&n1, t, res); - else - regalloc(&n1, t, N); - gmove(&t2, &n1); - gmove(&t1, ax); - p2 = P; - if(nacl) { - // Native Client does not relay the divide-by-zero trap - // to the executing program, so we must insert a check - // for ourselves. - nodconst(&n4, t, 0); - gins(optoas(OCMP, t), &n1, &n4); - p1 = gbranch(optoas(ONE, t), T, +1); - if(panicdiv == N) - panicdiv = sysfunc("panicdivide"); - ginscall(panicdiv, -1); - patch(p1, pc); - } - if(check) { - nodconst(&n4, t, -1); - gins(optoas(OCMP, t), &n1, &n4); - p1 = gbranch(optoas(ONE, t), T, +1); - if(op == ODIV) { - // a / (-1) is -a. - gins(optoas(OMINUS, t), N, ax); - gmove(ax, res); - } else { - // a % (-1) is 0. 
- nodconst(&n4, t, 0); - gmove(&n4, res); - } - p2 = gbranch(AJMP, T, 0); - patch(p1, pc); - } - if(!issigned[t->etype]) { - nodconst(&nz, t, 0); - gmove(&nz, dx); - } else - gins(optoas(OEXTEND, t), N, N); - gins(optoas(op, t), &n1, N); - regfree(&n1); - - if(op == ODIV) - gmove(ax, res); - else - gmove(dx, res); - if(check) - patch(p2, pc); -} - -static void -savex(int dr, Node *x, Node *oldx, Node *res, Type *t) -{ - int r; - - r = reg[dr]; - nodreg(x, types[TINT32], dr); - - // save current ax and dx if they are live - // and not the destination - memset(oldx, 0, sizeof *oldx); - if(r > 0 && !samereg(x, res)) { - tempname(oldx, types[TINT32]); - gmove(x, oldx); - } - - regalloc(x, t, x); -} - -static void -restx(Node *x, Node *oldx) -{ - regfree(x); - - if(oldx->op != 0) { - x->type = types[TINT32]; - gmove(oldx, x); - } -} - -/* - * generate division according to op, one of: - * res = nl / nr - * res = nl % nr - */ -void -cgen_div(int op, Node *nl, Node *nr, Node *res) -{ - Node ax, dx, oldax, olddx; - Type *t; - - if(is64(nl->type)) - fatal("cgen_div %T", nl->type); - - if(issigned[nl->type->etype]) - t = types[TINT32]; - else - t = types[TUINT32]; - savex(REG_AX, &ax, &oldax, res, t); - savex(REG_DX, &dx, &olddx, res, t); - dodiv(op, nl, nr, res, &ax, &dx); - restx(&dx, &olddx); - restx(&ax, &oldax); -} - -/* - * generate shift according to op, one of: - * res = nl << nr - * res = nl >> nr - */ -void -cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res) -{ - Node n1, n2, nt, cx, oldcx, hi, lo; - int a, w; - Prog *p1, *p2; - uvlong sc; - - if(nl->type->width > 4) - fatal("cgen_shift %T", nl->type); - - w = nl->type->width * 8; - - a = optoas(op, nl->type); - - if(nr->op == OLITERAL) { - tempname(&n2, nl->type); - cgen(nl, &n2); - regalloc(&n1, nl->type, res); - gmove(&n2, &n1); - sc = mpgetfix(nr->val.u.xval); - if(sc >= nl->type->width*8) { - // large shift gets 2 shifts by width-1 - gins(a, ncon(w-1), &n1); - gins(a, ncon(w-1), &n1); - } else - 
gins(a, nr, &n1); - gmove(&n1, res); - regfree(&n1); - return; - } - - memset(&oldcx, 0, sizeof oldcx); - nodreg(&cx, types[TUINT32], REG_CX); - if(reg[REG_CX] > 1 && !samereg(&cx, res)) { - tempname(&oldcx, types[TUINT32]); - gmove(&cx, &oldcx); - } - - if(nr->type->width > 4) { - tempname(&nt, nr->type); - n1 = nt; - } else { - nodreg(&n1, types[TUINT32], REG_CX); - regalloc(&n1, nr->type, &n1); // to hold the shift type in CX - } - - if(samereg(&cx, res)) - regalloc(&n2, nl->type, N); - else - regalloc(&n2, nl->type, res); - if(nl->ullman >= nr->ullman) { - cgen(nl, &n2); - cgen(nr, &n1); - } else { - cgen(nr, &n1); - cgen(nl, &n2); - } - - // test and fix up large shifts - if(bounded) { - if(nr->type->width > 4) { - // delayed reg alloc - nodreg(&n1, types[TUINT32], REG_CX); - regalloc(&n1, types[TUINT32], &n1); // to hold the shift type in CX - split64(&nt, &lo, &hi); - gmove(&lo, &n1); - splitclean(); - } - } else { - if(nr->type->width > 4) { - // delayed reg alloc - nodreg(&n1, types[TUINT32], REG_CX); - regalloc(&n1, types[TUINT32], &n1); // to hold the shift type in CX - split64(&nt, &lo, &hi); - gmove(&lo, &n1); - gins(optoas(OCMP, types[TUINT32]), &hi, ncon(0)); - p2 = gbranch(optoas(ONE, types[TUINT32]), T, +1); - gins(optoas(OCMP, types[TUINT32]), &n1, ncon(w)); - p1 = gbranch(optoas(OLT, types[TUINT32]), T, +1); - splitclean(); - patch(p2, pc); - } else { - gins(optoas(OCMP, nr->type), &n1, ncon(w)); - p1 = gbranch(optoas(OLT, types[TUINT32]), T, +1); - } - if(op == ORSH && issigned[nl->type->etype]) { - gins(a, ncon(w-1), &n2); - } else { - gmove(ncon(0), &n2); - } - patch(p1, pc); - } - gins(a, &n1, &n2); - - if(oldcx.op != 0) - gmove(&oldcx, &cx); - - gmove(&n2, res); - - regfree(&n1); - regfree(&n2); -} - -/* - * generate byte multiply: - * res = nl * nr - * there is no 2-operand byte multiply instruction so - * we do a full-width multiplication and truncate afterwards. 
- */ -void -cgen_bmul(int op, Node *nl, Node *nr, Node *res) -{ - Node n1, n2, nt, *tmp; - Type *t; - int a; - - // copy from byte to full registers - t = types[TUINT32]; - if(issigned[nl->type->etype]) - t = types[TINT32]; - - // largest ullman on left. - if(nl->ullman < nr->ullman) { - tmp = nl; - nl = nr; - nr = tmp; - } - - tempname(&nt, nl->type); - cgen(nl, &nt); - regalloc(&n1, t, res); - cgen(nr, &n1); - regalloc(&n2, t, N); - gmove(&nt, &n2); - a = optoas(op, t); - gins(a, &n2, &n1); - regfree(&n2); - gmove(&n1, res); - regfree(&n1); -} - -/* - * generate high multiply: - * res = (nl*nr) >> width - */ -void -cgen_hmul(Node *nl, Node *nr, Node *res) -{ - Type *t; - int a; - Node n1, n2, ax, dx; - - t = nl->type; - a = optoas(OHMUL, t); - // gen nl in n1. - tempname(&n1, t); - cgen(nl, &n1); - // gen nr in n2. - regalloc(&n2, t, res); - cgen(nr, &n2); - - // multiply. - nodreg(&ax, t, REG_AX); - gmove(&n2, &ax); - gins(a, &n1, N); - regfree(&n2); - - if(t->width == 1) { - // byte multiply behaves differently. - nodreg(&ax, t, REG_AH); - nodreg(&dx, t, REG_DX); - gmove(&ax, &dx); - } - nodreg(&dx, t, REG_DX); - gmove(&dx, res); -} - -static void cgen_float387(Node *n, Node *res); -static void cgen_floatsse(Node *n, Node *res); - -/* - * generate floating-point operation. 
 */
void
cgen_float(Node *n, Node *res)
{
	Node *nl;
	Node n1, n2;
	Prog *p1, *p2, *p3;

	nl = n->left;
	switch(n->op) {
	// comparisons: materialize a boolean in res by branching.
	// NOTE(review): OGT is not listed here although bgen_float handles it;
	// confirm whether callers can reach this with OGT.
	case OEQ:
	case ONE:
	case OLT:
	case OLE:
	case OGE:
		p1 = gbranch(AJMP, T, 0);	// skip over the "true" block
		p2 = pc;			// p2: start of the "true" block
		gmove(nodbool(1), res);
		p3 = gbranch(AJMP, T, 0);	// jump to the end
		patch(p1, pc);
		bgen(n, 1, 0, p2);		// branch to p2 if n is true
		gmove(nodbool(0), res);
		patch(p3, pc);
		return;

	case OPLUS:
		// unary plus: just generate the operand
		cgen(nl, res);
		return;

	case OCONV:
		if(eqtype(n->type, nl->type) || noconv(n->type, nl->type)) {
			cgen(nl, res);
			return;
		}

		// convert by moving through a temporary of the target type
		tempname(&n2, n->type);
		mgen(nl, &n1, res);
		gmove(&n1, &n2);
		gmove(&n2, res);
		mfree(&n1);
		return;
	}

	// everything else is arithmetic: pick the backend
	if(use_sse)
		cgen_floatsse(n, res);
	else
		cgen_float387(n, res);
}

// floating-point.  387 (not SSE2)
// generates res = n using the x87 register stack (F0 and F0+1).
static void
cgen_float387(Node *n, Node *res)
{
	Node f0, f1;
	Node *nl, *nr;

	nl = n->left;
	nr = n->right;
	nodreg(&f0, nl->type, REG_F0);
	nodreg(&f1, n->type, REG_F0+1);
	if(nr != N)
		goto flt2;

	// unary
	cgen(nl, &f0);
	if(n->op != OCONV && n->op != OPLUS)
		gins(foptoas(n->op, n->type, 0), N, N);
	gmove(&f0, res);
	return;

flt2:	// binary
	// evaluate the operand with the larger ullman number first;
	// Frev marks the operands as reversed, Fpop selects a popping form.
	if(nl->ullman >= nr->ullman) {
		cgen(nl, &f0);
		if(nr->addable)
			gins(foptoas(n->op, n->type, 0), nr, &f0);
		else {
			cgen(nr, &f0);
			gins(foptoas(n->op, n->type, Fpop), &f0, &f1);
		}
	} else {
		cgen(nr, &f0);
		if(nl->addable)
			gins(foptoas(n->op, n->type, Frev), nl, &f0);
		else {
			cgen(nl, &f0);
			gins(foptoas(n->op, n->type, Frev|Fpop), &f0, &f1);
		}
	}
	gmove(&f0, res);
	return;

}

// floating-point, SSE.
// generates res = n for the arithmetic ops listed in the switch;
// any other op is a fatal error.
static void
cgen_floatsse(Node *n, Node *res)
{
	Node *nl, *nr, *r;
	Node n1, n2, nt;
	int a;

	nl = n->left;
	nr = n->right;
	switch(n->op) {
	default:
		dump("cgen_floatsse", n);
		fatal("cgen_floatsse %O", n->op);
		return;

	// negation is generated as a multiply by the constant -1
	case OMINUS:
	case OCOM:
		nr = nodintconst(-1);
		convlit(&nr, n->type);
		a = foptoas(OMUL, nl->type, 0);
		goto sbop;

	// symmetric binary
	case OADD:
	case OMUL:
		a = foptoas(n->op, nl->type, 0);
		goto sbop;

	// asymmetric binary
	case OSUB:
	case OMOD:
	case ODIV:
		a = foptoas(n->op, nl->type, 0);
		goto abop;
	}

sbop:	// symmetric binary
	// operands may be swapped: put the more complex one (or the
	// non-literal) on the left
	if(nl->ullman < nr->ullman || nl->op == OLITERAL) {
		r = nl;
		nl = nr;
		nr = r;
	}

abop:	// asymmetric binary
	if(nl->ullman >= nr->ullman) {
		// left first, parked in a temporary while right is generated
		tempname(&nt, nl->type);
		cgen(nl, &nt);
		mgen(nr, &n2, N);
		regalloc(&n1, nl->type, res);
		gmove(&nt, &n1);
		gins(a, &n2, &n1);
		gmove(&n1, res);
		regfree(&n1);
		mfree(&n2);
	} else {
		// right first, both operands in registers
		regalloc(&n2, nr->type, res);
		cgen(nr, &n2);
		regalloc(&n1, nl->type, N);
		cgen(nl, &n1);
		gins(a, &n2, &n1);
		regfree(&n2);
		gmove(&n1, res);
		regfree(&n1);
	}
	return;
}

// generate a conditional branch on the floating-point comparison n:
// jump to `to` when the truth value of n equals `true`.
// likely is passed through to gbranch as the branch hint.
void
bgen_float(Node *n, int true, int likely, Prog *to)
{
	int et, a;
	Node *nl, *nr, *r;
	Node n1, n2, n3, tmp, t1, t2, ax;
	Prog *p1, *p2;

	nl = n->left;
	nr = n->right;
	a = n->op;
	if(!true) {
		// brcom is not valid on floats when NaN is involved.
		// Instead: branch to p2 (just past this sequence) when n is
		// true, otherwise fall into an unconditional jump to `to`.
		p1 = gbranch(AJMP, T, 0);
		p2 = gbranch(AJMP, T, 0);
		patch(p1, pc);
		// No need to avoid re-genning ninit.
		bgen_float(n, 1, -likely, p2);
		patch(gbranch(AJMP, T, 0), to);
		patch(p2, pc);
		return;
	}

	if(use_sse)
		goto sse;
	else
		goto x87;

x87:
	a = brrev(a);	// because the args are stacked
	if(a == OGE || a == OGT) {
		// only < and <= work right with NaN; reverse if needed
		r = nr;
		nr = nl;
		nl = r;
		a = brrev(a);
	}

	nodreg(&tmp, nr->type, REG_F0);
	nodreg(&n2, nr->type, REG_F0 + 1);
	nodreg(&ax, types[TUINT16], REG_AX);
	et = simsimtype(nr->type);
	if(et == TFLOAT64) {
		// push both operands; exchange so the evaluation order
		// does not affect which operand ends up on top
		if(nl->ullman > nr->ullman) {
			cgen(nl, &tmp);
			cgen(nr, &tmp);
			gins(AFXCHD, &tmp, &n2);
		} else {
			cgen(nr, &tmp);
			cgen(nl, &tmp);
		}
		gins(AFUCOMIP, &tmp, &n2);
		gins(AFMOVDP, &tmp, &tmp);	// annoying pop but still better than STSW+SAHF
	} else {
		// TODO(rsc): The moves back and forth to memory
		// here are for truncating the value to 32 bits.
		// This handles 32-bit comparison but presumably
		// all the other ops have the same problem.
		// We need to figure out what the right general
		// solution is, besides telling people to use float64.
		tempname(&t1, types[TFLOAT32]);
		tempname(&t2, types[TFLOAT32]);
		cgen(nr, &t1);
		cgen(nl, &t2);
		gmove(&t2, &tmp);
		gins(AFCOMFP, &t1, &tmp);
		// copy the x87 status word into the CPU flags via AX
		gins(AFSTSW, N, &ax);
		gins(ASAHF, N, N);
	}

	goto ret;

sse:
	// make both operands addressable, then load them into registers
	if(!nl->addable) {
		tempname(&n1, nl->type);
		cgen(nl, &n1);
		nl = &n1;
	}
	if(!nr->addable) {
		tempname(&tmp, nr->type);
		cgen(nr, &tmp);
		nr = &tmp;
	}
	regalloc(&n2, nr->type, N);
	gmove(nr, &n2);
	nr = &n2;

	if(nl->op != OREGISTER) {
		regalloc(&n3, nl->type, N);
		gmove(nl, &n3);
		nl = &n3;
	}

	if(a == OGE || a == OGT) {
		// only < and <= work right with NaN; reverse if needed
		r = nr;
		nr = nl;
		nl = r;
		a = brrev(a);
	}

	gins(foptoas(OCMP, nr->type, 0), nl, nr);
	if(nl->op == OREGISTER)
		regfree(nl);
	regfree(nr);

ret:
	// flags are set; emit the branch(es) for condition a
	if(a == OEQ) {
		// neither NE nor P
		p1 = gbranch(AJNE, T, -likely);
		p2 = gbranch(AJPS, T, -likely);
		patch(gbranch(AJMP, T, 0), to);
		patch(p1, pc);
		patch(p2, pc);
	} else if(a == ONE) {
		// either NE or P
		patch(gbranch(AJNE, T, likely), to);
		patch(gbranch(AJPS, T, likely), to);
	} else
		patch(gbranch(optoas(a, nr->type), T, likely), to);

}

// Called after regopt and peep have run.
// Expand CHECKNIL pseudo-op into actual nil pointer check.
-void -expandchecks(Prog *firstp) -{ - Prog *p, *p1, *p2; - - for(p = firstp; p != P; p = p->link) { - if(p->as != ACHECKNIL) - continue; - if(debug_checknil && p->lineno > 1) // p->lineno==1 in generated wrappers - warnl(p->lineno, "generated nil check"); - // check is - // CMP arg, $0 - // JNE 2(PC) (likely) - // MOV AX, 0 - p1 = mal(sizeof *p1); - p2 = mal(sizeof *p2); - clearp(p1); - clearp(p2); - p1->link = p2; - p2->link = p->link; - p->link = p1; - p1->lineno = p->lineno; - p2->lineno = p->lineno; - p1->pc = 9999; - p2->pc = 9999; - p->as = ACMPL; - p->to.type = TYPE_CONST; - p->to.offset = 0; - p1->as = AJNE; - p1->from.type = TYPE_CONST; - p1->from.offset = 1; // likely - p1->to.type = TYPE_BRANCH; - p1->to.u.branch = p2->link; - // crash by write to memory address 0. - // if possible, since we know arg is 0, use 0(arg), - // which will be shorter to encode than plain 0. - p2->as = AMOVL; - p2->from.type = TYPE_REG; - p2->from.reg = REG_AX; - if(regtyp(&p->from)) { - p2->to.type = TYPE_MEM; - p2->to.reg = p->from.reg; - } else - p2->to.type = TYPE_MEM; - p2->to.offset = 0; - } -} |