diff options
Diffstat (limited to 'src/cmd/6g/ggen.c')
-rw-r--r-- | src/cmd/6g/ggen.c | 1046 |
1 files changed, 0 insertions, 1046 deletions
diff --git a/src/cmd/6g/ggen.c b/src/cmd/6g/ggen.c deleted file mode 100644 index 72104589a3..0000000000 --- a/src/cmd/6g/ggen.c +++ /dev/null @@ -1,1046 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#undef EXTERN -#define EXTERN -#include <u.h> -#include <libc.h> -#include "gg.h" -#include "../gc/popt.h" - -static Prog *appendpp(Prog*, int, int, int, vlong, int, int, vlong); -static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax); - -void -defframe(Prog *ptxt) -{ - uint32 frame, ax; - Prog *p; - vlong hi, lo; - NodeList *l; - Node *n; - - // fill in argument size, stack size - ptxt->to.type = TYPE_TEXTSIZE; - ptxt->to.u.argsize = rnd(curfn->type->argwid, widthptr); - frame = rnd(stksize+maxarg, widthreg); - ptxt->to.offset = frame; - - // insert code to zero ambiguously live variables - // so that the garbage collector only sees initialized values - // when it looks for pointers. - p = ptxt; - lo = hi = 0; - ax = 0; - // iterate through declarations - they are sorted in decreasing xoffset order. 
- for(l=curfn->dcl; l != nil; l = l->next) { - n = l->n; - if(!n->needzero) - continue; - if(n->class != PAUTO) - fatal("needzero class %d", n->class); - if(n->type->width % widthptr != 0 || n->xoffset % widthptr != 0 || n->type->width == 0) - fatal("var %lN has size %d offset %d", n, (int)n->type->width, (int)n->xoffset); - - if(lo != hi && n->xoffset + n->type->width >= lo - 2*widthreg) { - // merge with range we already have - lo = n->xoffset; - continue; - } - // zero old range - p = zerorange(p, frame, lo, hi, &ax); - - // set new range - hi = n->xoffset + n->type->width; - lo = n->xoffset; - } - // zero final range - zerorange(p, frame, lo, hi, &ax); -} - -static Prog* -zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax) -{ - vlong cnt, i; - - cnt = hi - lo; - if(cnt == 0) - return p; - if(*ax == 0) { - p = appendpp(p, AMOVQ, TYPE_CONST, 0, 0, TYPE_REG, REG_AX, 0); - *ax = 1; - } - if(cnt % widthreg != 0) { - // should only happen with nacl - if(cnt % widthptr != 0) - fatal("zerorange count not a multiple of widthptr %d", cnt); - p = appendpp(p, AMOVL, TYPE_REG, REG_AX, 0, TYPE_MEM, REG_SP, frame+lo); - lo += widthptr; - cnt -= widthptr; - } - if(cnt <= 4*widthreg) { - for(i = 0; i < cnt; i += widthreg) { - p = appendpp(p, AMOVQ, TYPE_REG, REG_AX, 0, TYPE_MEM, REG_SP, frame+lo+i); - } - } else if(!nacl && (cnt <= 128*widthreg)) { - p = appendpp(p, leaptr, TYPE_MEM, REG_SP, frame+lo, TYPE_REG, REG_DI, 0); - p = appendpp(p, ADUFFZERO, TYPE_NONE, 0, 0, TYPE_ADDR, 0, 2*(128-cnt/widthreg)); - p->to.sym = linksym(pkglookup("duffzero", runtimepkg)); - } else { - p = appendpp(p, AMOVQ, TYPE_CONST, 0, cnt/widthreg, TYPE_REG, REG_CX, 0); - p = appendpp(p, leaptr, TYPE_MEM, REG_SP, frame+lo, TYPE_REG, REG_DI, 0); - p = appendpp(p, AREP, TYPE_NONE, 0, 0, TYPE_NONE, 0, 0); - p = appendpp(p, ASTOSQ, TYPE_NONE, 0, 0, TYPE_NONE, 0, 0); - } - return p; -} - -static Prog* -appendpp(Prog *p, int as, int ftype, int freg, vlong foffset, int ttype, int treg, vlong 
toffset) -{ - Prog *q; - q = mal(sizeof(*q)); - clearp(q); - q->as = as; - q->lineno = p->lineno; - q->from.type = ftype; - q->from.reg = freg; - q->from.offset = foffset; - q->to.type = ttype; - q->to.reg = treg; - q->to.offset = toffset; - q->link = p->link; - p->link = q; - return q; -} - -/* - * generate: - * call f - * proc=-1 normal call but no return - * proc=0 normal call - * proc=1 goroutine run in new proc - * proc=2 defer call save away stack - * proc=3 normal call to C pointer (not Go func value) - */ -void -ginscall(Node *f, int proc) -{ - Prog *p; - Node reg, stk; - Node r1; - int32 extra; - - if(f->type != T) { - extra = 0; - if(proc == 1 || proc == 2) - extra = 2 * widthptr; - setmaxarg(f->type, extra); - } - - switch(proc) { - default: - fatal("ginscall: bad proc %d", proc); - break; - - case 0: // normal call - case -1: // normal call but no return - if(f->op == ONAME && f->class == PFUNC) { - if(f == deferreturn) { - // Deferred calls will appear to be returning to - // the CALL deferreturn(SB) that we are about to emit. - // However, the stack trace code will show the line - // of the instruction byte before the return PC. - // To avoid that being an unrelated instruction, - // insert an x86 NOP that we will have the right line number. - // x86 NOP 0x90 is really XCHG AX, AX; use that description - // because the NOP pseudo-instruction would be removed by - // the linker. 
- nodreg(&reg, types[TINT], REG_AX); - gins(AXCHGL, &reg, &reg); - } - p = gins(ACALL, N, f); - afunclit(&p->to, f); - if(proc == -1 || noreturn(p)) - gins(AUNDEF, N, N); - break; - } - nodreg(&reg, types[tptr], REG_DX); - nodreg(&r1, types[tptr], REG_BX); - gmove(f, &reg); - reg.op = OINDREG; - gmove(&reg, &r1); - reg.op = OREGISTER; - gins(ACALL, &reg, &r1); - break; - - case 3: // normal call of c function pointer - gins(ACALL, N, f); - break; - - case 1: // call in new proc (go) - case 2: // deferred call (defer) - memset(&stk, 0, sizeof(stk)); - stk.op = OINDREG; - stk.val.u.reg = REG_SP; - stk.xoffset = 0; - - if(widthptr == 8) { - // size of arguments at 0(SP) - ginscon(AMOVQ, argsize(f->type), &stk); - - // FuncVal* at 8(SP) - stk.xoffset = widthptr; - nodreg(&reg, types[TINT64], REG_AX); - gmove(f, &reg); - gins(AMOVQ, &reg, &stk); - } else { - // size of arguments at 0(SP) - ginscon(AMOVL, argsize(f->type), &stk); - - // FuncVal* at 4(SP) - stk.xoffset = widthptr; - nodreg(&reg, types[TINT32], REG_AX); - gmove(f, &reg); - gins(AMOVL, &reg, &stk); - } - - if(proc == 1) - ginscall(newproc, 0); - else { - if(!hasdefer) - fatal("hasdefer=0 but has defer"); - ginscall(deferproc, 0); - } - if(proc == 2) { - nodreg(&reg, types[TINT32], REG_AX); - gins(ATESTL, &reg, &reg); - p = gbranch(AJEQ, T, +1); - cgen_ret(N); - patch(p, pc); - } - break; - } -} - -/* - * n is call to interface method. - * generate res = n. - */ -void -cgen_callinter(Node *n, Node *res, int proc) -{ - Node *i, *f; - Node tmpi, nodi, nodo, nodr, nodsp; - - i = n->left; - if(i->op != ODOTINTER) - fatal("cgen_callinter: not ODOTINTER %O", i->op); - - f = i->right; // field - if(f->op != ONAME) - fatal("cgen_callinter: not ONAME %O", f->op); - - i = i->left; // interface - - if(!i->addable) { - tempname(&tmpi, i->type); - cgen(i, &tmpi); - i = &tmpi; - } - - genlist(n->list); // assign the args - - // i is now addable, prepare an indirected - // register to hold its address. 
- igen(i, &nodi, res); // REG = &inter - - nodindreg(&nodsp, types[tptr], REG_SP); - nodsp.xoffset = 0; - if(proc != 0) - nodsp.xoffset += 2 * widthptr; // leave room for size & fn - nodi.type = types[tptr]; - nodi.xoffset += widthptr; - cgen(&nodi, &nodsp); // {0, 8(nacl), or 16}(SP) = 8(REG) -- i.data - - regalloc(&nodo, types[tptr], res); - nodi.type = types[tptr]; - nodi.xoffset -= widthptr; - cgen(&nodi, &nodo); // REG = 0(REG) -- i.tab - regfree(&nodi); - - regalloc(&nodr, types[tptr], &nodo); - if(n->left->xoffset == BADWIDTH) - fatal("cgen_callinter: badwidth"); - cgen_checknil(&nodo); // in case offset is huge - nodo.op = OINDREG; - nodo.xoffset = n->left->xoffset + 3*widthptr + 8; - if(proc == 0) { - // plain call: use direct c function pointer - more efficient - cgen(&nodo, &nodr); // REG = 32+offset(REG) -- i.tab->fun[f] - proc = 3; - } else { - // go/defer. generate go func value. - gins(ALEAQ, &nodo, &nodr); // REG = &(32+offset(REG)) -- i.tab->fun[f] - } - - nodr.type = n->left->type; - ginscall(&nodr, proc); - - regfree(&nodr); - regfree(&nodo); -} - -/* - * generate function call; - * proc=0 normal call - * proc=1 goroutine run in new proc - * proc=2 defer call save away stack - */ -void -cgen_call(Node *n, int proc) -{ - Type *t; - Node nod, afun; - - if(n == N) - return; - - if(n->left->ullman >= UINF) { - // if name involves a fn call - // precompute the address of the fn - tempname(&afun, types[tptr]); - cgen(n->left, &afun); - } - - genlist(n->list); // assign the args - t = n->left->type; - - // call tempname pointer - if(n->left->ullman >= UINF) { - regalloc(&nod, types[tptr], N); - cgen_as(&nod, &afun); - nod.type = t; - ginscall(&nod, proc); - regfree(&nod); - return; - } - - // call pointer - if(n->left->op != ONAME || n->left->class != PFUNC) { - regalloc(&nod, types[tptr], N); - cgen_as(&nod, n->left); - nod.type = t; - ginscall(&nod, proc); - regfree(&nod); - return; - } - - // call direct - n->left->method = 1; - ginscall(n->left, 
proc); -} - -/* - * call to n has already been generated. - * generate: - * res = return value from call. - */ -void -cgen_callret(Node *n, Node *res) -{ - Node nod; - Type *fp, *t; - Iter flist; - - t = n->left->type; - if(t->etype == TPTR32 || t->etype == TPTR64) - t = t->type; - - fp = structfirst(&flist, getoutarg(t)); - if(fp == T) - fatal("cgen_callret: nil"); - - memset(&nod, 0, sizeof(nod)); - nod.op = OINDREG; - nod.val.u.reg = REG_SP; - nod.addable = 1; - - nod.xoffset = fp->width; - nod.type = fp->type; - cgen_as(res, &nod); -} - -/* - * call to n has already been generated. - * generate: - * res = &return value from call. - */ -void -cgen_aret(Node *n, Node *res) -{ - Node nod1, nod2; - Type *fp, *t; - Iter flist; - - t = n->left->type; - if(isptr[t->etype]) - t = t->type; - - fp = structfirst(&flist, getoutarg(t)); - if(fp == T) - fatal("cgen_aret: nil"); - - memset(&nod1, 0, sizeof(nod1)); - nod1.op = OINDREG; - nod1.val.u.reg = REG_SP; - nod1.addable = 1; - - nod1.xoffset = fp->width; - nod1.type = fp->type; - - if(res->op != OREGISTER) { - regalloc(&nod2, types[tptr], res); - gins(leaptr, &nod1, &nod2); - gins(movptr, &nod2, res); - regfree(&nod2); - } else - gins(leaptr, &nod1, res); -} - -/* - * generate return. - * n->left is assignments to return values. - */ -void -cgen_ret(Node *n) -{ - Prog *p; - - if(n != N) - genlist(n->list); // copy out args - if(hasdefer) - ginscall(deferreturn, 0); - genlist(curfn->exit); - p = gins(ARET, N, N); - if(n != N && n->op == ORETJMP) { - p->to.type = TYPE_MEM; - p->to.name = NAME_EXTERN; - p->to.sym = linksym(n->left->sym); - } -} - -/* - * generate division. - * generates one of: - * res = nl / nr - * res = nl % nr - * according to op. - */ -void -dodiv(int op, Node *nl, Node *nr, Node *res) -{ - int a, check; - Node n3, n4; - Type *t, *t0; - Node ax, dx, ax1, n31, oldax, olddx; - Prog *p1, *p2; - - // Have to be careful about handling - // most negative int divided by -1 correctly. 
- // The hardware will trap. - // Also the byte divide instruction needs AH, - // which we otherwise don't have to deal with. - // Easiest way to avoid for int8, int16: use int32. - // For int32 and int64, use explicit test. - // Could use int64 hw for int32. - t = nl->type; - t0 = t; - check = 0; - if(issigned[t->etype]) { - check = 1; - if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -(1ULL<<(t->width*8-1))) - check = 0; - else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1) - check = 0; - } - if(t->width < 4) { - if(issigned[t->etype]) - t = types[TINT32]; - else - t = types[TUINT32]; - check = 0; - } - a = optoas(op, t); - - regalloc(&n3, t0, N); - if(nl->ullman >= nr->ullman) { - savex(REG_AX, &ax, &oldax, res, t0); - cgen(nl, &ax); - regalloc(&ax, t0, &ax); // mark ax live during cgen - cgen(nr, &n3); - regfree(&ax); - } else { - cgen(nr, &n3); - savex(REG_AX, &ax, &oldax, res, t0); - cgen(nl, &ax); - } - if(t != t0) { - // Convert - ax1 = ax; - n31 = n3; - ax.type = t; - n3.type = t; - gmove(&ax1, &ax); - gmove(&n31, &n3); - } - - p2 = P; - if(nacl) { - // Native Client does not relay the divide-by-zero trap - // to the executing program, so we must insert a check - // for ourselves. - nodconst(&n4, t, 0); - gins(optoas(OCMP, t), &n3, &n4); - p1 = gbranch(optoas(ONE, t), T, +1); - if(panicdiv == N) - panicdiv = sysfunc("panicdivide"); - ginscall(panicdiv, -1); - patch(p1, pc); - } - if(check) { - nodconst(&n4, t, -1); - gins(optoas(OCMP, t), &n3, &n4); - p1 = gbranch(optoas(ONE, t), T, +1); - if(op == ODIV) { - // a / (-1) is -a. - gins(optoas(OMINUS, t), N, &ax); - gmove(&ax, res); - } else { - // a % (-1) is 0. 
- nodconst(&n4, t, 0); - gmove(&n4, res); - } - p2 = gbranch(AJMP, T, 0); - patch(p1, pc); - } - savex(REG_DX, &dx, &olddx, res, t); - if(!issigned[t->etype]) { - nodconst(&n4, t, 0); - gmove(&n4, &dx); - } else - gins(optoas(OEXTEND, t), N, N); - gins(a, &n3, N); - regfree(&n3); - if(op == ODIV) - gmove(&ax, res); - else - gmove(&dx, res); - restx(&dx, &olddx); - if(check) - patch(p2, pc); - restx(&ax, &oldax); -} - -/* - * register dr is one of the special ones (AX, CX, DI, SI, etc.). - * we need to use it. if it is already allocated as a temporary - * (r > 1; can only happen if a routine like sgen passed a - * special as cgen's res and then cgen used regalloc to reuse - * it as its own temporary), then move it for now to another - * register. caller must call restx to move it back. - * the move is not necessary if dr == res, because res is - * known to be dead. - */ -void -savex(int dr, Node *x, Node *oldx, Node *res, Type *t) -{ - int r; - - r = reg[dr]; - - // save current ax and dx if they are live - // and not the destination - memset(oldx, 0, sizeof *oldx); - nodreg(x, t, dr); - if(r > 1 && !samereg(x, res)) { - regalloc(oldx, types[TINT64], N); - x->type = types[TINT64]; - gmove(x, oldx); - x->type = t; - oldx->ostk = r; // squirrel away old r value - reg[dr] = 1; - } -} - -void -restx(Node *x, Node *oldx) -{ - if(oldx->op != 0) { - x->type = types[TINT64]; - reg[x->val.u.reg] = oldx->ostk; - gmove(oldx, x); - regfree(oldx); - } -} - -/* - * generate division according to op, one of: - * res = nl / nr - * res = nl % nr - */ -void -cgen_div(int op, Node *nl, Node *nr, Node *res) -{ - Node n1, n2, n3; - int w, a; - Magic m; - - if(nr->op != OLITERAL) - goto longdiv; - w = nl->type->width*8; - - // Front end handled 32-bit division. We only need to handle 64-bit. 
- // try to do division by multiply by (2^w)/d - // see hacker's delight chapter 10 - switch(simtype[nl->type->etype]) { - default: - goto longdiv; - - case TUINT64: - m.w = w; - m.ud = mpgetfix(nr->val.u.xval); - umagic(&m); - if(m.bad) - break; - if(op == OMOD) - goto longmod; - - cgenr(nl, &n1, N); - nodconst(&n2, nl->type, m.um); - regalloc(&n3, nl->type, res); - cgen_hmul(&n1, &n2, &n3); - - if(m.ua) { - // need to add numerator accounting for overflow - gins(optoas(OADD, nl->type), &n1, &n3); - nodconst(&n2, nl->type, 1); - gins(optoas(ORROTC, nl->type), &n2, &n3); - nodconst(&n2, nl->type, m.s-1); - gins(optoas(ORSH, nl->type), &n2, &n3); - } else { - nodconst(&n2, nl->type, m.s); - gins(optoas(ORSH, nl->type), &n2, &n3); // shift dx - } - - gmove(&n3, res); - regfree(&n1); - regfree(&n3); - return; - - case TINT64: - m.w = w; - m.sd = mpgetfix(nr->val.u.xval); - smagic(&m); - if(m.bad) - break; - if(op == OMOD) - goto longmod; - - cgenr(nl, &n1, res); - nodconst(&n2, nl->type, m.sm); - regalloc(&n3, nl->type, N); - cgen_hmul(&n1, &n2, &n3); - - if(m.sm < 0) { - // need to add numerator - gins(optoas(OADD, nl->type), &n1, &n3); - } - - nodconst(&n2, nl->type, m.s); - gins(optoas(ORSH, nl->type), &n2, &n3); // shift n3 - - nodconst(&n2, nl->type, w-1); - gins(optoas(ORSH, nl->type), &n2, &n1); // -1 iff num is neg - gins(optoas(OSUB, nl->type), &n1, &n3); // added - - if(m.sd < 0) { - // this could probably be removed - // by factoring it into the multiplier - gins(optoas(OMINUS, nl->type), N, &n3); - } - - gmove(&n3, res); - regfree(&n1); - regfree(&n3); - return; - } - goto longdiv; - -longdiv: - // division and mod using (slow) hardware instruction - dodiv(op, nl, nr, res); - return; - -longmod: - // mod using formula A%B = A-(A/B*B) but - // we know that there is a fast algorithm for A/B - regalloc(&n1, nl->type, res); - cgen(nl, &n1); - regalloc(&n2, nl->type, N); - cgen_div(ODIV, &n1, nr, &n2); - a = optoas(OMUL, nl->type); - if(w == 8) { - // use 
2-operand 16-bit multiply - // because there is no 2-operand 8-bit multiply - a = AIMULW; - } - if(!smallintconst(nr)) { - regalloc(&n3, nl->type, N); - cgen(nr, &n3); - gins(a, &n3, &n2); - regfree(&n3); - } else - gins(a, nr, &n2); - gins(optoas(OSUB, nl->type), &n2, &n1); - gmove(&n1, res); - regfree(&n1); - regfree(&n2); -} - -/* - * generate high multiply: - * res = (nl*nr) >> width - */ -void -cgen_hmul(Node *nl, Node *nr, Node *res) -{ - Type *t; - int a; - Node n1, n2, ax, dx, *tmp; - - t = nl->type; - a = optoas(OHMUL, t); - if(nl->ullman < nr->ullman) { - tmp = nl; - nl = nr; - nr = tmp; - } - cgenr(nl, &n1, res); - cgenr(nr, &n2, N); - nodreg(&ax, t, REG_AX); - gmove(&n1, &ax); - gins(a, &n2, N); - regfree(&n2); - regfree(&n1); - - if(t->width == 1) { - // byte multiply behaves differently. - nodreg(&ax, t, REG_AH); - nodreg(&dx, t, REG_DX); - gmove(&ax, &dx); - } - nodreg(&dx, t, REG_DX); - gmove(&dx, res); -} - -/* - * generate shift according to op, one of: - * res = nl << nr - * res = nl >> nr - */ -void -cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res) -{ - Node n1, n2, n3, n4, n5, cx, oldcx; - int a, rcx; - Prog *p1; - uvlong sc; - Type *tcount; - - a = optoas(op, nl->type); - - if(nr->op == OLITERAL) { - regalloc(&n1, nl->type, res); - cgen(nl, &n1); - sc = mpgetfix(nr->val.u.xval); - if(sc >= nl->type->width*8) { - // large shift gets 2 shifts by width-1 - nodconst(&n3, types[TUINT32], nl->type->width*8-1); - gins(a, &n3, &n1); - gins(a, &n3, &n1); - } else - gins(a, nr, &n1); - gmove(&n1, res); - regfree(&n1); - goto ret; - } - - if(nl->ullman >= UINF) { - tempname(&n4, nl->type); - cgen(nl, &n4); - nl = &n4; - } - if(nr->ullman >= UINF) { - tempname(&n5, nr->type); - cgen(nr, &n5); - nr = &n5; - } - - rcx = reg[REG_CX]; - nodreg(&n1, types[TUINT32], REG_CX); - - // Allow either uint32 or uint64 as shift type, - // to avoid unnecessary conversion from uint32 to uint64 - // just to do the comparison. 
- tcount = types[simtype[nr->type->etype]]; - if(tcount->etype < TUINT32) - tcount = types[TUINT32]; - - regalloc(&n1, nr->type, &n1); // to hold the shift type in CX - regalloc(&n3, tcount, &n1); // to clear high bits of CX - - nodreg(&cx, types[TUINT64], REG_CX); - memset(&oldcx, 0, sizeof oldcx); - if(rcx > 0 && !samereg(&cx, res)) { - regalloc(&oldcx, types[TUINT64], N); - gmove(&cx, &oldcx); - } - cx.type = tcount; - - if(samereg(&cx, res)) - regalloc(&n2, nl->type, N); - else - regalloc(&n2, nl->type, res); - if(nl->ullman >= nr->ullman) { - cgen(nl, &n2); - cgen(nr, &n1); - gmove(&n1, &n3); - } else { - cgen(nr, &n1); - gmove(&n1, &n3); - cgen(nl, &n2); - } - regfree(&n3); - - // test and fix up large shifts - if(!bounded) { - nodconst(&n3, tcount, nl->type->width*8); - gins(optoas(OCMP, tcount), &n1, &n3); - p1 = gbranch(optoas(OLT, tcount), T, +1); - if(op == ORSH && issigned[nl->type->etype]) { - nodconst(&n3, types[TUINT32], nl->type->width*8-1); - gins(a, &n3, &n2); - } else { - nodconst(&n3, nl->type, 0); - gmove(&n3, &n2); - } - patch(p1, pc); - } - - gins(a, &n1, &n2); - - if(oldcx.op != 0) { - cx.type = types[TUINT64]; - gmove(&oldcx, &cx); - regfree(&oldcx); - } - - gmove(&n2, res); - - regfree(&n1); - regfree(&n2); - -ret: - ; -} - -/* - * generate byte multiply: - * res = nl * nr - * there is no 2-operand byte multiply instruction so - * we do a full-width multiplication and truncate afterwards. - */ -void -cgen_bmul(int op, Node *nl, Node *nr, Node *res) -{ - Node n1, n2, n1b, n2b, *tmp; - Type *t; - int a; - - // largest ullman on left. - if(nl->ullman < nr->ullman) { - tmp = nl; - nl = nr; - nr = tmp; - } - - // generate operands in "8-bit" registers. - regalloc(&n1b, nl->type, res); - cgen(nl, &n1b); - regalloc(&n2b, nr->type, N); - cgen(nr, &n2b); - - // perform full-width multiplication. 
- t = types[TUINT64]; - if(issigned[nl->type->etype]) - t = types[TINT64]; - nodreg(&n1, t, n1b.val.u.reg); - nodreg(&n2, t, n2b.val.u.reg); - a = optoas(op, t); - gins(a, &n2, &n1); - - // truncate. - gmove(&n1, res); - regfree(&n1b); - regfree(&n2b); -} - -void -clearfat(Node *nl) -{ - int64 w, c, q; - Node n1, oldn1, ax, oldax, di, z; - Prog *p; - - /* clear a fat object */ - if(debug['g']) - dump("\nclearfat", nl); - - w = nl->type->width; - // Avoid taking the address for simple enough types. - if(componentgen(N, nl)) - return; - - c = w % 8; // bytes - q = w / 8; // quads - - if(q < 4) { - // Write sequence of MOV 0, off(base) instead of using STOSQ. - // The hope is that although the code will be slightly longer, - // the MOVs will have no dependencies and pipeline better - // than the unrolled STOSQ loop. - // NOTE: Must use agen, not igen, so that optimizer sees address - // being taken. We are not writing on field boundaries. - agenr(nl, &n1, N); - n1.op = OINDREG; - nodconst(&z, types[TUINT64], 0); - while(q-- > 0) { - n1.type = z.type; - gins(AMOVQ, &z, &n1); - n1.xoffset += 8; - } - if(c >= 4) { - nodconst(&z, types[TUINT32], 0); - n1.type = z.type; - gins(AMOVL, &z, &n1); - n1.xoffset += 4; - c -= 4; - } - nodconst(&z, types[TUINT8], 0); - while(c-- > 0) { - n1.type = z.type; - gins(AMOVB, &z, &n1); - n1.xoffset++; - } - regfree(&n1); - return; - } - - savex(REG_DI, &n1, &oldn1, N, types[tptr]); - agen(nl, &n1); - - savex(REG_AX, &ax, &oldax, N, types[tptr]); - gconreg(AMOVL, 0, REG_AX); - - if(q > 128 || nacl) { - gconreg(movptr, q, REG_CX); - gins(AREP, N, N); // repeat - gins(ASTOSQ, N, N); // STOQ AL,*(DI)+ - } else { - p = gins(ADUFFZERO, N, N); - p->to.type = TYPE_ADDR; - p->to.sym = linksym(pkglookup("duffzero", runtimepkg)); - // 2 and 128 = magic constants: see ../../runtime/asm_amd64.s - p->to.offset = 2*(128-q); - } - - z = ax; - di = n1; - if(w >= 8 && c >= 4) { - di.op = OINDREG; - di.type = z.type = types[TINT64]; - p = gins(AMOVQ, &z, 
&di); - p->to.scale = 1; - p->to.offset = c-8; - } else if(c >= 4) { - di.op = OINDREG; - di.type = z.type = types[TINT32]; - p = gins(AMOVL, &z, &di); - if(c > 4) { - p = gins(AMOVL, &z, &di); - p->to.scale = 1; - p->to.offset = c-4; - } - } else - while(c > 0) { - gins(ASTOSB, N, N); // STOB AL,*(DI)+ - c--; - } - - restx(&n1, &oldn1); - restx(&ax, &oldax); -} - -// Called after regopt and peep have run. -// Expand CHECKNIL pseudo-op into actual nil pointer check. -void -expandchecks(Prog *firstp) -{ - Prog *p, *p1, *p2; - - for(p = firstp; p != P; p = p->link) { - if(p->as != ACHECKNIL) - continue; - if(debug_checknil && p->lineno > 1) // p->lineno==1 in generated wrappers - warnl(p->lineno, "generated nil check"); - // check is - // CMP arg, $0 - // JNE 2(PC) (likely) - // MOV AX, 0 - p1 = mal(sizeof *p1); - p2 = mal(sizeof *p2); - clearp(p1); - clearp(p2); - p1->link = p2; - p2->link = p->link; - p->link = p1; - p1->lineno = p->lineno; - p2->lineno = p->lineno; - p1->pc = 9999; - p2->pc = 9999; - p->as = cmpptr; - p->to.type = TYPE_CONST; - p->to.offset = 0; - p1->as = AJNE; - p1->from.type = TYPE_CONST; - p1->from.offset = 1; // likely - p1->to.type = TYPE_BRANCH; - p1->to.u.branch = p2->link; - // crash by write to memory address 0. - // if possible, since we know arg is 0, use 0(arg), - // which will be shorter to encode than plain 0. - p2->as = AMOVL; - p2->from.type = TYPE_REG; - p2->from.reg = REG_AX; - if(regtyp(&p->from)) { - p2->to.type = TYPE_MEM; - p2->to.reg = p->from.reg; - } else { - p2->to.type = TYPE_MEM; - p2->to.reg = REG_NONE; - } - p2->to.offset = 0; - } -} |