1 files changed, 0 insertions, 1046 deletions
diff --git a/src/cmd/6g/ggen.c b/src/cmd/6g/ggen.c
deleted file mode 100644
index 72104589a3..0000000000
--- a/src/cmd/6g/ggen.c
+++ /dev/null
@@ -1,1046 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#undef	EXTERN
-#define	EXTERN
-#include <u.h>
-#include <libc.h>
-#include "gg.h"
-#include "../gc/popt.h"
-
-static Prog *appendpp(Prog*, int, int, int, vlong, int, int, vlong);	// defined below; called by zerorange
-static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax);	// defined below; called by defframe
-
-void
-defframe(Prog *ptxt)
-{
-	uint32 frame, ax;
-	Prog *last;
-	vlong lo, hi, off, end;
-	NodeList *dcl;
-	Node *var;
-
-	// record the argument size and the rounded frame size on ptxt.
-	ptxt->to.type = TYPE_TEXTSIZE;
-	ptxt->to.u.argsize = rnd(curfn->type->argwid, widthptr);
-	frame = rnd(stksize+maxarg, widthreg);
-	ptxt->to.offset = frame;
-
-	// emit code that zeroes ambiguously live variables, so that the
-	// garbage collector only sees initialized values when it looks for
-	// pointers.  [lo, hi) is the pending run of frame offsets to zero.
-	// ax is set by zerorange once it has loaded zero into AX.
-	last = ptxt;
-	ax = 0;
-	lo = hi = 0;
-	// curfn->dcl is sorted in decreasing xoffset order.
-	for(dcl = curfn->dcl; dcl != nil; dcl = dcl->next) {
-		var = dcl->n;
-		if(!var->needzero)
-			continue;
-		if(var->class != PAUTO)
-			fatal("needzero class %d", var->class);
-		// widths and offsets must be pointer-aligned and the width nonzero.
-		if(var->type->width % widthptr != 0 || var->xoffset % widthptr != 0 || var->type->width == 0)
-			fatal("var %lN has size %d offset %d", var, (int)var->type->width, (int)var->xoffset);
-
-		off = var->xoffset;
-		end = off + var->type->width;
-		if(lo == hi || end < lo - 2*widthreg) {
-			// nothing pending, or too far below the pending run to merge:
-			// zero the pending run and start a new one ending here.
-			last = zerorange(last, frame, lo, hi, &ax);
-			hi = end;
-		}
-		lo = off;	// in both cases the run now starts at this variable
-	}
-	// zero the final run.
-	zerorange(last, frame, lo, hi, &ax);
-}
-
-// zerorange appends after p the code that zeroes frame offsets [lo, hi) off SP and returns
-// the last Prog added. *ax says whether AX already holds 0; AX is loaded at most once.
-static Prog*
-zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax)
-{
-	vlong cnt, i;
-
-	cnt = hi - lo;
-	if(cnt == 0)
-		return p;
-	if(*ax == 0) {
-		p = appendpp(p, AMOVQ, TYPE_CONST, 0, 0, TYPE_REG, REG_AX, 0);
-		*ax = 1;
-	}
-	if(cnt % widthreg != 0) {	// should only happen with nacl
-		if(cnt % widthptr != 0)
-			fatal("zerorange count not a multiple of widthptr %lld", cnt);	// cnt is a vlong, so %lld
-		p = appendpp(p, AMOVL, TYPE_REG, REG_AX, 0, TYPE_MEM, REG_SP, frame+lo);
-		lo += widthptr;
-		cnt -= widthptr;
-	}
-	if(cnt <= 4*widthreg) {	// short run: one MOVQ per register-sized word
-		for(i = 0; i < cnt; i += widthreg) {
-			p = appendpp(p, AMOVQ, TYPE_REG, REG_AX, 0, TYPE_MEM, REG_SP, frame+lo+i);
-		}
-	} else if(!nacl && (cnt <= 128*widthreg)) {	// medium run: DUFFZERO into runtime duffzero, DI = start
-		p = appendpp(p, leaptr, TYPE_MEM, REG_SP, frame+lo, TYPE_REG, REG_DI, 0);
-		p = appendpp(p, ADUFFZERO, TYPE_NONE, 0, 0, TYPE_ADDR, 0, 2*(128-cnt/widthreg));
-		p->to.sym = linksym(pkglookup("duffzero", runtimepkg));
-	} else {	// long run (or nacl): REP STOSQ with the word count in CX, DI = start
-		p = appendpp(p, AMOVQ, TYPE_CONST, 0, cnt/widthreg, TYPE_REG, REG_CX, 0);
-		p = appendpp(p, leaptr, TYPE_MEM, REG_SP, frame+lo, TYPE_REG, REG_DI, 0);
-		p = appendpp(p, AREP, TYPE_NONE, 0, 0, TYPE_NONE, 0, 0);
-		p = appendpp(p, ASTOSQ, TYPE_NONE, 0, 0, TYPE_NONE, 0, 0);
-	}
-	return p;
-}
-
-static Prog*
-appendpp(Prog *p, int as, int ftype, int freg, vlong foffset, int ttype, int treg, vlong toffset)
-{
-	Prog *np;	// new instruction, linked in after p and returned
-	np = mal(sizeof *np);
-	clearp(np);
-	np->lineno = p->lineno;	// inherit p's line number
-	np->as = as;
-	np->to.type = ttype;
-	np->to.reg = treg;
-	np->to.offset = toffset;
-	np->from.type = ftype;
-	np->from.reg = freg;
-	np->from.offset = foffset;
-	np->link = p->link;
-	p->link = np;
-	return np;
-}
-
-/*
- * generate:
- *	call f
- *	proc=-1	normal call but no return
- *	proc=0	normal call
- *	proc=1	goroutine run in new proc
- *	proc=2	defer call save away stack
- *	proc=3	normal call to C pointer (not Go func value)
- */
-void
-ginscall(Node *f, int proc)
-{
-	Prog *p;
-	Node reg, stk;
-	Node r1;
-	int32 extra;
-
-	if(f->type != T) {
-		extra = 0;
-		if(proc == 1 || proc == 2)
-			extra = 2 * widthptr;	// go/defer store two extra words (size, fn) at 0(SP) below
-		setmaxarg(f->type, extra);
-	}
-
-	switch(proc) {
-	default:
-		fatal("ginscall: bad proc %d", proc);
-		break;
-
-	case 0:	// normal call
-	case -1:	// normal call but no return
-		if(f->op == ONAME && f->class == PFUNC) {
-			if(f == deferreturn) {
-				// Deferred calls will appear to be returning to
-				// the CALL deferreturn(SB) that we are about to emit.
-				// However, the stack trace code will show the line
-				// of the instruction byte before the return PC.
-				// To avoid that being an unrelated instruction,
-				// insert an x86 NOP so that it will have the right line number.
-				// x86 NOP 0x90 is really XCHG AX, AX; use that description
-				// because the NOP pseudo-instruction would be removed by
-				// the linker.
-				nodreg(&reg, types[TINT], REG_AX);
-				gins(AXCHGL, &reg, &reg);
-			}
-			p = gins(ACALL, N, f);
-			afunclit(&p->to, f);
-			if(proc == -1 || noreturn(p))
-				gins(AUNDEF, N, N);	// the call does not return: follow it with UNDEF
-			break;
-		}
-		nodreg(&reg, types[tptr], REG_DX);
-		nodreg(&r1, types[tptr], REG_BX);
-		gmove(f, &reg);	// DX = f
-		reg.op = OINDREG;
-		gmove(&reg, &r1);	// BX = word at 0(DX)
-		reg.op = OREGISTER;
-		gins(ACALL, &reg, &r1);
-		break;
-	
-	case 3:	// normal call of c function pointer
-		gins(ACALL, N, f);
-		break;
-
-	case 1:	// call in new proc (go)
-	case 2:	// deferred call (defer)
-		memset(&stk, 0, sizeof(stk));
-		stk.op = OINDREG;
-		stk.val.u.reg = REG_SP;
-		stk.xoffset = 0;
-
-		if(widthptr == 8) {
-			// size of arguments at 0(SP)
-			ginscon(AMOVQ, argsize(f->type), &stk);
-
-			// FuncVal* at 8(SP)
-			stk.xoffset = widthptr;
-			nodreg(&reg, types[TINT64], REG_AX);
-			gmove(f, &reg);
-			gins(AMOVQ, &reg, &stk);
-		} else {
-			// size of arguments at 0(SP)
-			ginscon(AMOVL, argsize(f->type), &stk);
-
-			// FuncVal* at 4(SP)
-			stk.xoffset = widthptr;
-			nodreg(&reg, types[TINT32], REG_AX);
-			gmove(f, &reg);
-			gins(AMOVL, &reg, &stk);
-		}
-
-		if(proc == 1)
-			ginscall(newproc, 0);
-		else {
-			if(!hasdefer)
-				fatal("hasdefer=0 but has defer");
-			ginscall(deferproc, 0);
-		}
-		if(proc == 2) {
-			// after the deferproc call: if AX is zero continue, otherwise emit a return.
-			nodreg(&reg, types[TINT32], REG_AX);
-			gins(ATESTL, &reg, &reg);
-			p = gbranch(AJEQ, T, +1);	// AX == 0: jump over the return sequence
-			cgen_ret(N);
-			patch(p, pc);
-		}
-		break;
-	}
-}
-
-/*
- * n is call to interface method; proc is interpreted as in ginscall.
- * generate the call.  res is only passed on to igen and regalloc here.
- */
-void
-cgen_callinter(Node *n, Node *res, int proc)
-{
-	Node *i, *f;
-	Node tmpi, nodi, nodo, nodr, nodsp;
-
-	i = n->left;
-	if(i->op != ODOTINTER)
-		fatal("cgen_callinter: not ODOTINTER %O", i->op);
-
-	f = i->right;		// field
-	if(f->op != ONAME)
-		fatal("cgen_callinter: not ONAME %O", f->op);
-
-	i = i->left;		// interface
-
-	if(!i->addable) {
-		tempname(&tmpi, i->type);
-		cgen(i, &tmpi);
-		i = &tmpi;
-	}
-
-	genlist(n->list);		// assign the args
-
-	// i is now addable, prepare an indirected
-	// register to hold its address.
-	igen(i, &nodi, res);		// REG = &inter
-
-	nodindreg(&nodsp, types[tptr], REG_SP);
-	nodsp.xoffset = 0;
-	if(proc != 0)
-		nodsp.xoffset += 2 * widthptr; // leave room for size & fn
-	nodi.type = types[tptr];
-	nodi.xoffset += widthptr;
-	cgen(&nodi, &nodsp);	// {0, 8(nacl), or 16}(SP) = 8(REG) -- i.data
-
-	regalloc(&nodo, types[tptr], res);
-	nodi.type = types[tptr];
-	nodi.xoffset -= widthptr;
-	cgen(&nodi, &nodo);	// REG = 0(REG) -- i.tab
-	regfree(&nodi);
-
-	regalloc(&nodr, types[tptr], &nodo);
-	if(n->left->xoffset == BADWIDTH)
-		fatal("cgen_callinter: badwidth");
-	cgen_checknil(&nodo); // in case offset is huge
-	nodo.op = OINDREG;
-	nodo.xoffset = n->left->xoffset + 3*widthptr + 8;	// 32+offset when widthptr is 8
-	if(proc == 0) {
-		// plain call: use direct c function pointer - more efficient
-		cgen(&nodo, &nodr);	// REG = 32+offset(REG) -- i.tab->fun[f]
-		proc = 3;
-	} else {
-		// go/defer. generate go func value. NOTE(review): ALEAQ here, leaptr elsewhere in this file -- confirm for nacl.
-		gins(ALEAQ, &nodo, &nodr);	// REG = &(32+offset(REG)) -- i.tab->fun[f]
-	}
-
-	nodr.type = n->left->type;
-	ginscall(&nodr, proc);
-
-	regfree(&nodr);
-	regfree(&nodo);
-}
-
-/*
- * generate function call;
- *	proc=0	normal call
- *	proc=1	goroutine run in new proc
- *	proc=2	defer call save away stack
- * n->left is the function being called and
- * n->list holds the argument assignments.
- */
-void
-cgen_call(Node *n, int proc)
-{
-	Type *fntype;
-	Node fnreg, fntmp, *src;
-
-	if(n == N)
-		return;
-
-	if(n->left->ullman >= UINF) {
-		// the function expression itself involves a call:
-		// evaluate it into a temporary before the arguments.
-		tempname(&fntmp, types[tptr]);
-		cgen(n->left, &fntmp);
-	}
-
-	genlist(n->list);		// assign the args
-	fntype = n->left->type;
-
-	// decide where the function value comes from.
-	if(n->left->ullman >= UINF) {
-		// the temporary computed above
-		src = &fntmp;
-	} else if(n->left->op != ONAME || n->left->class != PFUNC) {
-		// some other function-valued expression
-		src = n->left;
-	} else {
-		// a named function: call it directly
-		n->left->method = 1;
-		ginscall(n->left, proc);
-		return;
-	}
-
-	// indirect call: load the function value into a register,
-	// give the register node the function's type, and call through it.
-	regalloc(&fnreg, types[tptr], N);
-	cgen_as(&fnreg, src);
-	fnreg.type = fntype;
-	ginscall(&fnreg, proc);
-	regfree(&fnreg);
-}
-
-/*
- * the call n has already been emitted.
- * generate:
- *	res = first result of the call, read from its slot off SP.
- */
-void
-cgen_callret(Node *n, Node *res)
-{
-	Iter it;
-	Node slot;
-	Type *fntype, *first;
-
-	fntype = n->left->type;
-	if(fntype->etype == TPTR64 || fntype->etype == TPTR32)
-		fntype = fntype->type;	// look through the pointer type
-
-	first = structfirst(&it, getoutarg(fntype));
-	if(first == T)
-		fatal("cgen_callret: nil");
-
-	// slot is first->width(SP), typed as the first result.
-	memset(&slot, 0, sizeof slot);
-	slot.addable = 1;
-	slot.op = OINDREG;
-	slot.val.u.reg = REG_SP;
-	slot.type = first->type;
-	slot.xoffset = first->width;
-	cgen_as(res, &slot);
-}
-
-/*
- * the call n has already been emitted.
- * generate:
- *	res = address of the first result of the call.
- */
-void
-cgen_aret(Node *n, Node *res)
-{
-	Iter it;
-	Node slot, addr;
-	Type *fntype, *first;
-
-	fntype = n->left->type;
-	if(isptr[fntype->etype])
-		fntype = fntype->type;	// look through the pointer type
-
-	first = structfirst(&it, getoutarg(fntype));
-	if(first == T)
-		fatal("cgen_aret: nil");
-
-	memset(&slot, 0, sizeof slot);
-	slot.addable = 1;
-	slot.op = OINDREG;
-	slot.val.u.reg = REG_SP;
-	slot.type = first->type;
-	slot.xoffset = first->width;
-
-	if(res->op == OREGISTER) {
-		gins(leaptr, &slot, res);	// res is a register: LEA straight into it
-		return;
-	}
-	regalloc(&addr, types[tptr], res);	// otherwise go through a scratch register
-	gins(leaptr, &slot, &addr);
-	gins(movptr, &addr, res);
-	regfree(&addr);
-}
-
-/*
- * generate return; for ORETJMP the RET carries the external symbol n->left->sym.
- * n->list holds the assignments to the return values; n may be N.
- */
-void
-cgen_ret(Node *n)
-{
-	Prog *ret;
-
-	if(n != N)
-		genlist(n->list);		// copy out args
-	if(hasdefer)
-		ginscall(deferreturn, 0);
-	genlist(curfn->exit);
-	ret = gins(ARET, N, N);
-	if(n == N || n->op != ORETJMP)
-		return;
-	ret->to.sym = linksym(n->left->sym);
-	ret->to.name = NAME_EXTERN;
-	ret->to.type = TYPE_MEM;
-}
-
-/*
- * generate division.
- * generates one of:
- *	res = nl / nr
- *	res = nl % nr
- * according to op.
- */
-void
-dodiv(int op, Node *nl, Node *nr, Node *res)
-{
-	int a, check;
-	Node n3, n4;
-	Type *t, *t0;
-	Node ax, dx, ax1, n31, oldax, olddx;
-	Prog *p1, *p2;
-
-	// Have to be careful about handling
-	// most negative int divided by -1 correctly.
-	// The hardware will trap.
-	// Also the byte divide instruction needs AH,
-	// which we otherwise don't have to deal with.
-	// Easiest way to avoid for int8, int16: use int32.
-	// For int32 and int64, use explicit test.
-	// Could use int64 hw for int32.
-	t = nl->type;
-	t0 = t;
-	check = 0;
-	if(issigned[t->etype]) {
-		check = 1;
-		if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -(1ULL<<(t->width*8-1)))
-			check = 0;	// constant dividend that is not the most negative value
-		else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1)
-			check = 0;	// constant divisor that is not -1
-	}
-	if(t->width < 4) {
-		if(issigned[t->etype])
-			t = types[TINT32];
-		else
-			t = types[TUINT32];
-		check = 0;	// operands are widened to 32 bits, so no explicit test
-	}
-	a = optoas(op, t);
-
-	regalloc(&n3, t0, N);
-	if(nl->ullman >= nr->ullman) {
-		savex(REG_AX, &ax, &oldax, res, t0);
-		cgen(nl, &ax);
-		regalloc(&ax, t0, &ax);	// mark ax live during cgen
-		cgen(nr, &n3);
-		regfree(&ax);
-	} else {
-		cgen(nr, &n3);
-		savex(REG_AX, &ax, &oldax, res, t0);
-		cgen(nl, &ax);
-	}
-	if(t != t0) {
-		// Convert
-		ax1 = ax;
-		n31 = n3;
-		ax.type = t;
-		n3.type = t;
-		gmove(&ax1, &ax);
-		gmove(&n31, &n3);
-	}
-
-	p2 = P;
-	if(nacl) {
-		// Native Client does not relay the divide-by-zero trap
-		// to the executing program, so we must insert a check
-		// for ourselves.
-		nodconst(&n4, t, 0);
-		gins(optoas(OCMP, t), &n3, &n4);
-		p1 = gbranch(optoas(ONE, t), T, +1);
-		if(panicdiv == N)
-			panicdiv = sysfunc("panicdivide");
-		ginscall(panicdiv, -1);
-		patch(p1, pc);
-	}
-	if(check) {
-		nodconst(&n4, t, -1);
-		gins(optoas(OCMP, t), &n3, &n4);
-		p1 = gbranch(optoas(ONE, t), T, +1);	// divisor != -1: go do the real divide
-		if(op == ODIV) {
-			// a / (-1) is -a.
-			gins(optoas(OMINUS, t), N, &ax);
-			gmove(&ax, res);
-		} else {
-			// a % (-1) is 0.
-			nodconst(&n4, t, 0);
-			gmove(&n4, res);
-		}
-		p2 = gbranch(AJMP, T, 0);	// res is already set: jump past the divide (patched below)
-		patch(p1, pc);
-	}
-	savex(REG_DX, &dx, &olddx, res, t);
-	if(!issigned[t->etype]) {
-		nodconst(&n4, t, 0);
-		gmove(&n4, &dx);	// unsigned: clear DX
-	} else
-		gins(optoas(OEXTEND, t), N, N);	// signed: presumably sign-extends AX into DX -- confirm in optoas
-	gins(a, &n3, N);	// divide by n3; quotient is read from ax and remainder from dx below
-	regfree(&n3);
-	if(op == ODIV)
-		gmove(&ax, res);
-	else
-		gmove(&dx, res);
-	restx(&dx, &olddx);
-	if(check)
-		patch(p2, pc);
-	restx(&ax, &oldax);
-}
-
-/*
- * register dr is one of the special ones (AX, CX, DI, SI, etc.)
- * and we need to use it.  x is set to a node for dr of type t.
- * if dr is already allocated as a temporary (reg[dr] > 1; this
- * can only happen if a routine like sgen passed a special as
- * cgen's res and then cgen used regalloc to reuse it as its own
- * temporary), its contents are moved to another register, oldx.
- * caller must call restx to move it back.
- * the move is not necessary if dr == res, because res is
- * known to be dead.
- */
-void
-savex(int dr, Node *x, Node *oldx, Node *res, Type *t)
-{
-	int refs;
-
-	refs = reg[dr];
-	memset(oldx, 0, sizeof *oldx);	// oldx->op == 0 tells restx nothing was saved
-	nodreg(x, t, dr);
-	if(refs <= 1 || samereg(x, res))
-		return;
-
-	// dr is live and is not the destination: park it in oldx.
-	regalloc(oldx, types[TINT64], N);
-	x->type = types[TINT64];
-	gmove(x, oldx);
-	x->type = t;
-	oldx->ostk = refs;	// squirrel away old reg[dr] value
-	reg[dr] = 1;
-}
-
-void
-restx(Node *x, Node *oldx)	// undo savex: move the value parked in oldx back into x
-{
-	if(oldx->op == 0)
-		return;	// savex did not move anything
-	x->type = types[TINT64];
-	reg[x->val.u.reg] = oldx->ostk;
-	gmove(oldx, x);
-	regfree(oldx);
-}
-
-/*
- * generate division according to op, one of:
- *	res = nl / nr
- *	res = nl % nr
- */
-void
-cgen_div(int op, Node *nl, Node *nr, Node *res)
-{
-	Node n1, n2, n3;
-	int w, a;
-	Magic m;
-
-	if(nr->op != OLITERAL)
-		goto longdiv;
-	w = nl->type->width*8;	// operand width in bits
-
-	// Front end handled 32-bit division. We only need to handle 64-bit.
-	// try to do division by multiply by (2^w)/d
-	// see hacker's delight chapter 10
-	switch(simtype[nl->type->etype]) {
-	default:
-		goto longdiv;
-
-	case TUINT64:
-		m.w = w;
-		m.ud = mpgetfix(nr->val.u.xval);
-		umagic(&m);
-		if(m.bad)
-			break;	// no usable magic number: leave the switch and fall to longdiv
-		if(op == OMOD)
-			goto longmod;
-
-		cgenr(nl, &n1, N);
-		nodconst(&n2, nl->type, m.um);
-		regalloc(&n3, nl->type, res);
-		cgen_hmul(&n1, &n2, &n3);
-
-		if(m.ua) {
-			// need to add numerator accounting for overflow
-			gins(optoas(OADD, nl->type), &n1, &n3);
-			nodconst(&n2, nl->type, 1);
-			gins(optoas(ORROTC, nl->type), &n2, &n3);
-			nodconst(&n2, nl->type, m.s-1);
-			gins(optoas(ORSH, nl->type), &n2, &n3);
-		} else {
-			nodconst(&n2, nl->type, m.s);
-			gins(optoas(ORSH, nl->type), &n2, &n3);	// shift n3 (the high product)
-		}
-
-		gmove(&n3, res);
-		regfree(&n1);
-		regfree(&n3);
-		return;
-
-	case TINT64:
-		m.w = w;
-		m.sd = mpgetfix(nr->val.u.xval);
-		smagic(&m);
-		if(m.bad)
-			break;	// no usable magic number: leave the switch and fall to longdiv
-		if(op == OMOD)
-			goto longmod;
-
-		cgenr(nl, &n1, res);
-		nodconst(&n2, nl->type, m.sm);
-		regalloc(&n3, nl->type, N);
-		cgen_hmul(&n1, &n2, &n3);
-
-		if(m.sm < 0) {
-			// need to add numerator
-			gins(optoas(OADD, nl->type), &n1, &n3);
-		}
-
-		nodconst(&n2, nl->type, m.s);
-		gins(optoas(ORSH, nl->type), &n2, &n3);	// shift n3
-
-		nodconst(&n2, nl->type, w-1);
-		gins(optoas(ORSH, nl->type), &n2, &n1);	// -1 iff num is neg
-		gins(optoas(OSUB, nl->type), &n1, &n3);	// added
-
-		if(m.sd < 0) {
-			// this could probably be removed
-			// by factoring it into the multiplier
-			gins(optoas(OMINUS, nl->type), N, &n3);
-		}
-
-		gmove(&n3, res);
-		regfree(&n1);
-		regfree(&n3);
-		return;
-	}
-	goto longdiv;	// reached by the m.bad breaks above
-
-longdiv:
-	// division and mod using (slow) hardware instruction
-	dodiv(op, nl, nr, res);
-	return;
-
-longmod:
-	// mod using formula A%B = A-(A/B*B) but
-	// we know that there is a fast algorithm for A/B
-	regalloc(&n1, nl->type, res);
-	cgen(nl, &n1);
-	regalloc(&n2, nl->type, N);
-	cgen_div(ODIV, &n1, nr, &n2);	// n2 = n1 / nr via the multiply path above
-	a = optoas(OMUL, nl->type);
-	if(w == 8) {
-		// use 2-operand 16-bit multiply
-		// because there is no 2-operand 8-bit multiply
-		// NOTE(review): longmod is only entered from the 64-bit cases, so this looks unreachable -- confirm.
-		a = AIMULW;
-	}
-	if(!smallintconst(nr)) {
-		regalloc(&n3, nl->type, N);
-		cgen(nr, &n3);
-		gins(a, &n3, &n2);
-		regfree(&n3);
-	} else
-		gins(a, nr, &n2);
-	gins(optoas(OSUB, nl->type), &n2, &n1);	// n1 = A - (A/B)*B
-	gmove(&n1, res);
-	regfree(&n1);
-	regfree(&n2);
-}
-
-/*
- * generate high multiply:
- *   res = (nl*nr) >> width
- */
-void
-cgen_hmul(Node *nl, Node *nr, Node *res)
-{
-	Type *t;
-	int as;
-	Node lhs, rhs, ax, dx, *swap;
-
-	t = nl->type;
-	as = optoas(OHMUL, t);
-	// evaluate the operand with the larger ullman number first.
-	if(nl->ullman < nr->ullman) {
-		swap = nr;
-		nr = nl;
-		nl = swap;
-	}
-	cgenr(nl, &lhs, res);
-	cgenr(nr, &rhs, N);
-	nodreg(&ax, t, REG_AX);
-	gmove(&lhs, &ax);	// one operand goes in AX
-	gins(as, &rhs, N);
-	regfree(&rhs);
-	regfree(&lhs);
-
-	nodreg(&dx, t, REG_DX);
-	if(t->width == 1) {
-		// byte multiply behaves differently: copy AH into DX first.
-		nodreg(&ax, t, REG_AH);
-		gmove(&ax, &dx);
-	}
-	gmove(&dx, res);
-}
-
-/*
- * generate shift according to op, one of:
- *	res = nl << nr
- *	res = nl >> nr	(bounded != 0 omits the fix-up for counts >= the width of nl)
- */
-void
-cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res)
-{
-	Node n1, n2, n3, n4, n5, cx, oldcx;
-	int a, rcx;
-	Prog *p1;
-	uvlong sc;
-	Type *tcount;
-
-	a = optoas(op, nl->type);
-
-	if(nr->op == OLITERAL) {
-		// constant shift count: no need for CX
-		regalloc(&n1, nl->type, res);
-		cgen(nl, &n1);
-		sc = mpgetfix(nr->val.u.xval);
-		if(sc >= nl->type->width*8) {
-			// large shift gets 2 shifts by width-1
-			nodconst(&n3, types[TUINT32], nl->type->width*8-1);
-			gins(a, &n3, &n1);
-			gins(a, &n3, &n1);
-		} else
-			gins(a, nr, &n1);
-		gmove(&n1, res);
-		regfree(&n1);
-		goto ret;
-	}
-
-	if(nl->ullman >= UINF) {
-		tempname(&n4, nl->type);
-		cgen(nl, &n4);
-		nl = &n4;
-	}
-	if(nr->ullman >= UINF) {
-		tempname(&n5, nr->type);
-		cgen(nr, &n5);
-		nr = &n5;
-	}
-
-	rcx = reg[REG_CX];	// use count of CX before this function claims it
-	nodreg(&n1, types[TUINT32], REG_CX);
-	
-	// Allow either uint32 or uint64 as shift type,
-	// to avoid unnecessary conversion from uint32 to uint64
-	// just to do the comparison.
-	tcount = types[simtype[nr->type->etype]];
-	if(tcount->etype < TUINT32)
-		tcount = types[TUINT32];
-
-	regalloc(&n1, nr->type, &n1);		// to hold the shift type in CX
-	regalloc(&n3, tcount, &n1);	// to clear high bits of CX
-
-	nodreg(&cx, types[TUINT64], REG_CX);
-	memset(&oldcx, 0, sizeof oldcx);
-	if(rcx > 0 && !samereg(&cx, res)) {
-		// CX was in use and is not the destination: save it in oldcx
-		regalloc(&oldcx, types[TUINT64], N);
-		gmove(&cx, &oldcx);
-	}
-	cx.type = tcount;
-
-	if(samereg(&cx, res))
-		regalloc(&n2, nl->type, N);	// res is CX, which holds the count: use another register
-	else
-		regalloc(&n2, nl->type, res);
-	if(nl->ullman >= nr->ullman) {
-		cgen(nl, &n2);
-		cgen(nr, &n1);
-		gmove(&n1, &n3);
-	} else {
-		cgen(nr, &n1);
-		gmove(&n1, &n3);
-		cgen(nl, &n2);
-	}
-	regfree(&n3);
-
-	// test and fix up large shifts
-	if(!bounded) {
-		nodconst(&n3, tcount, nl->type->width*8);
-		gins(optoas(OCMP, tcount), &n1, &n3);
-		p1 = gbranch(optoas(OLT, tcount), T, +1);	// count < width: skip the fix-up
-		if(op == ORSH && issigned[nl->type->etype]) {
-			nodconst(&n3, types[TUINT32], nl->type->width*8-1);
-			gins(a, &n3, &n2);	// signed right shift: shift by width-1 first
-		} else {
-			nodconst(&n3, nl->type, 0);
-			gmove(&n3, &n2);	// otherwise the value becomes 0
-		}
-		patch(p1, pc);
-	}
-
-	gins(a, &n1, &n2);	// the shift itself, count in n1 (CX)
-
-	if(oldcx.op != 0) {
-		// put the saved CX back
-		cx.type = types[TUINT64];
-		gmove(&oldcx, &cx);
-		regfree(&oldcx);
-	}
-
-	gmove(&n2, res);
-
-	regfree(&n1);
-	regfree(&n2);
-
-ret:
-	;
-}
-
-/*
- * generate byte multiply:
- *	res = nl * nr
- * there is no 2-operand byte multiply instruction, so the operands
- * are multiplied as 64-bit values and the result is truncated.
- */
-void
-cgen_bmul(int op, Node *nl, Node *nr, Node *res)
-{
-	Node lreg, rreg, lwide, rwide, *swap;
-	Type *wide;
-
-	// put the operand with the larger ullman number on the left.
-	if(nl->ullman < nr->ullman) {
-		swap = nr;
-		nr = nl;
-		nl = swap;
-	}
-
-	// evaluate both operands into registers at their own types.
-	regalloc(&lreg, nl->type, res);
-	cgen(nl, &lreg);
-	regalloc(&rreg, nr->type, N);
-	cgen(nr, &rreg);
-
-	// view the same two registers as 64-bit values, signed if nl is signed.
-	if(issigned[nl->type->etype])
-		wide = types[TINT64];
-	else
-		wide = types[TUINT64];
-	nodreg(&lwide, wide, lreg.val.u.reg);
-	nodreg(&rwide, wide, rreg.val.u.reg);
-	// full-width multiply of rwide into lwide.
-	gins(optoas(op, wide), &rwide, &lwide);
-
-	// the move to res truncates.
-	gmove(&lwide, res);
-	regfree(&lreg);
-	regfree(&rreg);
-}
-
-void
-clearfat(Node *nl)
-{
-	int64 w, c, q;
-	Node n1, oldn1, ax, oldax, di, z;
-	Prog *p;
-
-	/* clear a fat object: zero all nl->type->width bytes of nl */
-	if(debug['g'])
-		dump("\nclearfat", nl);
-
-	w = nl->type->width;
-	// Avoid taking the address for simple enough types.
-	if(componentgen(N, nl))
-		return;
-
-	c = w % 8;	// bytes
-	q = w / 8;	// quads
-
-	if(q < 4) {
-		// Write sequence of MOV 0, off(base) instead of using STOSQ.
-		// The hope is that although the code will be slightly longer,
-		// the MOVs will have no dependencies and pipeline better
-		// than the unrolled STOSQ loop.
-		// NOTE: Must use agen, not igen, so that optimizer sees address
-		// being taken. We are not writing on field boundaries.
-		agenr(nl, &n1, N);
-		n1.op = OINDREG;
-		nodconst(&z, types[TUINT64], 0);
-		while(q-- > 0) {
-			n1.type = z.type;
-			gins(AMOVQ, &z, &n1);
-			n1.xoffset += 8;
-		}
-		if(c >= 4) {
-			nodconst(&z, types[TUINT32], 0);
-			n1.type = z.type;
-			gins(AMOVL, &z, &n1);
-			n1.xoffset += 4;
-			c -= 4;
-		}
-		nodconst(&z, types[TUINT8], 0);
-		while(c-- > 0) {
-			n1.type = z.type;
-			gins(AMOVB, &z, &n1);
-			n1.xoffset++;
-		}
-		regfree(&n1);
-		return;
-	}
-
-	savex(REG_DI, &n1, &oldn1, N, types[tptr]);
-	agen(nl, &n1);	// DI = address of nl
-
-	savex(REG_AX, &ax, &oldax, N, types[tptr]);
-	gconreg(AMOVL, 0, REG_AX);	// AX = 0
-
-	if(q > 128 || nacl) {
-		gconreg(movptr, q, REG_CX);	// NOTE(review): CX is written without savex/restx, unlike DI and AX -- confirm
-		gins(AREP, N, N);	// repeat
-		gins(ASTOSQ, N, N);	// STOSQ AX,*(DI)+
-	} else {
-		p = gins(ADUFFZERO, N, N);
-		p->to.type = TYPE_ADDR;
-		p->to.sym = linksym(pkglookup("duffzero", runtimepkg));
-		// 2 and 128 = magic constants: see ../../runtime/asm_amd64.s
-		p->to.offset = 2*(128-q);
-	}
-
-	z = ax;
-	di = n1;
-	if(w >= 8 && c >= 4) {
-		// one 8-byte store at c-8(DI) covers the remaining c bytes (it re-zeroes 8-c earlier bytes)
-		di.op = OINDREG;
-		di.type = z.type = types[TINT64];
-		p = gins(AMOVQ, &z, &di);
-		p->to.scale = 1;
-		p->to.offset = c-8;
-	} else if(c >= 4) {	// NOTE(review): q >= 4 here, so w >= 8 always holds and this branch looks unreachable -- confirm
-		di.op = OINDREG;
-		di.type = z.type = types[TINT32];
-		p = gins(AMOVL, &z, &di);
-		if(c > 4) {
-			p = gins(AMOVL, &z, &di);
-			p->to.scale = 1;
-			p->to.offset = c-4;
-		}
-	} else
-	while(c > 0) {
-		gins(ASTOSB, N, N);	// STOSB AL,*(DI)+
-		c--;
-	}
-
-	restx(&n1, &oldn1);
-	restx(&ax, &oldax);
-}
-
-// Called after regopt and peep have run.
-// Replaces each CHECKNIL pseudo-op with a real nil pointer check.
-void
-expandchecks(Prog *firstp)
-{
-	Prog *chk, *jne, *crash;
-
-	for(chk = firstp; chk != P; chk = chk->link) {
-		if(chk->as != ACHECKNIL)
-			continue;
-		if(debug_checknil && chk->lineno > 1) // lineno==1 in generated wrappers
-			warnl(chk->lineno, "generated nil check");
-		// the check becomes
-		//	CMP arg, $0
-		//	JNE 2(PC) (likely)
-		//	MOVL AX, 0(arg)	(or MOVL AX, 0 if arg is not a register)
-		jne = mal(sizeof *jne);
-		crash = mal(sizeof *crash);
-		clearp(jne);
-		clearp(crash);
-		// splice jne and crash in right after chk.
-		crash->link = chk->link;
-		jne->link = crash;
-		chk->link = jne;
-
-		// chk itself turns into the compare against zero.
-		chk->as = cmpptr;
-		chk->to.type = TYPE_CONST;
-		chk->to.offset = 0;
-		// jne skips over crash when the pointer is non-nil.
-		jne->as = AJNE;
-		jne->lineno = chk->lineno;
-		jne->pc = 9999;
-		jne->from.type = TYPE_CONST;
-		jne->from.offset = 1; // likely
-		jne->to.type = TYPE_BRANCH;
-		jne->to.u.branch = crash->link;
-
-		// crash by write to memory address 0.
-		// if possible, since we know arg is 0, use 0(arg),
-		// which will be shorter to encode than plain 0.
-		crash->as = AMOVL;
-		crash->lineno = chk->lineno;
-		crash->pc = 9999;
-		crash->from.type = TYPE_REG;
-		crash->from.reg = REG_AX;
-		crash->to.type = TYPE_MEM;
-		crash->to.reg = regtyp(&chk->from) ? chk->from.reg : REG_NONE;
-		crash->to.offset = 0;
-	}
-}