diff options
Diffstat (limited to 'src/cmd/gc/popt.c')
-rw-r--r-- | src/cmd/gc/popt.c | 1022 |
1 files changed, 0 insertions, 1022 deletions
diff --git a/src/cmd/gc/popt.c b/src/cmd/gc/popt.c deleted file mode 100644 index b02d58e663..0000000000 --- a/src/cmd/gc/popt.c +++ /dev/null @@ -1,1022 +0,0 @@ -// Derived from Inferno utils/6c/reg.c -// http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c -// -// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. -// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) -// Portions Copyright © 1997-1999 Vita Nuova Limited -// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) -// Portions Copyright © 2004,2006 Bruce Ellis -// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) -// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others -// Portions Copyright © 2009 The Go Authors. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -// "Portable" optimizations. -// Compiled separately for 5g, 6g, and 8g, so allowed to use gg.h, opt.h. -// Must code to the intersection of the three back ends. - -#include <u.h> -#include <libc.h> -#include "go.h" -#include "popt.h" - -// p is a call instruction. Does the call fail to return? -int -noreturn(Prog *p) -{ - Sym *s; - int i; - static Sym* symlist[10]; - - if(symlist[0] == S) { - symlist[0] = pkglookup("panicindex", runtimepkg); - symlist[1] = pkglookup("panicslice", runtimepkg); - symlist[2] = pkglookup("throwinit", runtimepkg); - symlist[3] = pkglookup("gopanic", runtimepkg); - symlist[4] = pkglookup("panicwrap", runtimepkg); - symlist[5] = pkglookup("throwreturn", runtimepkg); - symlist[6] = pkglookup("selectgo", runtimepkg); - symlist[7] = pkglookup("block", runtimepkg); - } - - if(p->to.node == nil) - return 0; - s = ((Node*)(p->to.node))->sym; - if(s == S) - return 0; - for(i=0; symlist[i]!=S; i++) - if(s == symlist[i]) - return 1; - return 0; -} - -// JMP chasing and removal. -// -// The code generator depends on being able to write out jump -// instructions that it can jump to now but fill in later. -// the linker will resolve them nicely, but they make the code -// longer and more difficult to follow during debugging. -// Remove them. - -/* what instruction does a JMP to p eventually land on? */ -static Prog* -chasejmp(Prog *p, int *jmploop) -{ - int n; - - n = 0; - while(p != P && p->as == AJMP && p->to.type == TYPE_BRANCH) { - if(++n > 10) { - *jmploop = 1; - break; - } - p = p->to.u.branch; - } - return p; -} - -/* - * reuse reg pointer for mark/sweep state. - * leave reg==nil at end because alive==nil. - */ -#define alive ((void*)0) -#define dead ((void*)1) -/*c2go -extern void *alive; -extern void *dead; -*/ - -/* mark all code reachable from firstp as alive */ -static void -mark(Prog *firstp) -{ - Prog *p; - - for(p=firstp; p; p=p->link) { - if(p->opt != dead) - break; - p->opt = alive; - if(p->as != ACALL && p->to.type == TYPE_BRANCH && p->to.u.branch) - mark(p->to.u.branch); - if(p->as == AJMP || p->as == ARET || p->as == AUNDEF) - break; - } -} - -void -fixjmp(Prog *firstp) -{ - int jmploop; - Prog *p, *last; - - if(debug['R'] && debug['v']) - print("\nfixjmp\n"); - - // pass 1: resolve jump to jump, mark all code as dead. - jmploop = 0; - for(p=firstp; p; p=p->link) { - if(debug['R'] && debug['v']) - print("%P\n", p); - if(p->as != ACALL && p->to.type == TYPE_BRANCH && p->to.u.branch && p->to.u.branch->as == AJMP) { - p->to.u.branch = chasejmp(p->to.u.branch, &jmploop); - if(debug['R'] && debug['v']) - print("->%P\n", p); - } - p->opt = dead; - } - if(debug['R'] && debug['v']) - print("\n"); - - // pass 2: mark all reachable code alive - mark(firstp); - - // pass 3: delete dead code (mostly JMPs). - last = nil; - for(p=firstp; p; p=p->link) { - if(p->opt == dead) { - if(p->link == P && p->as == ARET && last && last->as != ARET) { - // This is the final ARET, and the code so far doesn't have one. - // Let it stay. The register allocator assumes that all live code in - // the function can be traversed by starting at all the RET instructions - // and following predecessor links. If we remove the final RET, - // this assumption will not hold in the case of an infinite loop - // at the end of a function. - // Keep the RET but mark it dead for the liveness analysis. - p->mode = 1; - } else { - if(debug['R'] && debug['v']) - print("del %P\n", p); - continue; - } - } - if(last) - last->link = p; - last = p; - } - last->link = P; - - // pass 4: elide JMP to next instruction. - // only safe if there are no jumps to JMPs anymore. - if(!jmploop) { - last = nil; - for(p=firstp; p; p=p->link) { - if(p->as == AJMP && p->to.type == TYPE_BRANCH && p->to.u.branch == p->link) { - if(debug['R'] && debug['v']) - print("del %P\n", p); - continue; - } - if(last) - last->link = p; - last = p; - } - last->link = P; - } - - if(debug['R'] && debug['v']) { - print("\n"); - for(p=firstp; p; p=p->link) - print("%P\n", p); - print("\n"); - } -} - -#undef alive -#undef dead - -// Control flow analysis. The Flow structures hold predecessor and successor -// information as well as basic loop analysis. -// -// graph = flowstart(firstp, 0); -// ... use flow graph ... -// flowend(graph); // free graph -// -// Typical uses of the flow graph are to iterate over all the flow-relevant instructions: -// -// for(f = graph->start; f != nil; f = f->link) -// -// or, given an instruction f, to iterate over all the predecessors, which is -// f->p1 and this list: -// -// for(f2 = f->p2; f2 != nil; f2 = f2->p2link) -// -// The size argument to flowstart specifies an amount of zeroed memory -// to allocate in every f->data field, for use by the client. -// If size == 0, f->data will be nil. - -Graph* -flowstart(Prog *firstp, int size) -{ - int id, nf; - Flow *f, *f1, *start, *last; - Graph *graph; - Prog *p; - ProgInfo info; - char *data; - - // Count and mark instructions to annotate. - nf = 0; - for(p = firstp; p != P; p = p->link) { - p->opt = nil; // should be already, but just in case - thearch.proginfo(&info, p); - if(info.flags & Skip) - continue; - p->opt = (void*)1; - nf++; - } - - if(nf == 0) - return nil; - - if(nf >= 20000) { - // fatal("%S is too big (%d instructions)", curfn->nname->sym, nf); - return nil; - } - - // Allocate annotations and assign to instructions. - graph = calloc(sizeof *graph + sizeof(Flow)*nf + size*nf, 1); - if(graph == nil) - fatal("out of memory"); - start = (Flow*)(graph+1); - last = nil; - f = start; - data = (char*)(f+nf); - if(size == 0) - data = nil; - id = 0; - for(p = firstp; p != P; p = p->link) { - if(p->opt == nil) - continue; - p->opt = f; - f->prog = p; - if(last) - last->link = f; - last = f; - f->data = data; - f->id = id; - f++; - id++; - data += size; - } - - // Fill in pred/succ information. - for(f = start; f != nil; f = f->link) { - p = f->prog; - thearch.proginfo(&info, p); - if(!(info.flags & Break)) { - f1 = f->link; - f->s1 = f1; - f1->p1 = f; - } - if(p->to.type == TYPE_BRANCH) { - if(p->to.u.branch == P) - fatal("pnil %P", p); - f1 = p->to.u.branch->opt; - if(f1 == nil) - fatal("fnil %P / %P", p, p->to.u.branch); - if(f1 == f) { - //fatal("self loop %P", p); - continue; - } - f->s2 = f1; - f->p2link = f1->p2; - f1->p2 = f; - } - } - - graph->start = start; - graph->num = nf; - return graph; -} - -void -flowend(Graph *graph) -{ - Flow *f; - - for(f = graph->start; f != nil; f = f->link) - f->prog->opt = nil; - free(graph); -} - -/* - * find looping structure - * - * 1) find reverse postordering - * 2) find approximate dominators, - * the actual dominators if the flow graph is reducible - * otherwise, dominators plus some other non-dominators. - * See Matthew S. Hecht and Jeffrey D. Ullman, - * "Analysis of a Simple Algorithm for Global Data Flow Problems", - * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts, - * Oct. 1-3, 1973, pp. 207-217. - * 3) find all nodes with a predecessor dominated by the current node. - * such a node is a loop head. - * recursively, all preds with a greater rpo number are in the loop - */ -static int32 -postorder(Flow *r, Flow **rpo2r, int32 n) -{ - Flow *r1; - - r->rpo = 1; - r1 = r->s1; - if(r1 && !r1->rpo) - n = postorder(r1, rpo2r, n); - r1 = r->s2; - if(r1 && !r1->rpo) - n = postorder(r1, rpo2r, n); - rpo2r[n] = r; - n++; - return n; -} - -static int32 -rpolca(int32 *idom, int32 rpo1, int32 rpo2) -{ - int32 t; - - if(rpo1 == -1) - return rpo2; - while(rpo1 != rpo2){ - if(rpo1 > rpo2){ - t = rpo2; - rpo2 = rpo1; - rpo1 = t; - } - while(rpo1 < rpo2){ - t = idom[rpo2]; - if(t >= rpo2) - fatal("bad idom"); - rpo2 = t; - } - } - return rpo1; -} - -static int -doms(int32 *idom, int32 r, int32 s) -{ - while(s > r) - s = idom[s]; - return s == r; -} - -static int -loophead(int32 *idom, Flow *r) -{ - int32 src; - - src = r->rpo; - if(r->p1 != nil && doms(idom, src, r->p1->rpo)) - return 1; - for(r = r->p2; r != nil; r = r->p2link) - if(doms(idom, src, r->rpo)) - return 1; - return 0; -} - -static void -loopmark(Flow **rpo2r, int32 head, Flow *r) -{ - if(r->rpo < head || r->active == head) - return; - r->active = head; - r->loop += LOOP; - if(r->p1 != nil) - loopmark(rpo2r, head, r->p1); - for(r = r->p2; r != nil; r = r->p2link) - loopmark(rpo2r, head, r); -} - -void -flowrpo(Graph *g) -{ - Flow *r1; - int32 i, d, me, nr, *idom; - Flow **rpo2r; - - free(g->rpo); - g->rpo = calloc(g->num*sizeof g->rpo[0], 1); - idom = calloc(g->num*sizeof idom[0], 1); - if(g->rpo == nil || idom == nil) - fatal("out of memory"); - - for(r1 = g->start; r1 != nil; r1 = r1->link) - r1->active = 0; - - rpo2r = g->rpo; - d = postorder(g->start, rpo2r, 0); - nr = g->num; - if(d > nr) - fatal("too many reg nodes %d %d", d, nr); - nr = d; - for(i = 0; i < nr / 2; i++) { - r1 = rpo2r[i]; - rpo2r[i] = rpo2r[nr - 1 - i]; - rpo2r[nr - 1 - i] = r1; - } - for(i = 0; i < nr; i++) - rpo2r[i]->rpo = i; - - idom[0] = 0; - for(i = 0; i < nr; i++) { - r1 = rpo2r[i]; - me = r1->rpo; - d = -1; - // rpo2r[r->rpo] == r protects against considering dead code, - // which has r->rpo == 0. - if(r1->p1 != nil && rpo2r[r1->p1->rpo] == r1->p1 && r1->p1->rpo < me) - d = r1->p1->rpo; - for(r1 = r1->p2; r1 != nil; r1 = r1->p2link) - if(rpo2r[r1->rpo] == r1 && r1->rpo < me) - d = rpolca(idom, d, r1->rpo); - idom[i] = d; - } - - for(i = 0; i < nr; i++) { - r1 = rpo2r[i]; - r1->loop++; - if(r1->p2 != nil && loophead(idom, r1)) - loopmark(rpo2r, i, r1); - } - free(idom); - - for(r1 = g->start; r1 != nil; r1 = r1->link) - r1->active = 0; -} - -Flow* -uniqp(Flow *r) -{ - Flow *r1; - - r1 = r->p1; - if(r1 == nil) { - r1 = r->p2; - if(r1 == nil || r1->p2link != nil) - return nil; - } else - if(r->p2 != nil) - return nil; - return r1; -} - -Flow* -uniqs(Flow *r) -{ - Flow *r1; - - r1 = r->s1; - if(r1 == nil) { - r1 = r->s2; - if(r1 == nil) - return nil; - } else - if(r->s2 != nil) - return nil; - return r1; -} - -// The compilers assume they can generate temporary variables -// as needed to preserve the right semantics or simplify code -// generation and the back end will still generate good code. -// This results in a large number of ephemeral temporary variables. -// Merge temps with non-overlapping lifetimes and equal types using the -// greedy algorithm in Poletto and Sarkar, "Linear Scan Register Allocation", -// ACM TOPLAS 1999. - -typedef struct TempVar TempVar; - -struct TempVar -{ - Node *node; - Flow *def; // definition of temp var - Flow *use; // use list, chained through Flow.data - TempVar *freelink; // next free temp in Type.opt list - TempVar *merge; // merge var with this one - vlong start; // smallest Prog.pc in live range - vlong end; // largest Prog.pc in live range - uchar addr; // address taken - no accurate end - uchar removed; // removed from program -}; - -static int -startcmp(const void *va, const void *vb) -{ - TempVar *a, *b; - - a = *(TempVar**)va; - b = *(TempVar**)vb; - - if(a->start < b->start) - return -1; - if(a->start > b->start) - return +1; - // Order what's left by id or symbol name, - // just so that sort is forced into a specific ordering, - // so that the result of the sort does not depend on - // the sort implementation. - if(a->def != b->def) - return a->def->id - b->def->id; - if(a->node != b->node) - return strcmp(a->node->sym->name, b->node->sym->name); - return 0; -} - -// Is n available for merging? -static int -canmerge(Node *n) -{ - return n->class == PAUTO && strncmp(n->sym->name, "autotmp", 7) == 0; -} - -static void mergewalk(TempVar*, Flow*, uint32); -static void varkillwalk(TempVar*, Flow*, uint32); - -void -mergetemp(Prog *firstp) -{ - int i, j, nvar, ninuse, nfree, nkill; - TempVar *var, *v, *v1, **bystart, **inuse; - Flow *f; - NodeList *l, **lp; - Node *n; - Prog *p, *p1; - Type *t; - ProgInfo info, info1; - int32 gen; - Graph *g; - - enum { debugmerge = 1 }; - - g = flowstart(firstp, 0); - if(g == nil) - return; - - // Build list of all mergeable variables. - nvar = 0; - for(l = curfn->dcl; l != nil; l = l->next) - if(canmerge(l->n)) - nvar++; - - var = calloc(nvar*sizeof var[0], 1); - nvar = 0; - for(l = curfn->dcl; l != nil; l = l->next) { - n = l->n; - if(canmerge(n)) { - v = &var[nvar++]; - n->opt = v; - v->node = n; - } - } - - // Build list of uses. - // We assume that the earliest reference to a temporary is its definition. - // This is not true of variables in general but our temporaries are all - // single-use (that's why we have so many!). - for(f = g->start; f != nil; f = f->link) { - p = f->prog; - thearch.proginfo(&info, p); - - if(p->from.node != N && ((Node*)(p->from.node))->opt && p->to.node != N && ((Node*)(p->to.node))->opt) - fatal("double node %P", p); - v = nil; - if((n = p->from.node) != N) - v = n->opt; - if(v == nil && (n = p->to.node) != N) - v = n->opt; - if(v != nil) { - if(v->def == nil) - v->def = f; - f->data = v->use; - v->use = f; - if(n == p->from.node && (info.flags & LeftAddr)) - v->addr = 1; - } - } - - if(debugmerge > 1 && debug['v']) - dumpit("before", g->start, 0); - - nkill = 0; - - // Special case. - for(i = 0; i < nvar; i++) { - v = &var[i]; - if(v->addr) - continue; - // Used in only one instruction, which had better be a write. - if((f = v->use) != nil && (Flow*)f->data == nil) { - p = f->prog; - thearch.proginfo(&info, p); - if(p->to.node == v->node && (info.flags & RightWrite) && !(info.flags & RightRead)) { - p->as = ANOP; - p->to = zprog.to; - v->removed = 1; - if(debugmerge > 0 && debug['v']) - print("drop write-only %S\n", v->node->sym); - } else - fatal("temp used and not set: %P", p); - nkill++; - continue; - } - - // Written in one instruction, read in the next, otherwise unused, - // no jumps to the next instruction. Happens mainly in 386 compiler. - if((f = v->use) != nil && f->link == (Flow*)f->data && (Flow*)((Flow*)f->data)->data == nil && uniqp(f->link) == f) { - p = f->prog; - thearch.proginfo(&info, p); - p1 = f->link->prog; - thearch.proginfo(&info1, p1); - enum { - SizeAny = SizeB | SizeW | SizeL | SizeQ | SizeF | SizeD, - }; - if(p->from.node == v->node && p1->to.node == v->node && (info.flags & Move) && - !((info.flags|info1.flags) & (LeftAddr|RightAddr)) && - (info.flags & SizeAny) == (info1.flags & SizeAny)) { - p1->from = p->from; - thearch.excise(f); - v->removed = 1; - if(debugmerge > 0 && debug['v']) - print("drop immediate-use %S\n", v->node->sym); - } - nkill++; - continue; - } - } - - // Traverse live range of each variable to set start, end. - // Each flood uses a new value of gen so that we don't have - // to clear all the r->active words after each variable. - gen = 0; - for(i = 0; i < nvar; i++) { - v = &var[i]; - gen++; - for(f = v->use; f != nil; f = (Flow*)f->data) - mergewalk(v, f, gen); - if(v->addr) { - gen++; - for(f = v->use; f != nil; f = (Flow*)f->data) - varkillwalk(v, f, gen); - } - } - - // Sort variables by start. - bystart = malloc(nvar*sizeof bystart[0]); - for(i=0; i<nvar; i++) - bystart[i] = &var[i]; - qsort(bystart, nvar, sizeof bystart[0], startcmp); - - // List of in-use variables, sorted by end, so that the ones that - // will last the longest are the earliest ones in the array. - // The tail inuse[nfree:] holds no-longer-used variables. - // In theory we should use a sorted tree so that insertions are - // guaranteed O(log n) and then the loop is guaranteed O(n log n). - // In practice, it doesn't really matter. - inuse = malloc(nvar*sizeof inuse[0]); - ninuse = 0; - nfree = nvar; - for(i=0; i<nvar; i++) { - v = bystart[i]; - if(debugmerge > 0 && debug['v']) - print("consider %#N: removed=%d\n", v->node, v->removed); - - if(v->removed) - continue; - - // Expire no longer in use. - while(ninuse > 0 && inuse[ninuse-1]->end < v->start) { - v1 = inuse[--ninuse]; - inuse[--nfree] = v1; - } - - if(debugmerge > 0 && debug['v']) - print("consider %#N: removed=%d nfree=%d nvar=%d\n", v->node, v->removed, nfree, nvar); - // Find old temp to reuse if possible. - t = v->node->type; - for(j=nfree; j<nvar; j++) { - v1 = inuse[j]; - if(debugmerge > 0 && debug['v']) - print("consider %#N: maybe %#N: type=%T,%T addrtaken=%d,%d\n", v->node, v1->node, t, v1->node->type, v->node->addrtaken, v1->node->addrtaken); - // Require the types to match but also require the addrtaken bits to match. - // If a variable's address is taken, that disables registerization for the individual - // words of the variable (for example, the base,len,cap of a slice). - // We don't want to merge a non-addressed var with an addressed one and - // inhibit registerization of the former. - if(eqtype(t, v1->node->type) && v->node->addrtaken == v1->node->addrtaken) { - inuse[j] = inuse[nfree++]; - if(v1->merge) - v->merge = v1->merge; - else - v->merge = v1; - nkill++; - break; - } - } - - // Sort v into inuse. - j = ninuse++; - while(j > 0 && inuse[j-1]->end < v->end) { - inuse[j] = inuse[j-1]; - j--; - } - inuse[j] = v; - } - - if(debugmerge > 0 && debug['v']) { - print("%S [%d - %d]\n", curfn->nname->sym, nvar, nkill); - for(i = 0; i < nvar; i++) { - v = &var[i]; - print("var %#N %T %lld-%lld", v->node, v->node->type, v->start, v->end); - if(v->addr) - print(" addr=1"); - if(v->removed) - print(" dead=1"); - if(v->merge) - print(" merge %#N", v->merge->node); - if(v->start == v->end && v->def != nil) - print(" %P", v->def->prog); - print("\n"); - } - - if(debugmerge > 1 && debug['v']) - dumpit("after", g->start, 0); - } - - // Update node references to use merged temporaries. - for(f = g->start; f != nil; f = f->link) { - p = f->prog; - if((n = p->from.node) != N && (v = n->opt) != nil && v->merge != nil) - p->from.node = v->merge->node; - if((n = p->to.node) != N && (v = n->opt) != nil && v->merge != nil) - p->to.node = v->merge->node; - } - - // Delete merged nodes from declaration list. - for(lp = &curfn->dcl; (l = *lp); ) { - curfn->dcl->end = l; - n = l->n; - v = n->opt; - if(v && (v->merge || v->removed)) { - *lp = l->next; - continue; - } - lp = &l->next; - } - - // Clear aux structures. - for(i = 0; i < nvar; i++) - var[i].node->opt = nil; - - free(var); - free(bystart); - free(inuse); - flowend(g); -} - -static void -mergewalk(TempVar *v, Flow *f0, uint32 gen) -{ - Prog *p; - Flow *f1, *f, *f2; - - for(f1 = f0; f1 != nil; f1 = f1->p1) { - if(f1->active == gen) - break; - f1->active = gen; - p = f1->prog; - if(v->end < p->pc) - v->end = p->pc; - if(f1 == v->def) { - v->start = p->pc; - break; - } - } - - for(f = f0; f != f1; f = f->p1) - for(f2 = f->p2; f2 != nil; f2 = f2->p2link) - mergewalk(v, f2, gen); -} - -static void -varkillwalk(TempVar *v, Flow *f0, uint32 gen) -{ - Prog *p; - Flow *f1, *f; - - for(f1 = f0; f1 != nil; f1 = f1->s1) { - if(f1->active == gen) - break; - f1->active = gen; - p = f1->prog; - if(v->end < p->pc) - v->end = p->pc; - if(v->start > p->pc) - v->start = p->pc; - if(p->as == ARET || (p->as == AVARKILL && p->to.node == v->node)) - break; - } - - for(f = f0; f != f1; f = f->s1) - varkillwalk(v, f->s2, gen); -} - -// Eliminate redundant nil pointer checks. -// -// The code generation pass emits a CHECKNIL for every possibly nil pointer. -// This pass removes a CHECKNIL if every predecessor path has already -// checked this value for nil. -// -// Simple backwards flood from check to definition. -// Run prog loop backward from end of program to beginning to avoid quadratic -// behavior removing a run of checks. -// -// Assume that stack variables with address not taken can be loaded multiple times -// from memory without being rechecked. Other variables need to be checked on -// each load. - -typedef struct NilVar NilVar; - -static void nilwalkback(Flow *rcheck); -static void nilwalkfwd(Flow *rcheck); - -static int killed; // f->data is either nil or &killed - -void -nilopt(Prog *firstp) -{ - Flow *f; - Prog *p; - Graph *g; - int ncheck, nkill; - - g = flowstart(firstp, 0); - if(g == nil) - return; - - if(debug_checknil > 1 /* || strcmp(curfn->nname->sym->name, "f1") == 0 */) - dumpit("nilopt", g->start, 0); - - ncheck = 0; - nkill = 0; - for(f = g->start; f != nil; f = f->link) { - p = f->prog; - if(p->as != ACHECKNIL || !thearch.regtyp(&p->from)) - continue; - ncheck++; - if(thearch.stackaddr(&p->from)) { - if(debug_checknil && p->lineno > 1) - warnl(p->lineno, "removed nil check of SP address"); - f->data = &killed; - continue; - } - nilwalkfwd(f); - if(f->data != nil) { - if(debug_checknil && p->lineno > 1) - warnl(p->lineno, "removed nil check before indirect"); - continue; - } - nilwalkback(f); - if(f->data != nil) { - if(debug_checknil && p->lineno > 1) - warnl(p->lineno, "removed repeated nil check"); - continue; - } - } - - for(f = g->start; f != nil; f = f->link) { - if(f->data != nil) { - nkill++; - thearch.excise(f); - } - } - - flowend(g); - - if(debug_checknil > 1) - print("%S: removed %d of %d nil checks\n", curfn->nname->sym, nkill, ncheck); -} - -static void -nilwalkback(Flow *fcheck) -{ - Prog *p; - ProgInfo info; - Flow *f; - - for(f = fcheck; f != nil; f = uniqp(f)) { - p = f->prog; - thearch.proginfo(&info, p); - if((info.flags & RightWrite) && thearch.sameaddr(&p->to, &fcheck->prog->from)) { - // Found initialization of value we're checking for nil. - // without first finding the check, so this one is unchecked. - return; - } - if(f != fcheck && p->as == ACHECKNIL && thearch.sameaddr(&p->from, &fcheck->prog->from)) { - fcheck->data = &killed; - return; - } - } - - // Here is a more complex version that scans backward across branches. - // It assumes fcheck->kill = 1 has been set on entry, and its job is to find a reason - // to keep the check (setting fcheck->kill = 0). - // It doesn't handle copying of aggregates as well as I would like, - // nor variables with their address taken, - // and it's too subtle to turn on this late in Go 1.2. Perhaps for Go 1.3. - /* - for(f1 = f0; f1 != nil; f1 = f1->p1) { - if(f1->active == gen) - break; - f1->active = gen; - p = f1->prog; - - // If same check, stop this loop but still check - // alternate predecessors up to this point. - if(f1 != fcheck && p->as == ACHECKNIL && thearch.sameaddr(&p->from, &fcheck->prog->from)) - break; - - thearch.proginfo(&info, p); - if((info.flags & RightWrite) && thearch.sameaddr(&p->to, &fcheck->prog->from)) { - // Found initialization of value we're checking for nil. - // without first finding the check, so this one is unchecked. - fcheck->kill = 0; - return; - } - - if(f1->p1 == nil && f1->p2 == nil) { - print("lost pred for %P\n", fcheck->prog); - for(f1=f0; f1!=nil; f1=f1->p1) { - thearch.proginfo(&info, f1->prog); - print("\t%P %d %d %D %D\n", r1->prog, info.flags&RightWrite, thearch.sameaddr(&f1->prog->to, &fcheck->prog->from), &f1->prog->to, &fcheck->prog->from); - } - fatal("lost pred trail"); - } - } - - for(f = f0; f != f1; f = f->p1) - for(f2 = f->p2; f2 != nil; f2 = f2->p2link) - nilwalkback(fcheck, f2, gen); - */ -} - -static void -nilwalkfwd(Flow *fcheck) -{ - Flow *f, *last; - Prog *p; - ProgInfo info; - - // If the path down from rcheck dereferences the address - // (possibly with a small offset) before writing to memory - // and before any subsequent checks, it's okay to wait for - // that implicit check. Only consider this basic block to - // avoid problems like: - // _ = *x // should panic - // for {} // no writes but infinite loop may be considered visible - last = nil; - for(f = uniqs(fcheck); f != nil; f = uniqs(f)) { - p = f->prog; - thearch.proginfo(&info, p); - - if((info.flags & LeftRead) && thearch.smallindir(&p->from, &fcheck->prog->from)) { - fcheck->data = &killed; - return; - } - if((info.flags & (RightRead|RightWrite)) && thearch.smallindir(&p->to, &fcheck->prog->from)) { - fcheck->data = &killed; - return; - } - - // Stop if another nil check happens. - if(p->as == ACHECKNIL) - return; - // Stop if value is lost. - if((info.flags & RightWrite) && thearch.sameaddr(&p->to, &fcheck->prog->from)) - return; - // Stop if memory write. - if((info.flags & RightWrite) && !thearch.regtyp(&p->to)) - return; - // Stop if we jump backward. - if(last != nil && f->id <= last->id) - return; - last = f; - } -} |