diff options
-rw-r--r-- | doc/HACKING/HelpfulTools.md | 13 | ||||
-rwxr-xr-x | scripts/maint/analyze_callgraph.py | 259 | ||||
-rwxr-xr-x | scripts/maint/display_callgraph.py | 41 | ||||
-rwxr-xr-x | scripts/maint/generate_callgraph.sh | 14 |
4 files changed, 3 insertions, 324 deletions
diff --git a/doc/HACKING/HelpfulTools.md b/doc/HACKING/HelpfulTools.md index 67481ace43..b8ba2aa408 100644 --- a/doc/HACKING/HelpfulTools.md +++ b/doc/HACKING/HelpfulTools.md @@ -226,17 +226,10 @@ performance! See the gperftools manual for more info, but basically: Generating and analyzing a callgraph ------------------------------------ -1. Run `./scripts/maint/generate_callgraph.sh`. This will generate a - bunch of files in a new ./callgraph directory. +0. Build Tor on linux or mac, ideally with -O0 or -fno-inline. -2. Run `./scripts/maint/analyze_callgraph.py callgraph/src/*/*`. This - will do a lot of graph operations and then dump out a new - `callgraph.pkl` file, containing data in Python's 'pickle' format. - -3. Run `./scripts/maint/display_callgraph.py`. It will display: - - the number of functions reachable from each function. - - all strongly-connnected components in the Tor callgraph - - the largest bottlenecks in the largest SCC in the Tor callgraph. +1. Clone 'https://gitweb.torproject.org/user/nickm/calltool.git/' . + Follow the README in that repository. Note that currently the callgraph generator can't detect calls that pass through function pointers. diff --git a/scripts/maint/analyze_callgraph.py b/scripts/maint/analyze_callgraph.py deleted file mode 100755 index 8ce5827f07..0000000000 --- a/scripts/maint/analyze_callgraph.py +++ /dev/null @@ -1,259 +0,0 @@ -#!/usr/bin/python - -import re -import sys -import copy -import cPickle -import os - -class Parser: - def __init__(self): - self.calls = {} - self.definedIn = {} - - def enter_func(self, name): - if self.infunc and not self.extern and self.calledfns: - if self.infunc in self.definedIn: - #print "{}: {} or {}?".format( - # self.infunc, self.definedIn[self.infunc], self.module) - self.definedIn[self.infunc] = 'nil' - else: - self.definedIn[self.infunc] = self.module - self.calls.setdefault(self.infunc, set()).update( self.calledfns ) - - self.calledfns = set() - self.infunc = name - self.extern = False - - def parse_callgraph_file(self, inp, module): - self.infunc = None - self.extern = False - self.calledfns = set() - self.module = module - - for line in inp: - m = re.match(r"Call graph node for function: '([^']+)'", line) - if m: - self.enter_func(m.group(1)) - continue - m = re.match(r" CS<[^>]+> calls external node", line) - if m: - self.extern = True - m = re.match(r" CS<[^>]+> calls function '([^']+)'", line) - if m: - self.calledfns.add(m.group(1)) - self.enter_func(None) - - def extract_callgraph(self): - c = self.calls - self.calls = {} - return c - - -def transitive_closure(g): - passno = 0 - changed = True - g = copy.deepcopy(g) - import random - while changed: - passno += 1 - changed = False - keys = g.keys() - idx = 0 - for k in keys: - idx += 1 - print "Pass %d/?: %d/%d\r" %(passno, idx, len(keys)), - sys.stdout.flush() - newset = g[k].copy() - for fn in g[k]: - newset.update(g.get(fn, set())) - if len(newset) != len(g[k]): - g[k].update( newset ) - changed = True - - print - - return g - -def strongly_connected_components(g): - # From https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm, done stupidly. - index_of = {} - index = [ 0 ] - lowlink = {} - S = [] - onStack = set() - - all_sccs = [] - - def strongconnect(fn): - index_of[fn] = index[0] - lowlink[fn] = index[0] - index[0] += 1 - S.append(fn) - onStack.add(fn) - - for w in g.get(fn, []): - if w not in index_of: - strongconnect(w) - lowlink[fn] = min(lowlink[fn], lowlink[w]) - elif w in onStack: - lowlink[fn] = min(lowlink[fn], index_of[w]) - - if lowlink[fn] == index_of[fn]: - this_scc = [] - all_sccs.append(this_scc) - while True: - w = S.pop() - onStack.remove(w) - this_scc.append(w) - if w == fn: - break - - for v in g.keys(): - if v not in index_of: - strongconnect(v) - - return all_sccs - -def biggest_component(sccs): - return max(len(c) for c in sccs) - -def connection_bottlenecks(callgraph): - - callers = {} - for fn in callgraph: - for fn2 in callgraph[fn]: - callers.setdefault(fn2, set()).add(fn) - - components = strongly_connected_components(callgraph) - components.sort(key=len) - big_component_fns = components[-1] - size = len(big_component_fns) - - function_bottlenecks = fn_results = [] - - total = len(big_component_fns) - idx = 0 - for fn in big_component_fns: - idx += 1 - print "Pass 1/3: %d/%d\r"%(idx, total), - sys.stdout.flush() - cg2 = copy.deepcopy(callgraph) - del cg2[fn] - - fn_results.append( (size - biggest_component(strongly_connected_components(cg2)), fn) ) - - print - bcf_set = set(big_component_fns) - - call_bottlenecks = fn_results = [] - result_set = set() - total = len(big_component_fns) - idx = 0 - for fn in big_component_fns: - fn_callers = callers[fn].intersection(bcf_set) - idx += 1 - if len(fn_callers) != 1: - continue - - print "Pass 2/3: %d/%d\r"%(idx, total), - sys.stdout.flush() - - caller = fn_callers.pop() - assert len(fn_callers) == 0 - cg2 = copy.deepcopy(callgraph) - cg2[caller].remove(fn) - - fn_results.append( (size - biggest_component(strongly_connected_components(cg2)), fn, "called by", caller) ) - result_set.add( (caller, fn) ) - - print - - total = len(big_component_fns) - idx = 0 - for fn in big_component_fns: - fn_calls = callgraph[fn].intersection(bcf_set) - idx += 1 - if len(fn_calls) != 1: - continue - - print "Pass 3/3: %d/%d\r"%(idx, total), - sys.stdout.flush() - - callee = fn_calls.pop() - if (fn, callee) in result_set: - continue - - assert len(fn_calls) == 0 - cg2 = copy.deepcopy(callgraph) - cg2[fn].remove(callee) - - fn_results.append( (size - biggest_component(strongly_connected_components(cg2)), callee, "called by", fn) ) - - print - - - return (function_bottlenecks, call_bottlenecks) - -if __name__ == '__main__': - p = Parser() - for fname in sys.argv[1:]: - modname = re.sub(r'.*/', '', fname).replace('.callgraph', '.c') - with open(fname, 'r') as f: - p.parse_callgraph_file(f, modname) - - sys.stdout.flush() - - print "Building callgraph" - callgraph = p.extract_callgraph() - inModule = p.definedIn - - print "Deriving module callgraph" - modCallgraph = {} - for fn in callgraph: - fnMod = inModule[fn] - for called in callgraph[fn]: - try: - calledMod = inModule[called] - except KeyError: - continue - modCallgraph.setdefault(fnMod, set()).add(calledMod) - del modCallgraph['nil'] - - print "Finding strongly connected components" - sccs = strongly_connected_components(callgraph) - - print "Finding the transitive closure of the callgraph.." - closure = transitive_closure(callgraph) - - print "Finding bottlenecks..." - bottlenecks = connection_bottlenecks(callgraph) - - print "Finding module SCCs" - modSCCS = strongly_connected_components(modCallgraph) - - print "Finding module TC" - modTC = transitive_closure(modCallgraph) - - print "Finding module bottlenecks" - modB = connection_bottlenecks(modCallgraph) - - data = { - 'callgraph' : callgraph, - 'sccs' : sccs, - 'closure' : closure, - 'bottlenecks' : bottlenecks, - 'modules' : p.definedIn, - 'modItems' : { - 'callgraph' : modCallgraph, - 'sccs' : modSCCS, - 'closure' : modTC, - 'bottlenecks' : modB, - } - } - - with open('callgraph.pkl', 'w') as f: - cPickle.dump(data, f) - - - diff --git a/scripts/maint/display_callgraph.py b/scripts/maint/display_callgraph.py deleted file mode 100755 index c9001c6d96..0000000000 --- a/scripts/maint/display_callgraph.py +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/python - -import cPickle - -data = cPickle.load(open("callgraph.pkl")) - -# data = data['modItems'] - -callgraph = data['callgraph'] -closure = data['closure'] -sccs = data['sccs'] -fn_bottle, call_bottle = data['bottlenecks'] - -for n_reachable, fn in sorted(list((len(r), fn) for fn, r in closure.iteritems())): - print "%s can reach %s other functions." %(fn, n_reachable) - - -c = [ (len(component), component) for component in sccs ] -c.sort() - -print "\n================================" - -for n, component in c: - if n < 2: - continue - print "Strongly connected component of size %d:"%n - print component - - -print "\n================================" - -print "====== Number of functions pulled into blob, by function in blob." -fn_bottle.sort() -for n, fn in fn_bottle[-30:]: - print "%3d: %s"%(n, fn) - -print "====== Number of functions pulled into blob, by call in blob." -call_bottle.sort() -for n, fn1, _, fn2 in call_bottle[-30:]: - print "%3d: %s -> %s "%(n, fn2, fn1) - diff --git a/scripts/maint/generate_callgraph.sh b/scripts/maint/generate_callgraph.sh deleted file mode 100755 index c6b33c0aea..0000000000 --- a/scripts/maint/generate_callgraph.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/sh - -C_FILES=`echo src/common/*.c src/or/*.c src/tools/*.c` -CFLAGS="-Isrc/ext/trunnel -Isrc/trunnel -I. -Isrc/ext -Isrc/common -DLOCALSTATEDIR=\"\" -DSHARE_DATADIR=\"\" -Dinline=" - -mkdir -p callgraph/src/common -mkdir -p callgraph/src/or -mkdir -p callgraph/src/tools - -for fn in $C_FILES; do - echo $fn - clang $CFLAGS -S -emit-llvm -fno-inline -o - $fn | \ - opt -analyze -print-callgraph >/dev/null 2> "callgraph/${fn}allgraph" -done |