diff options
Diffstat (limited to 'scripts/maint/practracker/includes.py')
-rwxr-xr-x | scripts/maint/practracker/includes.py | 285 |
1 files changed, 285 insertions, 0 deletions
#!/usr/bin/python
# Copyright 2018 The Tor Project, Inc.  See LICENSE file for licensing info.

"""This script looks through all the directories for files matching *.c or
*.h, and checks their #include directives to make sure that only "permitted"
headers are included.

Any #include directives with angle brackets (like #include <stdio.h>) are
ignored -- only directives with quotes (like #include "foo.h") are
considered.

To decide what includes are permitted, this script looks at a .may_include
file in each directory.  This file contains empty lines, #-prefixed
comments, filenames (like "lib/foo/bar.h") and file globs (like lib/*/*.h)
for files that are permitted.  A line consisting of "!advisory" marks every
violation found through that file as advisory rather than fatal.
"""

from __future__ import print_function

import fnmatch
import os
import re
import sys

if sys.version_info[0] <= 2:
    def open_file(fname):
        """Open 'fname' for reading as text (Python 2 flavour)."""
        return open(fname, 'r')
else:
    def open_file(fname):
        """Open 'fname' for reading as UTF-8 text (Python 3 flavour)."""
        return open(fname, 'r', encoding='utf-8')


def warn(msg):
    """Print 'msg' on standard error."""
    print(msg, file=sys.stderr)


def fname_is_c(fname):
    """Return true iff 'fname' is the name of a file that we should
    search for possibly disallowed #include directives.
    """
    return fname.endswith(".h") or fname.endswith(".c")


# Matches a quoted #include directive; group 1 is the included path.
INCLUDE_PATTERN = re.compile(r'\s*#\s*include\s+"([^"]*)"')

# Name of the per-directory rules file.
RULES_FNAME = ".may_include"

# Shapes of .may_include lines that we consider unsurprising.  Anything
# else is still accepted, but triggers an "Unusual pattern" warning.
ALLOWED_PATTERNS = [
    re.compile(r'^.*\*\.(h|inc)$'),
    re.compile(r'^.*/.*\.h$'),
    re.compile(r'^ext/.*\.c$'),
    # The dots are escaped so that only the literal file names match.
    re.compile(r'^orconfig\.h$'),
    re.compile(r'^micro-revision\.i$'),
]


def pattern_is_normal(s):
    """Return true iff 's' matches one of ALLOWED_PATTERNS."""
    return any(p.match(s) for p in ALLOWED_PATTERNS)


class Error(object):
    """A single include-rule violation.

    'location' is a "filename:lineno" string, 'msg' describes the problem,
    and 'is_advisory' is true when the violation should not be fatal.
    """
    def __init__(self, location, msg, is_advisory=False):
        self.location = location
        self.msg = msg
        self.is_advisory = is_advisory

    def __str__(self):
        return "{} at {}".format(self.msg, self.location)


class Rules(object):
    """ A 'Rules' object is the parsed version of a .may_include file. """

    def __init__(self, dirpath):
        self.dirpath = dirpath
        # Include paths are written relative to "src/", so strip that
        # prefix to get the name other rules files use for this directory.
        if dirpath.startswith("src/"):
            self.incpath = dirpath[4:]
        else:
            self.incpath = dirpath
        self.patterns = []
        self.usedPatterns = set()
        self.is_advisory = False

    def addPattern(self, pattern):
        """Add one (already stripped) line of a rules file to this object.

        The special line "!advisory" flips this object into advisory mode
        instead of being stored as a pattern.  Patterns that do not look
        like any of ALLOWED_PATTERNS are accepted with a warning.
        """
        if pattern == "!advisory":
            self.is_advisory = True
            return
        if not pattern_is_normal(pattern):
            warn("Unusual pattern {} in {}".format(pattern, self.dirpath))
        self.patterns.append(pattern)

    def includeOk(self, path):
        """Return true iff including 'path' is permitted.

        The first pattern that matches is recorded as used, so that
        noteUnusedRules() can report the ones that never matched.
        """
        for pattern in self.patterns:
            if fnmatch.fnmatchcase(path, pattern):
                self.usedPatterns.add(pattern)
                return True
        return False

    def applyToLines(self, lines, loc_prefix=""):
        """Yield an Error for every forbidden quoted #include in 'lines'.

        'lines' is any iterable of source lines; 'loc_prefix' is prepended
        to the 1-based line number to form each Error's location.
        """
        for lineno, line in enumerate(lines, 1):
            m = INCLUDE_PATTERN.match(line)
            if m is None:
                continue
            include = m.group(1)
            if not self.includeOk(include):
                yield Error("{}{}".format(loc_prefix, lineno),
                            "Forbidden include of {}".format(include),
                            is_advisory=self.is_advisory)

    def applyToFile(self, fname, f):
        """Yield an Error for every forbidden include in the open file 'f',
        whose name is 'fname'."""
        for error in self.applyToLines(iter(f), "{}:".format(fname)):
            yield error

    def noteUnusedRules(self):
        """Warn about every pattern that no include has matched so far."""
        for p in self.patterns:
            if p not in self.usedPatterns:
                warn("Pattern {} in {} was never used.".format(
                    p, self.dirpath))

    def getAllowedDirectories(self):
        """Return the directory part of every pattern that contains a '/'.

        Patterns without a slash (like "orconfig.h") name no directory and
        are skipped.  Duplicates are not removed.
        """
        allowed = []
        for p in self.patterns:
            directory, slash, _ = p.rpartition("/")
            if slash:
                allowed.append(directory)
        return allowed


# Maps a rules-file name to its Rules object, or to None if no such file
# exists.
include_rules_cache = {}


def load_include_rules(fname):
    """ Read a rules file from 'fname', and return it as a Rules object.
        Return 'None' if fname does not exist.

        Results (including the 'None' case) are remembered in
        include_rules_cache.
    """
    if fname in include_rules_cache:
        return include_rules_cache[fname]
    if not os.path.exists(fname):
        include_rules_cache[fname] = None
        return None
    result = Rules(os.path.split(fname)[0])
    with open_file(fname) as f:
        for line in f:
            line = line.strip()
            # Skip blank lines and comments.
            if not line or line.startswith("#"):
                continue
            result.addPattern(line)
    include_rules_cache[fname] = result
    return result


def get_all_include_rules():
    """Return a list of all the Rules objects we have loaded so far,
       sorted by the names of their rules files."""
    return [include_rules_cache[fname]
            for fname in sorted(include_rules_cache)
            if include_rules_cache[fname] is not None]


def remove_self_edges(graph):
    """Takes a directed graph in as an adjacency mapping (a mapping from
       node to a list of the nodes to which it connects).

       Remove all edges from a node to itself."""
    for k in list(graph):
        graph[k] = [d for d in graph[k] if d != k]


def toposort(graph, limit=100):
    """Takes a directed graph in as an adjacency mapping (a mapping from
       node to a list of the nodes to which it connects).  Tries to
       perform a topological sort on the graph, arranging the nodes into
       "levels", such that every member of each level is only reachable
       by members of later levels.

       Returns a list of the members of each level.

       Modifies the input graph, removing every member that could be
       sorted.  If the graph does not become empty, then it contains a
       cycle.

       "limit" is the max depth of the graph after which we give up trying
       to sort it and conclude we have a cycle.
    """
    all_levels = []

    n = 0
    while graph:
        cur_level = []
        for k in list(graph):
            # Edges to nodes that are already sorted (or were never in the
            # graph at all) no longer constrain this node.
            graph[k] = [d for d in graph[k] if d in graph]
            if not graph[k]:
                cur_level.append(k)
        if not cur_level:
            # Nothing could be sorted in this pass, so every remaining node
            # is part of (or depends on) a cycle.  Further passes would see
            # exactly the same graph.
            break
        all_levels.append(cur_level)
        for k in cur_level:
            del graph[k]
        n += 1
        if n > limit:
            break

    return all_levels


def consider_include_rules(fname, f):
    """Yield an Error for each include in the open file 'f' (named 'fname')
       that the .may_include file in its directory forbids.  Yield nothing
       if that directory has no .may_include file."""
    dirpath = os.path.split(fname)[0]
    rules_fname = os.path.join(dirpath, RULES_FNAME)
    rules = load_include_rules(rules_fname)
    if rules is None:
        return

    for err in rules.applyToFile(fname, f):
        yield err


def walk_c_files(topdir="src"):
    """Run through all .c and .h files under topdir, looking for
       include-rule violations. Yield those violations."""
    for dirpath, dirnames, fnames in os.walk(topdir):
        for fname in fnames:
            if fname_is_c(fname):
                fullpath = os.path.join(dirpath, fname)
                # Use open_file() so that sources are decoded the same way
                # as the rules files, independent of the locale.
                with open_file(fullpath) as f:
                    for err in consider_include_rules(fullpath, f):
                        yield err


def run_check_includes(topdir, list_unused=False, log_sorted_levels=False,
                       list_advisories=False):
    """Check every C file under 'topdir' against its .may_include rules.

    Violations are printed on standard error; advisory ones only when
    'list_advisories' is set.  If there is any non-advisory violation, or
    if the .may_include files describe a circular dependency between
    directories, exit the process with status 1.

    'list_unused' additionally warns about patterns that never matched;
    'log_sorted_levels' prints the topologically sorted directory levels.
    """
    trouble = False

    for err in walk_c_files(topdir):
        if err.is_advisory and not list_advisories:
            continue
        print(err, file=sys.stderr)
        if not err.is_advisory:
            trouble = True

    if trouble:
        warn("To change which includes are allowed in a C file, edit the {}\n"
             "files in its enclosing directory.".format(RULES_FNAME))
        sys.exit(1)

    if list_unused:
        for rules in get_all_include_rules():
            rules.noteUnusedRules()

    # Build the "directory X may include from directory Y" graph.
    uses_dirs = {}
    for rules in get_all_include_rules():
        uses_dirs[rules.incpath] = rules.getAllowedDirectories()

    remove_self_edges(uses_dirs)
    all_levels = toposort(uses_dirs)

    if log_sorted_levels:
        for (n, cur_level) in enumerate(all_levels):
            if cur_level:
                print(n, cur_level)

    # toposort() removes everything it managed to sort; leftovers are
    # involved in a cycle.
    if uses_dirs:
        print("There are circular .may_include dependencies in here somewhere:",
              uses_dirs)
        sys.exit(1)


def main(argv):
    """Parse the command line in 'argv' and run the include checker."""
    import argparse

    progname = argv[0]
    parser = argparse.ArgumentParser(prog=progname)
    parser.add_argument("--toposort", action="store_true",
                        help="Print a topologically sorted list of modules")
    parser.add_argument("--list-unused", action="store_true",
                        help="List unused lines in .may_include files.")
    parser.add_argument("--list-advisories", action="store_true",
                        help="List advisories as well as forbidden includes")
    parser.add_argument("topdir", default="src", nargs="?",
                        help="Top-level directory for the tor source")
    args = parser.parse_args(argv[1:])

    run_check_includes(topdir=args.topdir,
                       log_sorted_levels=args.toposort,
                       list_unused=args.list_unused,
                       list_advisories=args.list_advisories)


if __name__ == '__main__':
    main(sys.argv)