aboutsummaryrefslogtreecommitdiff
path: root/scripts/maint/practracker/includes.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/maint/practracker/includes.py')
-rwxr-xr-xscripts/maint/practracker/includes.py381
1 files changed, 381 insertions, 0 deletions
diff --git a/scripts/maint/practracker/includes.py b/scripts/maint/practracker/includes.py
new file mode 100755
index 0000000000..a5ee728824
--- /dev/null
+++ b/scripts/maint/practracker/includes.py
@@ -0,0 +1,381 @@
+#!/usr/bin/env python
+# Copyright 2018 The Tor Project, Inc. See LICENSE file for licensing info.
+
+"""This script looks through all the directories for files matching *.c or
+ *.h, and checks their #include directives to make sure that only "permitted"
+ headers are included.
+
+ Any #include directives with angle brackets (like #include <stdio.h>) are
+ ignored -- only directives with quotes (like #include "foo.h") are
+ considered.
+
+ To decide what includes are permitted, this script looks at a .may_include
+ file in each directory. This file contains empty lines, #-prefixed
+ comments, filenames (like "lib/foo/bar.h") and file globs (like lib/*/*.h)
+ for files that are permitted.
+
+ The script exits with an error if any non-permitted includes are found.
+ .may_include files that contain "!advisory" are considered advisory.
+ Advisory .may_include files only result in warnings, rather than errors.
+"""
+
+# Future imports for Python 2.7, mandatory in 3.0
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import fnmatch
+import os
+import re
+import sys
+
def open_file(fname):
    """Open 'fname' for reading in text mode and return the file object.

    On Python 3 the file is decoded as UTF-8.  On Python 2 the file is
    opened with the two-argument form of open(), with no 'encoding'
    argument.
    """
    running_python2 = sys.version_info[0] <= 2
    if running_python2:
        return open(fname, 'r')
    return open(fname, 'r', encoding='utf-8')
+
def warn(msg):
    """Print 'msg' on standard error, followed by a newline."""
    stream = sys.stderr
    print(msg, file=stream)
+
def fname_is_c(fname):
    """Return True iff 'fname' names a file whose #include directives
    should be checked.

    That is the case when the name ends in ".c" or ".h" and its final
    path component does not start with "." or "#".
    """
    if not fname.endswith((".h", ".c")):
        return False
    base = os.path.basename(fname)
    return not base.startswith((".", "#"))
+
# Matches a quoted #include directive at the start of a line (leading
# whitespace allowed); group 1 is the text between the double quotes.
# Angle-bracket includes do not match.
INCLUDE_PATTERN = re.compile(r'\s*#\s*include\s+"([^"]*)"')

# Name of the per-directory rules file.
RULES_FNAME = ".may_include"

# Shapes of .may_include entries that are considered ordinary.  An entry
# matching none of these still takes effect, but Rules.addPattern() warns
# about it as an "Unusual pattern".
ALLOWED_PATTERNS = [
    re.compile(r'^.*\*\.(h|inc)$'),
    re.compile(r'^.*/.*\.h$'),
    re.compile(r'^ext/.*\.c$'),
    # The dots are escaped so that only the literal file names match.
    re.compile(r'^orconfig\.h$'),
    re.compile(r'^micro-revision\.i$'),
]

# Default top-level source directory; main() overwrites this with the
# directory given on the command line.
TOPDIR = "src"
+
def pattern_is_normal(s):
    """Return True iff the .may_include entry 's' matches at least one of
    the regular expressions in ALLOWED_PATTERNS."""
    return any(regex.match(s) for regex in ALLOWED_PATTERNS)
+
class Error(object):
    """One problem found while checking #include directives.

    Attributes:
      location -- where the problem was found.
      msg -- description of the problem.
      is_advisory -- true if the problem should only be reported as a
         warning rather than treated as a failure.
    """
    def __init__(self, location, msg, is_advisory=False):
        self.is_advisory = is_advisory
        self.msg = msg
        self.location = location

    def __str__(self):
        # Rendered as "<msg> at <location>".
        return "{0} at {1}".format(self.msg, self.location)
+
class Rules(object):
    """The parsed contents of one .may_include file.

    Attributes:
      dirpath -- the directory the rules were loaded for.
      incpath -- 'dirpath' with a leading "src/" removed, if it had one.
      patterns -- the fnmatch-style patterns of permitted includes, in
         the order they were added.
      usedPatterns -- the subset of 'patterns' that has permitted at
         least one include so far.
      is_advisory -- true once the "!advisory" marker has been added.
    """
    def __init__(self, dirpath):
        src_prefix = "src/"
        self.dirpath = dirpath
        self.incpath = dirpath
        if dirpath.startswith(src_prefix):
            self.incpath = dirpath[len(src_prefix):]
        self.patterns = []
        self.usedPatterns = set()
        self.is_advisory = False

    def addPattern(self, pattern):
        """Add one entry from the rules file.

        The special entry "!advisory" marks these rules as advisory and
        is not stored as a pattern.  Any other entry is stored; a
        warning is printed if it does not look like a normal pattern.
        """
        if pattern == "!advisory":
            self.is_advisory = True
        else:
            if not pattern_is_normal(pattern):
                warn("Unusual pattern {} in {}".format(pattern, self.dirpath))
            self.patterns.append(pattern)

    def includeOk(self, path):
        """Return True iff some pattern permits including 'path'.

        The first matching pattern is recorded as used.
        """
        hit = next((candidate for candidate in self.patterns
                    if fnmatch.fnmatchcase(path, candidate)), None)
        if hit is None:
            return False
        self.usedPatterns.add(hit)
        return True

    def applyToLines(self, lines, loc_prefix=""):
        """Yield an Error for every quoted #include in 'lines' that no
        pattern permits.

        Each error's location is 'loc_prefix' followed by the 1-based
        line number.
        """
        for lineno, text in enumerate(lines, 1):
            found = INCLUDE_PATTERN.match(text)
            if not found:
                continue
            include = found.group(1)
            if self.includeOk(include):
                continue
            yield Error("{}{}".format(loc_prefix, str(lineno)),
                        "Forbidden include of {}".format(include),
                        is_advisory=self.is_advisory)

    def applyToFile(self, fname, f):
        """Yield the errors found in the open file 'f'; locations are
        reported as "fname:lineno"."""
        prefix = "{}:".format(fname)
        for problem in self.applyToLines(iter(f), prefix):
            yield problem

    def noteUnusedRules(self):
        """Warn about every pattern that has not permitted any include."""
        unused = [p for p in self.patterns if p not in self.usedPatterns]
        for p in unused:
            warn("Pattern {} in {} was never used.".format(p, self.dirpath))

    def getAllowedDirectories(self):
        """Return a list with the directory part of every pattern that
        contains a "/" (one entry per such pattern, in pattern order)."""
        # Tried in order; the first expression that matches supplies the
        # directory for that pattern.
        directory_regexes = (r'^(.*)/\*\.(h|inc)$', r'^(.*)/[^/]*$')
        allowed = []
        for p in self.patterns:
            for regex in directory_regexes:
                m = re.match(regex, p)
                if m:
                    allowed.append(m.group(1))
                    break
        return allowed
+
+
def normalize_srcdir(fname):
    """Given the name of a source directory or file, return its name
       relative to `src` in a unix-like format.

       If the last path component ends in ".c" or ".h", it is taken to be
       a file name and dropped first.  Of what remains, everything up to
       and including the last `src` component among the parent
       directories is removed, and the rest is joined with "/".  A path
       that has no `src` component is taken to be relative to `src`
       already and is returned unchanged, apart from separator
       normalization and the removal of empty and "." components.

       Examples: "src/lib/foo/bar.c" and "lib/foo" both give "lib/foo".
    """
    dirname, basename = os.path.split(fname)
    if basename.endswith((".c", ".h")):
        fname = dirname

    # Now we have a directory.  Break it into components, accepting
    # either of the platform's separators.
    unix_path = fname.replace(os.sep, "/")
    if os.altsep:
        unix_path = unix_path.replace(os.altsep, "/")
    parts = [p for p in unix_path.split("/") if p not in ("", ".")]
    if not parts:
        return ""

    # The final component is always kept; only the parent directories are
    # searched for `src`, starting from the right so that the innermost
    # `src` wins.
    parents, last = parts[:-1], parts[-1]
    for idx in range(len(parents) - 1, -1, -1):
        if parents[idx] == 'src':
            parents = parents[idx + 1:]
            break

    return "/".join(parents + [last])
+
# Maps the name of each rules file we have tried to load to its Rules
# object, or to None if that file did not exist.
include_rules_cache = {}

def load_include_rules(fname):
    """Parse the rules file 'fname' and return the resulting Rules object,
       or None if there is no such file.

       Results (including None) are remembered in include_rules_cache, so
       each file is read at most once.
    """
    try:
        return include_rules_cache[fname]
    except KeyError:
        pass

    rules = None
    if os.path.exists(fname):
        rules = Rules(os.path.split(fname)[0])
        with open_file(fname) as f:
            for raw in f:
                entry = raw.strip()
                # Blank lines and #-comments carry no rule.
                if entry and not entry.startswith("#"):
                    rules.addPattern(entry)

    include_rules_cache[fname] = rules
    return rules
+
def get_all_include_rules():
    """Return every Rules object loaded so far, as a list ordered by the
       name of the rules file it came from.  Files that were looked up but
       did not exist are left out."""
    loaded = []
    for fname in sorted(include_rules_cache):
        rules = include_rules_cache[fname]
        if rules is not None:
            loaded.append(rules)
    return loaded
+
def remove_self_edges(graph):
    """Drop every edge that leads from a node back to itself.

       'graph' is a directed graph given as an adjacency mapping (node ->
       list of the nodes it connects to).  It is modified in place; each
       value is replaced by a new list.
    """
    for node, targets in list(graph.items()):
        graph[node] = [target for target in targets if target != node]
+
def closure(graph):
    """Return the transitive closure of a directed graph.

       'graph' is an adjacency mapping (node -> iterable of the nodes it
       connects to).  The result is a new mapping with the same keys in
       which each value is the set of all nodes reachable from that key.
       The input mapping is not modified.
    """
    result = graph.copy()
    for node in list(result):
        result[node] = set(result[node])

    # Keep folding each node's successors' successors into its own set
    # until a whole pass adds nothing new.
    grew = True
    while grew:
        grew = False
        for node in result:
            reachable = result[node]
            before = len(reachable)
            for successor in list(reachable):
                reachable.update(result.get(successor, []))
            if len(reachable) != before:
                grew = True

    return result
+
def toposort(graph, limit=100):
    """Takes a directed graph in as an adjacency mapping (a mapping from
       node to a list of the nodes to which it connects).  Tries to
       perform a topological sort on the graph, arranging the nodes into
       "levels", such that every member of each level is only reachable
       by members of later levels.

       Returns a list of the members of each level.  Every level in the
       returned list is non-empty.

       Modifies the input graph, removing every member that could be
       sorted.  If the graph does not become empty, then it contains a
       cycle (or is deeper than "limit").  Edges to nodes that are not
       keys of the graph are ignored.

       "limit" is the max depth of the graph after which we give up trying
       to sort it and conclude we have a cycle.
    """
    all_levels = []

    depth = 0
    while graph:
        # Forget edges to nodes that were already sorted or never were
        # in the graph at all.
        for node in list(graph):
            graph[node] = [d for d in graph[node] if d in graph]

        cur_level = [node for node in graph if not graph[node]]
        if not cur_level:
            # Every remaining node still depends on another remaining
            # node, so further passes cannot make progress: a cycle.
            break

        all_levels.append(cur_level)
        for node in cur_level:
            del graph[node]

        depth += 1
        if depth > limit:
            break

    return all_levels
+
def consider_include_rules(fname, f):
    """Check the open file 'f', whose name is 'fname', against the rules
       file in the same directory.

       Yields one Error for every include the rules do not permit.
       Yields nothing if that directory has no rules file.
    """
    dirpath = os.path.split(fname)[0]
    rules = load_include_rules(os.path.join(dirpath, RULES_FNAME))
    if rules is None:
        return

    for err in rules.applyToFile(fname, f):
        yield err
+
def walk_c_files(topdir="src"):
    """Run through all .c and .h files under topdir, looking for
       include-rule violations. Yield those violations."""

    for dirpath, _dirnames, fnames in os.walk(topdir):
        for fname in fnames:
            if not fname_is_c(fname):
                continue
            fullpath = os.path.join(dirpath, fname)
            # Use open_file() rather than a bare open(), so that source
            # files are read the same way as the rules files: as UTF-8 on
            # Python 3, whatever the locale's default encoding is.
            with open_file(fullpath) as f:
                for err in consider_include_rules(fullpath, f):
                    yield err
+
def open_or_stdin(fname):
    """Return standard input if 'fname' is "-"; otherwise open the file
       named 'fname' for reading and return it."""
    return sys.stdin if fname == '-' else open(fname)
+
def check_subsys_file(fname, uses_dirs):
    """Check an ordered list of subsystems against the include rules.

       'fname' names a file ("-" for standard input) with one subsystem
       per line, as three whitespace-separated fields.  Only the third
       field, a source path, is used; blank lines are skipped.

       'uses_dirs' maps each directory to the directories it may include
       from.

       Prints "INVERSION: A uses B" whenever a subsystem's directory B is
       one that the directory A of an earlier-listed subsystem uses,
       directly or indirectly.

       Returns True if any inversion was found and False otherwise.  If
       'uses_dirs' is empty, prints "SKIPPING" and returns False without
       reading anything.
    """
    if not uses_dirs:
        # We're doing a distcheck build, or for some other reason there are
        # no .may_include files.
        print("SKIPPING")
        return False

    uses_dirs = { normalize_srcdir(k) : { normalize_srcdir(d) for d in v }
                  for (k,v) in uses_dirs.items() }
    uses_closure = closure(uses_dirs)
    ok = True
    previous_subsystems = []

    with open_or_stdin(fname) as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue
            _level, _name, location = fields
            subsys_dir = normalize_srcdir(location)
            for prev in previous_subsystems:
                # A directory without a .may_include file has no entry in
                # the closure; treat it as using nothing.
                if subsys_dir in uses_closure.get(prev, ()):
                    print("INVERSION: {} uses {}".format(prev,subsys_dir))
                    ok = False
            previous_subsystems.append(subsys_dir)
    return not ok
+
def run_check_includes(topdir, list_unused=False, log_sorted_levels=False,
                       list_advisories=False, check_subsystem_order=None):
    """Check every C file under 'topdir' and report what was found.

       Each violation is printed on standard error; advisory ones are
       printed only if 'list_advisories' is true, and never count as
       failures.  The process exits with status 1 if there was any
       non-advisory violation, if the subsystem list named by
       'check_subsystem_order' (when given) contains an inversion, or if
       the directory dependencies taken from the rules files cannot be
       fully sorted.

       'list_unused' asks for a warning about every pattern that permitted
       nothing; 'log_sorted_levels' asks for the sorted dependency levels
       to be printed.
    """
    found_errors = False

    for err in walk_c_files(topdir):
        if err.is_advisory:
            if list_advisories:
                print(err, file=sys.stderr)
            continue
        print(err, file=sys.stderr)
        found_errors = True

    if found_errors:
        hint = ("To change which includes are allowed in a C file, edit the {}\n"
                " files in its enclosing directory.")
        warn(hint.format(RULES_FNAME))
        sys.exit(1)

    if list_unused:
        for rules in get_all_include_rules():
            rules.noteUnusedRules()

    # Directory -> directories it may include from, per the rules files.
    uses_dirs = { rules.incpath : rules.getAllowedDirectories()
                  for rules in get_all_include_rules() }

    remove_self_edges(uses_dirs)

    if check_subsystem_order and check_subsys_file(check_subsystem_order,
                                                   uses_dirs):
        sys.exit(1)

    # toposort() removes from uses_dirs everything it manages to sort.
    all_levels = toposort(uses_dirs)

    if log_sorted_levels:
        for (n, cur_level) in enumerate(all_levels):
            if not cur_level:
                continue
            print(n, cur_level)

    if uses_dirs:
        print("There are circular .may_include dependencies in here somewhere:",
              uses_dirs)
        sys.exit(1)
+
def main(argv):
    """Parse the command line in 'argv' (program name first) and run the
       include checks that it asks for."""
    import argparse

    global TOPDIR

    ap = argparse.ArgumentParser(prog=argv[0])
    ap.add_argument("--toposort", action="store_true",
                    help="Print a topologically sorted list of modules")
    ap.add_argument("--list-unused", action="store_true",
                    help="List unused lines in .may_include files.")
    ap.add_argument("--list-advisories", action="store_true",
                    help="List advisories as well as forbidden includes")
    ap.add_argument("--check-subsystem-order", action="store",
                    help="Check a list of subsystems for ordering")
    ap.add_argument("topdir", default="src", nargs="?",
                    help="Top-level directory for the tor source")
    opts = ap.parse_args(argv[1:])

    TOPDIR = opts.topdir
    run_check_includes(topdir=opts.topdir,
                       list_unused=opts.list_unused,
                       log_sorted_levels=opts.toposort,
                       list_advisories=opts.list_advisories,
                       check_subsystem_order=opts.check_subsystem_order)

if __name__ == '__main__':
    main(sys.argv)