Diffstat (limited to 'scripts/maint/practracker/practracker.py')
-rwxr-xr-x  scripts/maint/practracker/practracker.py | 181
 1 file changed, 134 insertions(+), 47 deletions(-)
diff --git a/scripts/maint/practracker/practracker.py b/scripts/maint/practracker/practracker.py
index febb14639d..71741265f6 100755
--- a/scripts/maint/practracker/practracker.py
+++ b/scripts/maint/practracker/practracker.py
@@ -7,7 +7,8 @@ Go through the various .c files and collect metrics about them. If the metrics
violate some of our best practices and they are not found in the optional
exceptions file, then log a problem about them.
-We currently do metrics about file size, function size and number of includes.
+We currently gather metrics on file size, function size, and number of includes,
+for C source files and headers.
practracker.py should be run with its second argument pointing to the Tor
top-level source directory like this:
@@ -25,6 +26,7 @@ import os, sys
import metrics
import util
import problem
+import includes
# The filename of the exceptions file (it should be placed in the practracker directory)
EXCEPTIONS_FNAME = "./exceptions.txt"
@@ -35,12 +37,23 @@ MAX_FILE_SIZE = 3000 # lines
MAX_FUNCTION_SIZE = 100 # lines
# Recommended number of #includes
MAX_INCLUDE_COUNT = 50
+# Recommended file size for headers
+MAX_H_FILE_SIZE = 500
+# Recommended include count for headers
+MAX_H_INCLUDE_COUNT = 15
+# Recommended number of dependency violations
+MAX_DEP_VIOLATIONS = 0
+
+# Map from problem type to functions that adjust for tolerance
+TOLERANCE_FNS = {
+ 'include-count': lambda n: int(n*1.1),
+ 'function-size': lambda n: int(n*1.1),
+ 'file-size': lambda n: int(n*1.02),
+ 'dependency-violation': lambda n: (n+2)
+}
#######################################################
-# ProblemVault singleton
-ProblemVault = None
-
# The Tor source code topdir
TOR_TOPDIR = None
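
The TOLERANCE_FNS map above is later handed to ProblemVault.set_tolerances() (see the main() changes below), so that a value only slightly above a recorded exception produces a warning rather than an error. A rough sketch of what each function does to a sample value, using the default limits defined above (illustrative only, not part of the patch):

    # Illustrative sketch: how each tolerance function scales a sample value.
    # The sample values are the default limits defined above.
    for kind, limit in [('file-size', MAX_FILE_SIZE),
                        ('function-size', MAX_FUNCTION_SIZE),
                        ('include-count', MAX_INCLUDE_COUNT),
                        ('dependency-violation', MAX_DEP_VIOLATIONS)]:
        print("%s: %d -> tolerated up to %d" % (kind, limit, TOLERANCE_FNS[kind](limit)))
    # file-size: 3000 -> tolerated up to 3060
    # function-size: 100 -> tolerated up to 110
    # include-count: 50 -> tolerated up to 55
    # dependency-violation: 0 -> tolerated up to 2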
@@ -54,71 +67,73 @@ else:
return open(fname, 'r', encoding='utf-8')
def consider_file_size(fname, f):
- """Consider file size issues for 'f' and return True if a new issue was found"""
+ """Consider the size of 'f' and yield an FileSizeItem for it.
+ """
file_size = metrics.get_file_len(f)
- if file_size > MAX_FILE_SIZE:
- p = problem.FileSizeProblem(fname, file_size)
- return ProblemVault.register_problem(p)
- return False
+ yield problem.FileSizeItem(fname, file_size)
def consider_includes(fname, f):
- """Consider #include issues for 'f' and return True if a new issue was found"""
+ """Consider the #include count in for 'f' and yield an IncludeCountItem
+ for it.
+ """
include_count = metrics.get_include_count(f)
- if include_count > MAX_INCLUDE_COUNT:
- p = problem.IncludeCountProblem(fname, include_count)
- return ProblemVault.register_problem(p)
- return False
+ yield problem.IncludeCountItem(fname, include_count)
def consider_function_size(fname, f):
- """Consider the function sizes for 'f' and return True if a new issue was found"""
- found_new_issues = False
+ """yield a FunctionSizeItem for every function in f.
+ """
for name, lines in metrics.get_function_lines(f):
- # Don't worry about functions within our limits
- if lines <= MAX_FUNCTION_SIZE:
- continue
-
- # That's a big function! Issue a problem!
canonical_function_name = "%s:%s()" % (fname, name)
- p = problem.FunctionSizeProblem(canonical_function_name, lines)
- found_new_issues |= ProblemVault.register_problem(p)
+ yield problem.FunctionSizeItem(canonical_function_name, lines)
+
+def consider_include_violations(fname, real_fname, f):
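+ """Yield a DependencyViolationItem for 'fname' if it has any
+ include-rule violations, as reported by includes.consider_include_rules().
+ """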
+ n = 0
+ for item in includes.consider_include_rules(real_fname, f):
+ n += 1
+ if n:
+ yield problem.DependencyViolationItem(fname, n)
- return found_new_issues
#######################################################
def consider_all_metrics(files_list):
- """Consider metrics for all files, and return True if new issues were found"""
- found_new_issues = False
+ """Consider metrics for all files, and yield a sequence of problem.Item
+ objects for those issues."""
for fname in files_list:
with open_file(fname) as f:
- found_new_issues |= consider_metrics_for_file(fname, f)
- return found_new_issues
+ for item in consider_metrics_for_file(fname, f):
+ yield item
def consider_metrics_for_file(fname, f):
"""
- Consider the various metrics for file with filename 'fname' and file descriptor 'f'.
- Return True if we found new issues.
+ Yield a sequence of problem.Item objects for all of the metrics in
+ 'f'.
"""
+ real_fname = fname
# Strip the useless part of the path
if fname.startswith(TOR_TOPDIR):
fname = fname[len(TOR_TOPDIR):]
- found_new_issues = False
-
# Get file length
- found_new_issues |= consider_file_size(fname, f)
+ for item in consider_file_size(fname, f):
+ yield item
# Consider number of #includes
f.seek(0)
- found_new_issues |= consider_includes(fname, f)
+ for item in consider_includes(fname, f):
+ yield item
# Get function length
f.seek(0)
- found_new_issues |= consider_function_size(fname, f)
+ for item in consider_function_size(fname, f):
+ yield item
- return found_new_issues
+ # Check for "upward" includes
+ f.seek(0)
+ for item in consider_include_violations(fname, real_fname, f):
+ yield item
HEADER="""\
# Welcome to the exceptions file for Tor's best-practices tracker!
@@ -129,8 +144,12 @@ HEADER="""\
#
# There are three kinds of problems that we recognize right now:
# function-size -- a function of more than {MAX_FUNCTION_SIZE} lines.
-# file-size -- a file of more than {MAX_FILE_SIZE} lines.
-# include-count -- a file with more than {MAX_INCLUDE_COUNT} #includes.
+# file-size -- a .c file of more than {MAX_FILE_SIZE} lines, or a .h
+# file with more than {MAX_H_FILE_SIZE} lines.
+# include-count -- a .c file with more than {MAX_INCLUDE_COUNT} #includes,
+# or a .h file with more than {MAX_H_INCLUDE_COUNT} #includes.
+# dependency-violation -- a file includes a header that it should
+# not, according to an advisory .may_include file.
#
# Each line below represents a single exception that practracker should
# _ignore_. Each line has four parts:
@@ -161,8 +180,29 @@ def main(argv):
parser = argparse.ArgumentParser(prog=progname)
parser.add_argument("--regen", action="store_true",
help="Regenerate the exceptions file")
+ parser.add_argument("--list-overbroad", action="store_true",
+ help="List over-broad exceptions")
parser.add_argument("--exceptions",
help="Override the location for the exceptions file")
+ parser.add_argument("--strict", action="store_true",
+ help="Make all warnings into errors")
+ parser.add_argument("--terse", action="store_true",
+ help="Do not emit helpful instructions.")
+ parser.add_argument("--max-h-file-size", default=MAX_H_FILE_SIZE,
+ help="Maximum lines per .h file")
+ parser.add_argument("--max-h-include-count", default=MAX_H_INCLUDE_COUNT,
+ help="Maximum includes per .h file")
+ parser.add_argument("--max-file-size", default=MAX_FILE_SIZE,
+ help="Maximum lines per .c file")
+ parser.add_argument("--max-include-count", default=MAX_INCLUDE_COUNT,
+ help="Maximum includes per .c file")
+ parser.add_argument("--max-function-size", default=MAX_FUNCTION_SIZE,
+ help="Maximum lines per function")
+ parser.add_argument("--max-dependency-violations", default=MAX_DEP_VIOLATIONS,
+ help="Maximum number of dependency violations to allow")
+ parser.add_argument("--include-dir", action="append",
+ default=["src"],
+ help="A directory (under topdir) to search for source")
parser.add_argument("topdir", default=".", nargs="?",
help="Top-level directory for the tor source")
args = parser.parse_args(argv[1:])
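
For illustration, the new options can be combined like this (hypothetical invocations, run from the Tor top-level source directory as the module docstring describes):

    $ python3 scripts/maint/practracker/practracker.py --terse .
    $ python3 scripts/maint/practracker/practracker.py --list-overbroad .
    $ python3 scripts/maint/practracker/practracker.py --strict --max-function-size 100 .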
@@ -174,24 +214,55 @@ def main(argv):
else:
exceptions_file = os.path.join(TOR_TOPDIR, "scripts/maint/practracker", EXCEPTIONS_FNAME)
+ # 0) Configure our thresholds of "what is a problem actually"
+ filt = problem.ProblemFilter()
+ filt.addThreshold(problem.FileSizeItem("*.c", int(args.max_file_size)))
+ filt.addThreshold(problem.IncludeCountItem("*.c", int(args.max_include_count)))
+ filt.addThreshold(problem.FileSizeItem("*.h", int(args.max_h_file_size)))
+ filt.addThreshold(problem.IncludeCountItem("*.h", int(args.max_h_include_count)))
+ filt.addThreshold(problem.FunctionSizeItem("*.c", int(args.max_function_size)))
+ filt.addThreshold(problem.DependencyViolationItem("*.c", int(args.max_dependency_violations)))
+ filt.addThreshold(problem.DependencyViolationItem("*.h", int(args.max_dependency_violations)))
+
+ if args.list_overbroad and args.regen:
+ print("Cannot use --regen with --list-overbroad",
+ file=sys.stderr)
+ sys.exit(1)
+
# 1) Get all the .c files we care about
- files_list = util.get_tor_c_files(TOR_TOPDIR)
+ files_list = util.get_tor_c_files(TOR_TOPDIR, args.include_dir)
# 2) Initialize problem vault and load an optional exceptions file so that
# we don't warn about the past
- global ProblemVault
-
if args.regen:
tmpname = exceptions_file + ".tmp"
tmpfile = open(tmpname, "w")
- sys.stdout = tmpfile
- sys.stdout.write(HEADER)
+ problem_file = tmpfile
+ problem_file.write(HEADER)
ProblemVault = problem.ProblemVault()
else:
ProblemVault = problem.ProblemVault(exceptions_file)
+ problem_file = sys.stdout
+
+ if args.list_overbroad:
+ # If we're listing overbroad exceptions, don't list problems.
+ problem_file = util.NullFile()
+
+ # 2.1) Adjust the exceptions so that we warn only about small problems,
+ # and produce errors on big ones.
+ if not (args.regen or args.list_overbroad or args.strict):
+ ProblemVault.set_tolerances(TOLERANCE_FNS)
# 3) Go through all the files and report problems if they are not exceptions
- found_new_issues = consider_all_metrics(files_list)
+ found_new_issues = 0
+ for item in filt.filter(consider_all_metrics(files_list)):
+ status = ProblemVault.register_problem(item)
+ if status == problem.STATUS_ERR:
+ print(item, file=problem_file)
+ found_new_issues += 1
+ elif status == problem.STATUS_WARN:
+ # warnings always go to stdout.
+ print("(warning) {}".format(item))
if args.regen:
tmpfile.close()
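
To make the warn/error split concrete, here is a hypothetical walk-through (the numbers and the exception are invented; the exact comparison logic lives in problem.py, which this diff does not cover):

    # Hypothetical example: suppose the exceptions file already permits a
    # 120-line function. After ProblemVault.set_tolerances(TOLERANCE_FNS), the
    # effective ceiling is int(120 * 1.1) == 132, so presumably:
    #   118 lines -> covered by the exception, no output
    #   128 lines -> STATUS_WARN, printed as "(warning) ..." on stdout
    #   140 lines -> STATUS_ERR, printed to problem_file and counted in
    #                found_new_issues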
@@ -199,18 +270,34 @@ def main(argv):
sys.exit(0)
# If new issues were found, try to give out some advice to the developer on how to resolve it.
- if found_new_issues and not args.regen:
+ if found_new_issues and not args.regen and not args.terse:
new_issues_str = """\
-FAILURE: practracker found new problems in the code: see warnings above.
+FAILURE: practracker found {} new problem(s) in the code: see warnings above.
Please fix the problems if you can, and update the exceptions file
({}) if you can't.
See doc/HACKING/HelpfulTools.md for more information on using practracker.\
-""".format(exceptions_file)
+
+You can disable this message by setting the TOR_DISABLE_PRACTRACKER environment
+variable.
+""".format(found_new_issues, exceptions_file)
print(new_issues_str)
+ if args.list_overbroad:
+ def k_fn(tup):
+ return tup[0].key()
+ for (ex,p) in sorted(ProblemVault.list_overbroad_exceptions(), key=k_fn):
+ if p is None:
+ print(ex, "->", 0)
+ else:
+ print(ex, "->", p.metric_value)
+
sys.exit(found_new_issues)
if __name__ == '__main__':
+ if os.environ.get("TOR_DISABLE_PRACTRACKER"):
+ print("TOR_DISABLE_PRACTRACKER is set, skipping practracker tests.",
+ file=sys.stderr)
+ sys.exit(0)
main(sys.argv)
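
As a usage note, the new environment check means the whole run can be skipped (hypothetical invocation; the second line is the message printed to stderr):

    $ TOR_DISABLE_PRACTRACKER=1 python3 scripts/maint/practracker/practracker.py .
    TOR_DISABLE_PRACTRACKER is set, skipping practracker tests.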