diff options
Diffstat (limited to 'scripts/maint/annotate_ifdef_directives.py')
-rwxr-xr-x | scripts/maint/annotate_ifdef_directives.py | 289 |
1 files changed, 289 insertions, 0 deletions
#!/usr/bin/python
# Copyright (c) 2017-2019, The Tor Project, Inc.
# See LICENSE for licensing information

r"""
This script iterates over a list of C files.  For each file, it looks at the
#if/#else C macros, and annotates them with comments explaining what they
match.

For example, it replaces this kind of input...

>>> INPUT = '''
... #ifdef HAVE_OCELOT
...   C code here
... #if MIMSY == BOROGROVE
...   block 1
...   block 1
...   block 1
...   block 1
... #else
...   block 2
...   block 2
...   block 2
...   block 2
... #endif
... #endif
... '''

With this kind of output:
>>> EXPECTED_OUTPUT = '''
... #ifdef HAVE_OCELOT
...   C code here
... #if MIMSY == BOROGROVE
...   block 1
...   block 1
...   block 1
...   block 1
... #else /* !(MIMSY == BOROGROVE) */
...   block 2
...   block 2
...   block 2
...   block 2
... #endif /* MIMSY == BOROGROVE */
... #endif /* defined(HAVE_OCELOT) */
... '''

Here's how to use it:
>>> import sys
>>> if sys.version_info.major < 3: from cStringIO import StringIO
>>> if sys.version_info.major >= 3: from io import StringIO

>>> OUTPUT = StringIO()
>>> translate(StringIO(INPUT), OUTPUT)
>>> assert OUTPUT.getvalue() == EXPECTED_OUTPUT

Note that only #else and #endif lines are annotated.  Existing comments
on those lines are removed.
"""

import re

# Any block with fewer than this many lines does not need annotations.
LINE_OBVIOUSNESS_LIMIT = 4

# Maximum line width.  This includes a terminating newline character.
#
# (This is the maximum before encoding, so that if the operating system
# uses multiple characters to encode newline, that's still okay.)
LINE_WIDTH = 80

# Matches a preprocessor conditional directive.  Group 1 is the directive
# name; group 2 is everything after it on the same line (no newline).
_DIRECTIVE_RE = re.compile(
    r'\s*#\s*(if|ifdef|ifndef|else|endif|elif)\b\s*(.*)')


class Problem(Exception):
    """Raised when the #if/#else/#endif structure of the input is invalid."""
    pass


def close_parens_needed(expr):
    """Return the number of right-parentheses needed to make 'expr'
       balanced.

       The result is simply the count of "(" minus the count of ")", so it
       is negative if 'expr' has more closing than opening parentheses.

    >>> close_parens_needed("1+2")
    0
    >>> close_parens_needed("(1 + 2)")
    0
    >>> close_parens_needed("(1 + 2")
    1
    >>> close_parens_needed("(1 + (2 *")
    2
    >>> close_parens_needed("(1 + (2 * 3) + (4")
    2
    """
    return expr.count("(") - expr.count(")")


def truncate_expression(expr, new_width):
    """Given a parenthesized C expression in 'expr', try to return a new
       expression that is similar to 'expr', but no more than 'new_width'
       characters long.

       Try to return an expression with balanced parentheses.

       If no truncation of 'expr' fits, return a bare "...".  (That
       fallback is itself 3 characters long, so it can exceed 'new_width'
       when 'new_width' is smaller than 3.)

    >>> truncate_expression("1+2+3", 8)
    '1+2+3'
    >>> truncate_expression("1+2+3+4+5", 8)
    '1+2+3...'
    >>> truncate_expression("(1+2+3+4)", 8)
    '(1+2...)'
    >>> truncate_expression("(1+(2+3+4))", 8)
    '(1+...)'
    >>> truncate_expression("(((((((((", 8)
    '((...))'
    """
    if len(expr) <= new_width:
        # The expression is already short enough.
        return expr

    ellipsis = "..."

    # Start this at the minimum that we might truncate.
    n_to_remove = len(expr) + len(ellipsis) - new_width

    # Try removing characters, one by one, until we get something where
    # re-balancing the parentheses still fits within the limit.
    while n_to_remove < len(expr):
        truncated = expr[:-n_to_remove] + ellipsis
        # A negative count yields an empty string, so surplus ")" in the
        # truncated text is simply left alone.
        truncated += ")" * close_parens_needed(truncated)
        if len(truncated) <= new_width:
            return truncated
        n_to_remove += 1

    return ellipsis


def commented_line(fmt, argument, maxwidth=LINE_WIDTH):
    # (This is a raw docstring so that our doctests can use \.)
    r"""
    Return fmt%argument, for use as a commented line.  If the line would
    be longer than maxwidth, truncate argument but try to keep its
    parentheses balanced.

    Requires that fmt%"..." will fit into maxwidth characters.

    Requires that fmt ends with a newline.

    >>> commented_line("/* %s */\n", "hello world", 32)
    '/* hello world */\n'
    >>> commented_line("/* %s */\n", "hello world", 15)
    '/* hello... */\n'
    >>> commented_line("#endif /* %s */\n", "((1+2) && defined(FOO))", 32)
    '#endif /* ((1+2) && defi...) */\n'

    """
    assert fmt.endswith("\n")
    result = fmt % argument
    if len(result) <= maxwidth:
        return result

    # How long can we let the argument be?  Try filling in the
    # format with an empty argument to find out.
    max_arg_width = maxwidth - len(fmt % "")
    result = fmt % truncate_expression(argument, max_arg_width)
    assert len(result) <= maxwidth
    return result


def negate(expr):
    """Return a negated version of expr; try to avoid double-negation.

    We usually wrap expressions in parentheses and add a "!".
    >>> negate("A && B")
    '!(A && B)'

    But if we recognize the expression as negated, we can restore it.
    >>> negate(negate("A && B"))
    'A && B'

    The same applies for defined(FOO).
    >>> negate("defined(FOO)")
    '!defined(FOO)'
    >>> negate(negate("defined(FOO)"))
    'defined(FOO)'

    Internal parentheses don't confuse us:
    >>> negate("!(FOO) && !(BAR)")
    '!(!(FOO) && !(BAR))'

    """
    expr = expr.strip()
    # See whether we match !(...), with no intervening close-parens.
    m = re.match(r'^!\s*\(([^\)]*)\)$', expr)
    if m:
        return m.group(1)

    # See whether we match !?defined(...), with no intervening close-parens.
    m = re.match(r'^(!?)\s*(defined\([^\)]*\))$', expr)
    if m:
        # Flip the presence of the leading "!".
        prefix = "" if m.group(1) == "!" else "!"
        return prefix + m.group(2)

    return "!(%s)" % expr


def uncomment(s):
    """
    Remove existing trailing comments from an #else or #endif line.

    Everything from the first "//" or "/*" to the end of 's' is dropped,
    and the remainder is returned with surrounding whitespace stripped.
    """
    s = re.sub(r'//.*', '', s)
    s = re.sub(r'/\*.*', '', s)
    return s.strip()


def translate(f_in, f_out):
    """
    Read a file from f_in, and write its annotated version to f_out.

    'f_in' is iterated line by line; 'f_out' only needs a write() method.

    Raise Problem if an #else, #elif or #endif appears where it is not
    allowed, or if the input ends with an unclosed #if.
    """
    # A stack listing our current if/else state.  Each member of the stack
    # is a list of directives.  Each directive is a 3-tuple of
    #    (command, rest, lineno)
    # where "command" is one of if/ifdef/ifndef/else/elif, and where
    # "rest" is an expression in a format suitable for use with #if, and where
    # lineno is the line number where the directive occurred.
    stack = []
    # the stack element corresponding to the top level of the file.
    whole_file = []
    cur_level = whole_file
    lineno = 0
    for line in f_in:
        lineno += 1
        m = _DIRECTIVE_RE.match(line)
        if not m:
            # no directive, so we can just write it out.
            f_out.write(line)
            continue
        command, rest = m.groups()
        if command in ("if", "ifdef", "ifndef"):
            # The #if directive pushes us one level lower on the stack.
            if command == 'ifdef':
                rest = "defined(%s)" % uncomment(rest)
            elif command == 'ifndef':
                rest = "!defined(%s)" % uncomment(rest)
            elif rest.endswith("\\"):
                # The condition continues on the next line; we only
                # describe its first line.
                rest = rest[:-1] + "..."

            rest = uncomment(rest)

            new_level = [(command, rest, lineno)]
            stack.append(cur_level)
            cur_level = new_level
            f_out.write(line)
        elif command in ("else", "elif"):
            # We stay at the same level on the stack.  If we have an #else,
            # we comment it.
            if not cur_level or cur_level[-1][0] == 'else':
                raise Problem("Unexpected #%s on %d" % (command, lineno))
            # Only a plain #if/#else pair (no #elif) that is far enough
            # from its #if gets an annotation.
            if (len(cur_level) == 1 and command == 'else' and
                    lineno > cur_level[0][2] + LINE_OBVIOUSNESS_LIMIT):
                f_out.write(commented_line("#else /* %s */\n",
                                           negate(cur_level[0][1])))
            else:
                f_out.write(line)
            cur_level.append((command, rest, lineno))
        else:
            # We pop one element on the stack, and comment an endif.
            assert command == 'endif'
            if not stack:
                raise Problem("Unmatched #%s on %s" % (command, lineno))
            if lineno <= cur_level[0][2] + LINE_OBVIOUSNESS_LIMIT:
                f_out.write(line)
            elif len(cur_level) == 1 or (
                    len(cur_level) == 2 and cur_level[1][0] == 'else'):
                f_out.write(commented_line("#endif /* %s */\n",
                                           cur_level[0][1]))
            else:
                # There was at least one #elif: only the first condition
                # is shown.
                f_out.write(commented_line("#endif /* %s || ... */\n",
                                           cur_level[0][1]))
            cur_level = stack.pop()
    # Identity, not equality: we must be back at the top-level list itself.
    if stack or cur_level is not whole_file:
        raise Problem("Missing #endif")


def main(argv=None):
    """
    Command-line entry point; return a process exit status.

    'argv' defaults to sys.argv.  With no arguments, print a usage message
    to stderr and return 1.  With "--self-test" as the first argument, run
    this module's doctests and return 1 if any failed, else 0.  Otherwise
    treat every argument as a file name and annotate each file in place.

    Each file is first written to "<name>_OUT" and then renamed over the
    original.  If anything goes wrong before the rename (including a
    Problem raised by translate()), the temporary file is removed, the
    original is left untouched, and the exception propagates.
    """
    import os
    import sys

    if argv is None:
        argv = sys.argv
    args = argv[1:]

    if not args:
        prog = argv[0] if argv else "annotate_ifdef_directives.py"
        sys.stderr.write("Usage: %s [--self-test | FILE...]\n" % prog)
        return 1

    if args[0] == "--self-test":
        import doctest
        failed, _ = doctest.testmod()
        return 1 if failed else 0

    for fn in args:
        tmp_fn = fn + "_OUT"
        finished = False
        try:
            # Open the input first, so a missing input file does not leave
            # an empty temporary file behind.
            with open(fn, 'r') as input_file:
                with open(tmp_fn, 'w') as output_file:
                    translate(input_file, output_file)
            finished = True
        finally:
            if not finished and os.path.exists(tmp_fn):
                os.remove(tmp_fn)
        os.rename(tmp_fn, fn)

    return 0


if __name__ == '__main__':
    import sys
    sys.exit(main())