diff options
author | Nick Mathewson <nickm@torproject.org> | 2020-03-05 08:23:32 -0500 |
---|---|---|
committer | Nick Mathewson <nickm@torproject.org> | 2020-03-05 08:23:32 -0500 |
commit | 686494f0f71b9235399b8241aba3e0c2fcb03ea1 (patch) | |
tree | 39256173e40ef50d45cc35c22561da9279f9753b /scripts | |
parent | edc0bf5089df13d1d6a246e67bddb484ac99ad59 (diff) | |
parent | b5ccdd978ea138cde92b3513c9d653ba18b8b463 (diff) | |
download | tor-686494f0f71b9235399b8241aba3e0c2fcb03ea1.tar.gz tor-686494f0f71b9235399b8241aba3e0c2fcb03ea1.zip |
Merge branch 'clang_format_prep_3'
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/maint/checkSpace.pl | 47 | ||||
-rw-r--r-- | scripts/maint/checkspace_tests/expected.txt | 1 | ||||
-rwxr-xr-x | scripts/maint/clang-format.sh | 41 | ||||
-rwxr-xr-x | scripts/maint/codetool.py | 182 |
4 files changed, 253 insertions, 18 deletions
diff --git a/scripts/maint/checkSpace.pl b/scripts/maint/checkSpace.pl index f4e6f733c8..857ce6f6f1 100755 --- a/scripts/maint/checkSpace.pl +++ b/scripts/maint/checkSpace.pl @@ -23,6 +23,25 @@ if ($ARGV[0] =~ /^-/) { $C = ($lang eq '-C'); } +# hashmap of things where we allow spaces between them and (. +our %allow_space_after= map {$_, 1} qw{ + if while for switch return int unsigned elsif WINAPI + void __attribute__ op size_t double uint64_t + bool ssize_t + workqueue_reply_t hs_desc_decode_status_t + PRStatus + SMARTLIST_FOREACH_BEGIN SMARTLIST_FOREACH_END + HT_FOREACH + DIGESTMAP_FOREACH_MODIFY DIGESTMAP_FOREACH + DIGEST256MAP_FOREACH_MODIFY DIGEST256MAP_FOREACH + STRMAP_FOREACH_MODIFY STRMAP_FOREACH + SDMAP_FOREACH EIMAP_FOREACH RIMAP_FOREACH + MAP_FOREACH_MODIFY MAP_FOREACH + TOR_SIMPLEQ_FOREACH TOR_SIMPLEQ_FOREACH_SAFE + TOR_LIST_FOREACH TOR_LIST_FOREACH_SAFE + TOR_SLIST_FOREACH TOR_SLIST_FOREACH_SAFE +}; + our %basenames = (); our %guardnames = (); @@ -58,9 +77,9 @@ for my $fn (@ARGV) { } ## Warn about labels that don't have a space in front of them # (We indent every label at least one space) - if (/^[a-zA-Z_][a-zA-Z_0-9]*:/) { - msg "nosplabel:$fn:$.\n"; - } + #if (/^[a-zA-Z_][a-zA-Z_0-9]*:/) { + # msg "nosplabel:$fn:$.\n"; + #} ## Warn about trailing whitespace. # (We don't allow whitespace at the end of the line; make your # editor highlight it for you so you can stop adding it in.) @@ -111,7 +130,7 @@ for my $fn (@ARGV) { ## Terminals are still 80 columns wide in my world. I refuse to ## accept double-line lines. # (Don't make lines wider than 80 characters, including newline.) 
- if (/^.{80}/) { + if (/^.{80}/ and not /LCOV_EXCL/) { msg "Wide:$fn:$.\n"; } ### Juju to skip over comments and strings, since the tests @@ -128,12 +147,12 @@ for my $fn (@ARGV) { if ($isheader) { if ($seenguard == 0) { - if (/ifndef\s+(\S+)/) { + if (/^\s*\#\s*ifndef\s+(\S+)/) { ++$seenguard; $guardname = $1; } } elsif ($seenguard == 1) { - if (/^\#define (\S+)/) { + if (/^\s*\#\s*define (\S+)/) { ++$seenguard; if ($1 ne $guardname) { msg "GUARD:$fn:$.: Header guard macro mismatch.\n"; @@ -156,9 +175,8 @@ for my $fn (@ARGV) { # msg "//:$fn:$.\n"; s!//.*!!; } - ## Warn about unquoted braces preceded by non-space. - # (No character except a space should come before a {) - if (/([^\s'])\{/) { + ## Warn about unquoted braces preceded by unexpected character. + if (/([^\s'\)\(\{])\{/) { msg "$1\{:$fn:$.\n"; } ## Warn about double semi-colons at the end of a line. @@ -178,12 +196,7 @@ for my $fn (@ARGV) { # (Don't put a space between the name of a function and its # arguments.) if (/(\w+)\s\(([A-Z]*)/) { - if ($1 ne "if" and $1 ne "while" and $1 ne "for" and - $1 ne "switch" and $1 ne "return" and $1 ne "int" and - $1 ne "elsif" and $1 ne "WINAPI" and $2 ne "WINAPI" and - $1 ne "void" and $1 ne "__attribute__" and $1 ne "op" and - $1 ne "size_t" and $1 ne "double" and $1 ne "uint64_t" and - $1 ne "workqueue_reply_t" and $1 ne "bool") { + if (! $allow_space_after{$1} && $2 ne 'WINAPI') { msg "fn ():$fn:$.\n"; } } @@ -194,8 +207,8 @@ for my $fn (@ARGV) { if ($in_func_head || ($fn !~ /\.h$/ && /^[a-zA-Z0-9_]/ && ! /^(?:const |static )*(?:typedef|struct|union)[^\(]*$/ && - ! /= *\{$/ && ! /;$/)) { - if (/.\{$/){ + ! /= *\{$/ && ! /;$/) && ! 
/^[a-zA-Z0-9_]+\s*:/) { - if (/.\{$/){ + if (/[^,\s]\s*\{$/){ msg "fn() {:$fn:$.\n"; $in_func_head = 0; } elsif (/^\S[^\(]* +\**[a-zA-Z0-9_]+\(/) { diff --git a/scripts/maint/checkspace_tests/expected.txt b/scripts/maint/checkspace_tests/expected.txt index 935b750ef9..38595ed373 100644 --- a/scripts/maint/checkspace_tests/expected.txt +++ b/scripts/maint/checkspace_tests/expected.txt @@ -5,7 +5,6 @@ tp fn():./dubious.c:15 Wide:./dubious.c:17 TAB:./dubious.c:24 - nosplabel:./dubious.c:26 CR:./dubious.c:30 Space@EOL:./dubious.c:32 non-K&R {:./dubious.c:39 diff --git a/scripts/maint/clang-format.sh b/scripts/maint/clang-format.sh new file mode 100755 index 0000000000..59832117b4 --- /dev/null +++ b/scripts/maint/clang-format.sh @@ -0,0 +1,41 @@ +#!/bin/sh +# Copyright 2020, The Tor Project, Inc. +# See LICENSE for licensing information. + +# +# DO NOT COMMIT OR MERGE CODE THAT IS RUN THROUGH THIS TOOL YET. +# +# WE ARE STILL DISCUSSING OUR DESIRED STYLE AND ITERATING ON IT. +# (12 Feb 2020) +# + +# This script runs "clang-format" and "codetool" in sequence over each of +# our source files, and replaces the original file only if it has changed. +# +# We can't just use clang-format -i, since we also want to use codetool to +# reformat a few things back to how we want them, and we want to avoid changing +# the mtime on files that didn't actually change. + +set -e + +cd "$(dirname "$0")/../../src/" + +# Shellcheck complains that a for loop over find's output is unreliable, +# since there might be special characters in the output. But we happen +# to know that none of our C files have special characters or spaces in +# their names, so this is safe. 
+# +# shellcheck disable=SC2044 +for fname in $(find lib core feature app test tools -name '[^.]*.[ch]'); do + tmpfname="${fname}.clang_fmt.tmp" + rm -f "${tmpfname}" + clang-format --style=file "${fname}" > "${tmpfname}" + ../scripts/maint/codetool.py "${tmpfname}" + if cmp "${fname}" "${tmpfname}" >/dev/null 2>&1; then + echo "No change in ${fname}" + rm -f "${tmpfname}" + else + echo "Change in ${fname}" + mv "${tmpfname}" "${fname}" + fi +done diff --git a/scripts/maint/codetool.py b/scripts/maint/codetool.py new file mode 100755 index 0000000000..725712c0cc --- /dev/null +++ b/scripts/maint/codetool.py @@ -0,0 +1,182 @@ +#!/usr/bin/env python3 +# Copyright (c) 2020, The Tor Project, Inc. +# See LICENSE for licensing information. + +# +# DO NOT COMMIT OR MERGE CODE THAT IS RUN THROUGH THIS TOOL YET. +# +# WE ARE STILL DISCUSSING OUR DESIRED STYLE AND ITERATING ON IT, +# ALONG WITH THE TOOLS THAT ACHIEVE IT. +# (12 Feb 2020) +# + +""" + This program uses a set of pluggable filters to inspect and transform + our C code. +""" + +import os +import re +import sys + +class Filter: + """A Filter transforms a string containing a C program.""" + def __init__(self): + pass + + def transform(self, s): + return s + +class CompoundFilt(Filter): + """A CompoundFilt runs another set of filters, in sequence.""" + def __init__(self, items=()): + super().__init__() + self._filters = list(items) + + def add(self, filt): + self._filters.append(filt) + return self + + def transform(self, s): + for f in self._filters: + s = f.transform(s) + + return s + +class SplitError(Exception): + """Exception: raised if split_comments() can't understand a C file.""" + pass + +def split_comments(s): + r"""Iterate over the C code in 's', and yield a sequence of (code, + comment) pairs. Each pair will contain either a nonempty piece + of code, a nonempty comment, or both. 
+ + >>> list(split_comments("hello // world\n")) + [('hello ', '// world'), ('\n', '')] + + >>> list(split_comments("a /* b cd */ efg // hi")) + [('a ', '/* b cd */'), (' efg ', '// hi')] + """ + + # Matches a block of code without any comments. + PAT_CODE = re.compile(r'''^(?: [^/"']+ | + "(?:[^\\"]+|\\.)*" | + '(?:[^\\']+|\\.)*' | + /[^/*] + )*''', re.VERBOSE|re.DOTALL) + + # Matches a C99 "//" comment. + PAT_C99_COMMENT = re.compile(r'^//.*$', re.MULTILINE) + + # Matches a C "/* */" comment. + PAT_C_COMMENT = re.compile(r'^/\*(?:[^*]|\*+[^*/])*\*+/', re.DOTALL) + + while True: + # Find some non-comment code at the start of the string. + m = PAT_CODE.match(s) + + # If we found some code here, save it and advance the string. + # Otherwise set 'code' to "". + if m: + code = m.group(0) + s = s[m.end():] + else: + code = "" + + # Now we have a comment, or the end of the string. Find out which + # one, and how long it is. + if s.startswith("//"): + m = PAT_C99_COMMENT.match(s) + else: + m = PAT_C_COMMENT.match(s) + + # If we got a comment, save it and advance the string. Otherwise + # set 'comment' to "". + if m: + comment = m.group(0) + s = s[m.end():] + else: + comment = "" + + # If we found no code and no comment, we should be at the end of + # the string... + if code == "" and comment == "": + if s: + # But in case we *aren't* at the end of the string, raise + # an error. + raise SplitError() + # ... all is well, we're done scanning the code. + return + + yield (code, comment) + +class IgnoreCommentsFilt(Filter): + """Wrapper: applies another filter to C code only, excluding comments. 
+ """ + def __init__(self, filt): + super().__init__() + self._filt = filt + + def transform(self, s): + result = [] + for code, comment in split_comments(s): + result.append(self._filt.transform(code)) + result.append(comment) + return "".join(result) + + +class RegexFilt(Filter): + """A regex filter applies a regular expression to some C code.""" + def __init__(self, pat, replacement, flags=0): + super().__init__() + self._pat = re.compile(pat, flags) + self._replacement = replacement + + def transform(self, s): + s, _ = self._pat.subn(self._replacement, s) + return s + +def revise(fname, filt): + """Run 'filt' on the contents of the file in 'fname'. If any + changes are made, then replace the file with its new contents. + Otherwise, leave the file alone. + """ + contents = open(fname, 'r').read() + result = filt.transform(contents) + if result == contents: + return + + tmpname = "{}_codetool_tmp".format(fname) + try: + with open(tmpname, 'w') as f: + f.write(result) + os.rename(tmpname, fname) + except: + os.unlink(tmpname) + raise + +############################## +# Filtering rules. +############################## + +# Make sure that there is a newline after the first comma in a MOCK_IMPL() +BREAK_MOCK_IMPL = RegexFilt( + r'^MOCK_IMPL\(([^,]+),\s*(\S+)', + r'MOCK_IMPL(\1,\n\2', + re.MULTILINE) + +# Make sure there is no newline between } and a loop iteration terminator. +RESTORE_SMARTLIST_END = RegexFilt( + r'}\s*(SMARTLIST|DIGESTMAP|DIGEST256MAP|STRMAP|MAP)_FOREACH_END\s*\(', + r'} \1_FOREACH_END (', + re.MULTILINE) + +F = CompoundFilt() +F.add(IgnoreCommentsFilt(CompoundFilt([ + RESTORE_SMARTLIST_END, + BREAK_MOCK_IMPL]))) + +if __name__ == '__main__': + for fname in sys.argv[1:]: + revise(fname, F) |