Merge branch 'clang_format_prep_3'

author: Nick Mathewson <nickm@torproject.org> 2020-03-05 08:23:32 -0500
committer: Nick Mathewson <nickm@torproject.org> 2020-03-05 08:23:32 -0500
commit: 686494f0f71b9235399b8241aba3e0c2fcb03ea1 (patch)
tree: 39256173e40ef50d45cc35c22561da9279f9753b /scripts/maint
parent: edc0bf5089df13d1d6a246e67bddb484ac99ad59 (diff)
parent: b5ccdd978ea138cde92b3513c9d653ba18b8b463 (diff)
download: tor-686494f0f71b9235399b8241aba3e0c2fcb03ea1.tar.gz
tor-686494f0f71b9235399b8241aba3e0c2fcb03ea1.zip
4 files changed, 253 insertions, 18 deletions
diff --git a/scripts/maint/checkSpace.pl b/scripts/maint/checkSpace.pl
index f4e6f733c8..857ce6f6f1 100755
--- a/scripts/maint/checkSpace.pl
+++ b/scripts/maint/checkSpace.pl
@@ -23,6 +23,25 @@ if ($ARGV[0] =~ /^-/) {
     $C = ($lang eq '-C');
 }
 
+# hashmap of things where we allow spaces between them and (.
+our %allow_space_after= map {$_, 1} qw{
+    if while for switch return int unsigned elsif WINAPI
+    void __attribute__ op size_t double uint64_t
+    bool ssize_t
+    workqueue_reply_t hs_desc_decode_status_t
+    PRStatus
+    SMARTLIST_FOREACH_BEGIN SMARTLIST_FOREACH_END
+    HT_FOREACH
+    DIGESTMAP_FOREACH_MODIFY DIGESTMAP_FOREACH
+    DIGEST256MAP_FOREACH_MODIFY DIGEST256MAP_FOREACH
+    STRMAP_FOREACH_MODIFY STRMAP_FOREACH
+    SDMAP_FOREACH EIMAP_FOREACH RIMAP_FOREACH
+    MAP_FOREACH_MODIFY MAP_FOREACH
+    TOR_SIMPLEQ_FOREACH TOR_SIMPLEQ_FOREACH_SAFE
+    TOR_LIST_FOREACH TOR_LIST_FOREACH_SAFE
+    TOR_SLIST_FOREACH TOR_SLIST_FOREACH_SAFE
+};
+
 our %basenames = ();
 
 our %guardnames = ();
@@ -58,9 +77,9 @@ for my $fn (@ARGV) {
         }
         ## Warn about labels that don't have a space in front of them
         #    (We indent every label at least one space)
-        if (/^[a-zA-Z_][a-zA-Z_0-9]*:/) {
-            msg "nosplabel:$fn:$.\n";
-        }
+        #if (/^[a-zA-Z_][a-zA-Z_0-9]*:/) {
+        #    msg "nosplabel:$fn:$.\n";
+        #}
         ## Warn about trailing whitespace.
         #    (We don't allow whitespace at the end of the line; make your
         #    editor highlight it for you so you can stop adding it in.)
@@ -111,7 +130,7 @@ for my $fn (@ARGV) {
         ## Terminals are still 80 columns wide in my world.  I refuse to
         ## accept double-line lines.
         #   (Don't make lines wider than 80 characters, including newline.)
-        if (/^.{80}/) {
+        if (/^.{80}/ and not /LCOV_EXCL/) {
             msg "Wide:$fn:$.\n";
         }
         ### Juju to skip over comments and strings, since the tests
@@ -128,12 +147,12 @@ for my $fn (@ARGV) {
 
             if ($isheader) {
                 if ($seenguard == 0) {
-                    if (/ifndef\s+(\S+)/) {
+                    if (/^\s*\#\s*ifndef\s+(\S+)/) {
                         ++$seenguard;
                         $guardname = $1;
                     }
                 } elsif ($seenguard == 1) {
-                    if (/^\#define (\S+)/) {
+                    if (/^\s*\#\s*define (\S+)/) {
                         ++$seenguard;
                         if ($1 ne $guardname) {
                             msg "GUARD:$fn:$.: Header guard macro mismatch.\n";
@@ -156,9 +175,8 @@ for my $fn (@ARGV) {
                 #    msg "//:$fn:$.\n";
                 s!//.*!!;
             }
-            ## Warn about unquoted braces preceded by non-space.
-            #   (No character except a space should come before a {)
-            if (/([^\s'])\{/) {
+            ## Warn about unquoted braces preceded by unexpected character.
+            if (/([^\s'\)\(\{])\{/) {
                 msg "$1\{:$fn:$.\n";
             }
             ## Warn about double semi-colons at the end of a line.
@@ -178,12 +196,7 @@ for my $fn (@ARGV) {
             #   (Don't put a space between the name of a function and its
             #   arguments.)
             if (/(\w+)\s\(([A-Z]*)/) {
-                if ($1 ne "if" and $1 ne "while" and $1 ne "for" and
-                    $1 ne "switch" and $1 ne "return" and $1 ne "int" and
-                    $1 ne "elsif" and $1 ne "WINAPI" and $2 ne "WINAPI" and
-                    $1 ne "void" and $1 ne "__attribute__" and $1 ne "op" and
-                    $1 ne "size_t" and $1 ne "double" and $1 ne "uint64_t" and
-                    $1 ne "workqueue_reply_t" and $1 ne "bool") {
+                if (! $allow_space_after{$1} && $2 ne 'WINAPI') {
                     msg "fn ():$fn:$.\n";
                 }
             }
@@ -194,8 +207,8 @@ for my $fn (@ARGV) {
             if ($in_func_head ||
                 ($fn !~ /\.h$/ && /^[a-zA-Z0-9_]/ &&
                  ! /^(?:const |static )*(?:typedef|struct|union)[^\(]*$/ &&
-                 ! /= *\{$/ && ! /;$/)) {
-                if (/.\{$/){
+                 ! /= *\{$/ && ! /;$/) && ! /^[a-zA-Z0-9_]+\s*:/) {
+                if (/[^,\s]\s*\{$/){
                     msg "fn() {:$fn:$.\n";
                     $in_func_head = 0;
                 } elsif (/^\S[^\(]* +\**[a-zA-Z0-9_]+\(/) {
diff --git a/scripts/maint/checkspace_tests/expected.txt b/scripts/maint/checkspace_tests/expected.txt
index 935b750ef9..38595ed373 100644
--- a/scripts/maint/checkspace_tests/expected.txt
+++ b/scripts/maint/checkspace_tests/expected.txt
@@ -5,7 +5,6 @@
    tp fn():./dubious.c:15
       Wide:./dubious.c:17
        TAB:./dubious.c:24
- nosplabel:./dubious.c:26
         CR:./dubious.c:30
  Space@EOL:./dubious.c:32
  non-K&R {:./dubious.c:39
diff --git a/scripts/maint/clang-format.sh b/scripts/maint/clang-format.sh
new file mode 100755
index 0000000000..59832117b4
--- /dev/null
+++ b/scripts/maint/clang-format.sh
@@ -0,0 +1,41 @@
+#!/bin/sh
+# Copyright 2020, The Tor Project, Inc.
+# See LICENSE for licensing information.
+
+#
+# DO NOT COMMIT OR MERGE CODE THAT IS RUN THROUGH THIS TOOL YET.
+#
+# WE ARE STILL DISCUSSING OUR DESIRED STYLE AND ITERATING ON IT.
+#     (12 Feb 2020)
+#
+
+# This script runs "clang-format" and "codetool" in sequence over each of
+# our source files, and replaces the original file only if it has changed.
+#
+# We can't just use clang-format -i, since we also want to use codetool to
+# reformat a few things back to how we want them, and we want to avoid changing
+# the mtime on files that didn't actually change.
+
+set -e
+
+cd "$(dirname "$0")/../../src/"
+
+# Shellcheck complains that a for loop over find's output is unreliable,
+# since there might be special characters in the output.  But none of our
+# C files have special characters or spaces in their names, so this is
+# safe.  ('[!.]' is the POSIX bracket negation: skip dot-files.)
+#
+# shellcheck disable=SC2044
+for fname in $(find lib core feature app test tools -name '[!.]*.[ch]'); do
+    tmpfname="${fname}.clang_fmt.tmp"
+    rm -f "${tmpfname}"
+    clang-format --style=file "${fname}" > "${tmpfname}"
+    ../scripts/maint/codetool.py "${tmpfname}"
+    if cmp "${fname}" "${tmpfname}" >/dev/null 2>&1; then
+        echo "No change in ${fname}"
+        rm -f "${tmpfname}"
+    else
+        echo "Change in ${fname}"
+        mv "${tmpfname}" "${fname}"
+    fi
+done
diff --git a/scripts/maint/codetool.py b/scripts/maint/codetool.py
new file mode 100755
index 0000000000..725712c0cc
--- /dev/null
+++ b/scripts/maint/codetool.py
@@ -0,0 +1,182 @@
+#!/usr/bin/env python3
+# Copyright (c) 2020, The Tor Project, Inc.
+# See LICENSE for licensing information.
+
+#
+# DO NOT COMMIT OR MERGE CODE THAT IS RUN THROUGH THIS TOOL YET.
+#
+# WE ARE STILL DISCUSSING OUR DESIRED STYLE AND ITERATING ON IT,
+# ALONG WITH THE TOOLS THAT ACHIEVE IT.
+#     (12 Feb 2020)
+#
+
+"""
+   This program uses a set of pluggable filters to inspect and transform
+   our C code.
+"""
+
+import os
+import re
+import sys
+
+class Filter:
+    """Base class for filters: each one rewrites the text of a C program."""
+    def __init__(self):
+        """No state to set up; subclasses reach this via super()."""
+
+    def transform(self, s):
+        return s  # Identity here; subclasses override to do real work.
+
+class CompoundFilt(Filter):
+    """Chain of filters: applies each member, in order, to the text."""
+    def __init__(self, items=()):
+        super().__init__()
+        self._filters = [*items]
+
+    def add(self, filt):
+        self._filters += [filt]  # Returning self lets add() calls chain.
+        return self
+
+    def transform(self, s):
+        # Each filter sees the output of the one before it.
+        for member in self._filters:
+            s = member.transform(s)
+        return s
+
+class SplitError(Exception):
+    """Raised by split_comments() when it reaches text that is neither
+       parseable code nor a comment."""
+
+def split_comments(s):
+    r"""Iterate over the C code in 's', and yield a sequence of (code,
+       comment) pairs.  Each pair will contain either a nonempty piece
+       of code, a nonempty comment, or both.
+
+       >>> list(split_comments("hello // world\n"))
+       [('hello ', '// world'), ('\n', '')]
+
+       >>> list(split_comments("a /* b cd */ efg // hi"))
+       [('a ', '/* b cd */'), (' efg ', '// hi')]
+    """
+
+    # Matches a run of code (string/char literals included) with no comments.
+    PAT_CODE = re.compile(r'''^(?: [^/"']+ |
+                                   "(?:[^\\"]+|\\.)*" |
+                                   '(?:[^\\']+|\\.)*' |
+                                   /[^/*]
+                               )*''', re.VERBOSE|re.DOTALL)
+
+    # Matches a C99 "//" comment, up to the end of its line.
+    PAT_C99_COMMENT = re.compile(r'^//.*$', re.MULTILINE)
+
+    # Matches a C "/*  */" comment, which may span several lines.
+    PAT_C_COMMENT = re.compile(r'^/\*(?:[^*]|\*+[^*/])*\*+/', re.DOTALL)
+
+    while True:
+        # Find some non-comment code at the start of the string.
+        m = PAT_CODE.match(s)
+
+        # If we found some code here, save it and advance the string.
+        # Otherwise set 'code' to "".
+        if m:
+            code = m.group(0)
+            s = s[m.end():]
+        else:
+            code = ""
+
+        # Now we have a comment, or the end of the string.  Find out which
+        # one, and how long it is.
+        if s.startswith("//"):
+            m = PAT_C99_COMMENT.match(s)
+        else:
+            m = PAT_C_COMMENT.match(s)
+
+        # If we got a comment, save it and advance the string.  Otherwise
+        # set 'comment' to "".
+        if m:
+            comment = m.group(0)
+            s = s[m.end():]
+        else:
+            comment = ""
+
+        # If we found no code and no comment, we should be at the end of
+        # the string...
+        if code == "" and comment == "":
+            if s:
+                # Leftover text that is neither code nor comment (for
+                # example, an unterminated /* comment): raise SplitError.
+                raise SplitError()
+            # ... all is well, we're done scanning the code.
+            return
+
+        yield (code, comment)
+
+class IgnoreCommentsFilt(Filter):
+    """Wrapper that runs an inner filter over the code portions of a C
+       program while passing every comment through untouched."""
+    def __init__(self, filt):
+        super().__init__()
+        self._filt = filt
+
+    def transform(self, s):
+        # Rebuild the text piece by piece: filtered code, then its comment.
+        inner = self._filt.transform
+        return "".join(
+            inner(code) + comment
+            for code, comment in split_comments(s))
+
+
+class RegexFilt(Filter):
+    """Filter that replaces every match of a regular expression in C code.
+       'pat' and 'flags' are compiled once; 'replacement' is the template."""
+    def __init__(self, pat, replacement, flags=0):
+        super().__init__()
+        self._pat = re.compile(pat, flags)
+        self._replacement = replacement
+
+    def transform(self, s):
+        return self._pat.sub(self._replacement, s)
+
+def revise(fname, filt):
+    """Run 'filt' on the contents of the file in 'fname'.  If anything
+       changes, replace the file with the new contents (written to a
+       temporary file first); otherwise leave the file alone."""
+    with open(fname, 'r') as f:
+        contents = f.read()
+    result = filt.transform(contents)
+    if result == contents:
+        return
+    tmpname = "{}_codetool_tmp".format(fname)
+    try:
+        with open(tmpname, 'w') as f:
+            f.write(result)
+        os.replace(tmpname, fname)  # Only after the data is flushed+closed.
+    except BaseException:
+        if os.path.exists(tmpname):  # Don't mask the error with a new one.
+            os.unlink(tmpname)
+        raise
+
+##############################
+# Filtering rules.
+##############################
+
+# Force a line break after the first comma of a line-initial MOCK_IMPL().
+BREAK_MOCK_IMPL = RegexFilt(
+    r'^MOCK_IMPL\(([^,]+),\s*(\S+)',
+    r'MOCK_IMPL(\1,\n\2',
+    flags=re.MULTILINE)
+
+# Put a closing brace and the *_FOREACH_END terminator that follows it back
+# on one line, separated from each other (and from the '(') by one space.
+RESTORE_SMARTLIST_END = RegexFilt(
+    r'}\s*(SMARTLIST|DIGESTMAP|DIGEST256MAP|STRMAP|MAP)_FOREACH_END\s*\(',
+    r'} \1_FOREACH_END (',
+    flags=re.MULTILINE)
+
+# The complete pipeline; both rules are applied to code, never to comments.
+F = CompoundFilt([IgnoreCommentsFilt(CompoundFilt([RESTORE_SMARTLIST_END,
+                                                   BREAK_MOCK_IMPL]))])
+
+if __name__ == '__main__':
+    for fname in sys.argv[1:]:
+        revise(fname, F)
author	Nick Mathewson <nickm@torproject.org>	2020-03-05 08:23:32 -0500
committer	Nick Mathewson <nickm@torproject.org>	2020-03-05 08:23:32 -0500
commit	686494f0f71b9235399b8241aba3e0c2fcb03ea1 (patch)
tree	39256173e40ef50d45cc35c22561da9279f9753b /scripts/maint
parent	edc0bf5089df13d1d6a246e67bddb484ac99ad59 (diff)
parent	b5ccdd978ea138cde92b3513c9d653ba18b8b463 (diff)
download	tor-686494f0f71b9235399b8241aba3e0c2fcb03ea1.tar.gz tor-686494f0f71b9235399b8241aba3e0c2fcb03ea1.zip