summaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
authorNick Mathewson <nickm@torproject.org>2020-03-05 08:23:32 -0500
committerNick Mathewson <nickm@torproject.org>2020-03-05 08:23:32 -0500
commit686494f0f71b9235399b8241aba3e0c2fcb03ea1 (patch)
tree39256173e40ef50d45cc35c22561da9279f9753b /scripts
parentedc0bf5089df13d1d6a246e67bddb484ac99ad59 (diff)
parentb5ccdd978ea138cde92b3513c9d653ba18b8b463 (diff)
downloadtor-686494f0f71b9235399b8241aba3e0c2fcb03ea1.tar.gz
tor-686494f0f71b9235399b8241aba3e0c2fcb03ea1.zip
Merge branch 'clang_format_prep_3'
Diffstat (limited to 'scripts')
-rwxr-xr-xscripts/maint/checkSpace.pl47
-rw-r--r--scripts/maint/checkspace_tests/expected.txt1
-rwxr-xr-xscripts/maint/clang-format.sh41
-rwxr-xr-xscripts/maint/codetool.py182
4 files changed, 253 insertions, 18 deletions
diff --git a/scripts/maint/checkSpace.pl b/scripts/maint/checkSpace.pl
index f4e6f733c8..857ce6f6f1 100755
--- a/scripts/maint/checkSpace.pl
+++ b/scripts/maint/checkSpace.pl
@@ -23,6 +23,25 @@ if ($ARGV[0] =~ /^-/) {
$C = ($lang eq '-C');
}
+# hashmap of things where we allow spaces between them and (.
+our %allow_space_after= map {$_, 1} qw{
+ if while for switch return int unsigned elsif WINAPI
+ void __attribute__ op size_t double uint64_t
+ bool ssize_t
+ workqueue_reply_t hs_desc_decode_status_t
+ PRStatus
+ SMARTLIST_FOREACH_BEGIN SMARTLIST_FOREACH_END
+ HT_FOREACH
+ DIGESTMAP_FOREACH_MODIFY DIGESTMAP_FOREACH
+ DIGEST256MAP_FOREACH_MODIFY DIGEST256MAP_FOREACH
+ STRMAP_FOREACH_MODIFY STRMAP_FOREACH
+ SDMAP_FOREACH EIMAP_FOREACH RIMAP_FOREACH
+ MAP_FOREACH_MODIFY MAP_FOREACH
+ TOR_SIMPLEQ_FOREACH TOR_SIMPLEQ_FOREACH_SAFE
+ TOR_LIST_FOREACH TOR_LIST_FOREACH_SAFE
+ TOR_SLIST_FOREACH TOR_SLIST_FOREACH_SAFE
+};
+
our %basenames = ();
our %guardnames = ();
@@ -58,9 +77,9 @@ for my $fn (@ARGV) {
}
## Warn about labels that don't have a space in front of them
# (We indent every label at least one space)
- if (/^[a-zA-Z_][a-zA-Z_0-9]*:/) {
- msg "nosplabel:$fn:$.\n";
- }
+ #if (/^[a-zA-Z_][a-zA-Z_0-9]*:/) {
+ # msg "nosplabel:$fn:$.\n";
+ #}
## Warn about trailing whitespace.
# (We don't allow whitespace at the end of the line; make your
# editor highlight it for you so you can stop adding it in.)
@@ -111,7 +130,7 @@ for my $fn (@ARGV) {
## Terminals are still 80 columns wide in my world. I refuse to
## accept double-line lines.
# (Don't make lines wider than 80 characters, including newline.)
- if (/^.{80}/) {
+ if (/^.{80}/ and not /LCOV_EXCL/) {
msg "Wide:$fn:$.\n";
}
### Juju to skip over comments and strings, since the tests
@@ -128,12 +147,12 @@ for my $fn (@ARGV) {
if ($isheader) {
if ($seenguard == 0) {
- if (/ifndef\s+(\S+)/) {
+ if (/^\s*\#\s*ifndef\s+(\S+)/) {
++$seenguard;
$guardname = $1;
}
} elsif ($seenguard == 1) {
- if (/^\#define (\S+)/) {
+ if (/^\s*\#\s*define (\S+)/) {
++$seenguard;
if ($1 ne $guardname) {
msg "GUARD:$fn:$.: Header guard macro mismatch.\n";
@@ -156,9 +175,8 @@ for my $fn (@ARGV) {
# msg "//:$fn:$.\n";
s!//.*!!;
}
- ## Warn about unquoted braces preceded by non-space.
- # (No character except a space should come before a {)
- if (/([^\s'])\{/) {
+ ## Warn about unquoted braces preceded by unexpected character.
+ if (/([^\s'\)\(\{])\{/) {
msg "$1\{:$fn:$.\n";
}
## Warn about double semi-colons at the end of a line.
@@ -178,12 +196,7 @@ for my $fn (@ARGV) {
# (Don't put a space between the name of a function and its
# arguments.)
if (/(\w+)\s\(([A-Z]*)/) {
- if ($1 ne "if" and $1 ne "while" and $1 ne "for" and
- $1 ne "switch" and $1 ne "return" and $1 ne "int" and
- $1 ne "elsif" and $1 ne "WINAPI" and $2 ne "WINAPI" and
- $1 ne "void" and $1 ne "__attribute__" and $1 ne "op" and
- $1 ne "size_t" and $1 ne "double" and $1 ne "uint64_t" and
- $1 ne "workqueue_reply_t" and $1 ne "bool") {
+ if (! $allow_space_after{$1} && $2 ne 'WINAPI') {
msg "fn ():$fn:$.\n";
}
}
@@ -194,8 +207,8 @@ for my $fn (@ARGV) {
if ($in_func_head ||
($fn !~ /\.h$/ && /^[a-zA-Z0-9_]/ &&
! /^(?:const |static )*(?:typedef|struct|union)[^\(]*$/ &&
- ! /= *\{$/ && ! /;$/)) {
- if (/.\{$/){
+ ! /= *\{$/ && ! /;$/) && ! /^[a-zA-Z0-9_]+\s*:/) {
+ if (/[^,\s]\s*\{$/){
msg "fn() {:$fn:$.\n";
$in_func_head = 0;
} elsif (/^\S[^\(]* +\**[a-zA-Z0-9_]+\(/) {
diff --git a/scripts/maint/checkspace_tests/expected.txt b/scripts/maint/checkspace_tests/expected.txt
index 935b750ef9..38595ed373 100644
--- a/scripts/maint/checkspace_tests/expected.txt
+++ b/scripts/maint/checkspace_tests/expected.txt
@@ -5,7 +5,6 @@
tp fn():./dubious.c:15
Wide:./dubious.c:17
TAB:./dubious.c:24
- nosplabel:./dubious.c:26
CR:./dubious.c:30
Space@EOL:./dubious.c:32
non-K&R {:./dubious.c:39
diff --git a/scripts/maint/clang-format.sh b/scripts/maint/clang-format.sh
new file mode 100755
index 0000000000..59832117b4
--- /dev/null
+++ b/scripts/maint/clang-format.sh
@@ -0,0 +1,41 @@
+#!/bin/sh
+# Copyright 2020, The Tor Project, Inc.
+# See LICENSE for licensing information.
+
+#
+# DO NOT COMMIT OR MERGE CODE THAT IS RUN THROUGH THIS TOOL YET.
+#
+# WE ARE STILL DISCUSSING OUR DESIRED STYLE AND ITERATING ON IT.
+# (12 Feb 2020)
+#
+
+# This script runs "clang-format" and "codetool" in sequence over each of
+# our source files, and replaces the original file only if it has changed.
+#
+# We can't just use clang-format -i, since we also want to use codetool to
+# reformat a few things back to how we want them, and we want avoid changing
+# the mtime on files that didn't actually change.
+
+set -e
+
+cd "$(dirname "$0")/../../src/"
+
+# Shellcheck complains that a for loop over find's output is unreliable,
+# since there might be special characters in the output. But we happen
+# to know that none of our C files have special characters or spaces in
+# their names, so this is safe.
+#
+# shellcheck disable=SC2044
+for fname in $(find lib core feature app test tools -name '[^.]*.[ch]'); do
+ tmpfname="${fname}.clang_fmt.tmp"
+ rm -f "${tmpfname}"
+ clang-format --style=file "${fname}" > "${tmpfname}"
+ ../scripts/maint/codetool.py "${tmpfname}"
+ if cmp "${fname}" "${tmpfname}" >/dev/null 2>&1; then
+ echo "No change in ${fname}"
+ rm -f "${tmpfname}"
+ else
+ echo "Change in ${fname}"
+ mv "${tmpfname}" "${fname}"
+ fi
+done
diff --git a/scripts/maint/codetool.py b/scripts/maint/codetool.py
new file mode 100755
index 0000000000..725712c0cc
--- /dev/null
+++ b/scripts/maint/codetool.py
@@ -0,0 +1,182 @@
+#!/usr/bin/env python3
+# Copyright (c) 2020, The Tor Project, Inc.
+# See LICENSE for licensing information.
+
+#
+# DO NOT COMMIT OR MERGE CODE THAT IS RUN THROUGH THIS TOOL YET.
+#
+# WE ARE STILL DISCUSSING OUR DESIRED STYLE AND ITERATING ON IT,
+# ALONG WITH THE TOOLS THAT ACHIEVE IT.
+# (12 Feb 2020)
+#
+
+"""
+ This program uses a set of plugable filters to inspect and transform
+ our C code.
+"""
+
+import os
+import re
+import sys
+
+class Filter:
+ """A Filter transforms a string containing a C program."""
+ def __init__(self):
+ pass
+
+ def transform(self, s):
+ return s
+
+class CompoundFilt(Filter):
+ """A CompoundFilt runs another set of filters, in sequence."""
+ def __init__(self, items=()):
+ super().__init__()
+ self._filters = list(items)
+
+ def add(self, filt):
+ self._filters.append(filt)
+ return self
+
+ def transform(self, s):
+ for f in self._filters:
+ s = f.transform(s)
+
+ return s
+
+class SplitError(Exception):
+ """Exception: raised if split_comments() can't understand a C file."""
+ pass
+
+def split_comments(s):
+ r"""Iterate over the C code in 's', and yield a sequence of (code,
+ comment) pairs. Each pair will contain either a nonempty piece
+ of code, a nonempty comment, or both.
+
+ >>> list(split_comments("hello // world\n"))
+ [('hello ', '// world'), ('\n', '')]
+
+ >>> list(split_comments("a /* b cd */ efg // hi"))
+ [('a ', '/* b cd */'), (' efg ', '// hi')]
+ """
+
+ # Matches a block of code without any comments.
+ PAT_CODE = re.compile(r'''^(?: [^/"']+ |
+ "(?:[^\\"]+|\\.)*" |
+ '(?:[^\\']+|\\.)*' |
+ /[^/*]
+ )*''', re.VERBOSE|re.DOTALL)
+
+ # Matches a C99 "//" comment.
+ PAT_C99_COMMENT = re.compile(r'^//.*$', re.MULTILINE)
+
+ # Matches a C "/* */" comment.
+ PAT_C_COMMENT = re.compile(r'^/\*(?:[^*]|\*+[^*/])*\*+/', re.DOTALL)
+
+ while True:
+ # Find some non-comment code at the start of the string.
+ m = PAT_CODE.match(s)
+
+ # If we found some code here, save it and advance the string.
+ # Otherwise set 'code' to "".
+ if m:
+ code = m.group(0)
+ s = s[m.end():]
+ else:
+ code = ""
+
+ # Now we have a comment, or the end of the string. Find out which
+ # one, and how long it is.
+ if s.startswith("//"):
+ m = PAT_C99_COMMENT.match(s)
+ else:
+ m = PAT_C_COMMENT.match(s)
+
+ # If we got a comment, save it and advance the string. Otherwise
+ # set 'comment' to "".
+ if m:
+ comment = m.group(0)
+ s = s[m.end():]
+ else:
+ comment = ""
+
+ # If we found no code and no comment, we should be at the end of
+ # the string...
+ if code == "" and comment == "":
+ if s:
+ # But in case we *aren't* at the end of the string, raise
+ # an error.
+ raise SplitError()
+ # ... all is well, we're done scanning the code.
+ return
+
+ yield (code, comment)
+
+class IgnoreCommentsFilt(Filter):
+ """Wrapper: applies another filter to C code only, excluding comments.
+ """
+ def __init__(self, filt):
+ super().__init__()
+ self._filt = filt
+
+ def transform(self, s):
+ result = []
+ for code, comment in split_comments(s):
+ result.append(self._filt.transform(code))
+ result.append(comment)
+ return "".join(result)
+
+
+class RegexFilt(Filter):
+ """A regex filter applies a regular expression to some C code."""
+ def __init__(self, pat, replacement, flags=0):
+ super().__init__()
+ self._pat = re.compile(pat, flags)
+ self._replacement = replacement
+
+ def transform(self, s):
+ s, _ = self._pat.subn(self._replacement, s)
+ return s
+
+def revise(fname, filt):
+ """Run 'filt' on the contents of the file in 'fname'. If any
+ changes are made, then replace the file with its new contents.
+ Otherwise, leave the file alone.
+ """
+ contents = open(fname, 'r').read()
+ result = filt.transform(contents)
+ if result == contents:
+ return
+
+ tmpname = "{}_codetool_tmp".format(fname)
+ try:
+ with open(tmpname, 'w') as f:
+ f.write(result)
+ os.rename(tmpname, fname)
+ except:
+ os.unlink(tmpname)
+ raise
+
+##############################
+# Filtering rules.
+##############################
+
+# Make sure that there is a newline after the first comma in a MOCK_IMPL()
+BREAK_MOCK_IMPL = RegexFilt(
+ r'^MOCK_IMPL\(([^,]+),\s*(\S+)',
+ r'MOCK_IMPL(\1,\n\2',
+ re.MULTILINE)
+
+# Make sure there is no newline between } and a loop iteration terminator.
+RESTORE_SMARTLIST_END = RegexFilt(
+ r'}\s*(SMARTLIST|DIGESTMAP|DIGEST256MAP|STRMAP|MAP)_FOREACH_END\s*\(',
+ r'} \1_FOREACH_END (',
+ re.MULTILINE)
+
+F = CompoundFilt()
+F.add(IgnoreCommentsFilt(CompoundFilt([
+ RESTORE_SMARTLIST_END,
+ BREAK_MOCK_IMPL])))
+
+if __name__ == '__main__':
+ for fname in sys.argv[1:]:
+ revise(fname, F)