scripts/maint/codetool.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182

#!/usr/bin/env python3
# Copyright (c) 2020, The Tor Project, Inc.
# See LICENSE for licensing information.

#
# DO NOT COMMIT OR MERGE CODE THAT IS RUN THROUGH THIS TOOL YET.
#
# WE ARE STILL DISCUSSING OUR DESIRED STYLE AND ITERATING ON IT,
# ALONG WITH THE TOOLS THAT ACHIEVE IT.
#     (12 Feb 2020)
#

"""
   This program uses a set of pluggable filters to inspect and transform
   our C code.
"""

import os
import re
import sys

class Filter:
    """Base class for filters.  A filter takes a string holding a C
       program and returns a (possibly rewritten) string.

       Subclasses override transform(); this base version changes nothing.
    """
    def __init__(self):
        """Create a filter.  The base class keeps no state."""

    def transform(self, s):
        """Return the filtered form of 's'.  Here, that is 's' itself."""
        return s

class CompoundFilt(Filter):
    """A filter built from a list of other filters, applied one after
       another: each filter receives the output of the one before it.
    """
    def __init__(self, items=()):
        """Start with the filters in the iterable 'items' (none by default)."""
        super().__init__()
        self._filters = []
        self._filters.extend(items)

    def add(self, filt):
        """Append 'filt' to the end of the sequence.  Return this object,
           so that calls can be chained.
        """
        self._filters += [filt]
        return self

    def transform(self, s):
        """Feed 's' through every filter in order; return the final text."""
        text = s
        for step in self._filters:
            text = step.transform(text)
        return text

class SplitError(Exception):
    """Raised by split_comments() when it can't make sense of a C file."""

def split_comments(s):
    r"""Iterate over the C code in 's', and yield a sequence of (code,
       comment) pairs.  Each pair will contain either a nonempty piece
       of code, a nonempty comment, or both.

       String and character literals count as code, so comment markers
       that appear inside them do not start a comment.

       Raise SplitError if some part of 's' can't be understood (for
       example, an unterminated literal or an unterminated "/*" comment).
       Since this is a generator, the error is raised while iterating.

       >>> list(split_comments("hello // world\n"))
       [('hello ', '// world'), ('\n', '')]

       >>> list(split_comments("a /* b cd */ efg // hi"))
       [('a ', '/* b cd */'), (' efg ', '// hi')]

       >>> list(split_comments('n = a/"b // c";'))
       [('n = a/"b // c";', '')]
    """

    # None of these patterns is anchored with "^": they are only ever used
    # through match(s, pos), which anchors them at 'pos' already.

    # Matches a block of code without any comments.
    #
    # The literal alternatives are written as "unrolled" loops rather than
    # as (?:[^\\"]+|\\.)*, whose nested quantifiers backtrack exponentially
    # when a literal is never closed.
    #
    # A lone "/" is matched with a lookahead instead of consuming the next
    # character, so that a quote right after a division still opens a
    # literal, and so that a "/" at the very end of the input is accepted.
    PAT_CODE = re.compile(r'''(?: [^/"']+ |
                                  "[^\\"]*(?:\\.[^\\"]*)*" |
                                  '[^\\']*(?:\\.[^\\']*)*' |
                                  /(?![/*])
                              )*''', re.VERBOSE|re.DOTALL)

    # Matches a C99 "//" comment, up to the end of its line.
    PAT_C99_COMMENT = re.compile(r'//.*$', re.MULTILINE)

    # Matches a C "/*  */" comment.
    PAT_C_COMMENT = re.compile(r'/\*(?:[^*]|\*+[^*/])*\*+/', re.DOTALL)

    # Scan by position instead of re-slicing 's' after every piece, which
    # would copy the rest of the input once per comment.
    pos = 0
    end = len(s)

    while True:
        # Find some non-comment code at the current position.  PAT_CODE
        # can match the empty string, so this match always succeeds.
        m = PAT_CODE.match(s, pos)
        code = m.group(0)
        pos = m.end()

        # Now we have a comment, or the end of the string.  Find out which
        # one, and how long it is.
        if s.startswith("//", pos):
            m = PAT_C99_COMMENT.match(s, pos)
        else:
            m = PAT_C_COMMENT.match(s, pos)

        # If we got a comment, save it and advance.  Otherwise set
        # 'comment' to "".
        if m:
            comment = m.group(0)
            pos = m.end()
        else:
            comment = ""

        # If we found no code and no comment, we should be at the end of
        # the string...
        if code == "" and comment == "":
            if pos < end:
                # But in case we *aren't* at the end of the string, raise
                # an error.
                raise SplitError(
                    "can't parse C code at offset {}".format(pos))
            # ... all is well, we're done scanning the code.
            return

        yield (code, comment)

class IgnoreCommentsFilt(Filter):
    """Wrapper: runs another filter over the code portions of a C program,
       and passes every comment through untouched.
    """
    def __init__(self, filt):
        """Wrap 'filt', the filter to apply to the non-comment code."""
        super().__init__()
        self._filt = filt

    def transform(self, s):
        """Return 's' with the wrapped filter applied to each piece of
           code that split_comments() finds, and the comments left as-is.
        """
        pieces = []
        for code, comment in split_comments(s):
            # Keep each transformed piece of code next to its comment.
            pieces += [self._filt.transform(code), comment]
        return "".join(pieces)


class RegexFilt(Filter):
    """A filter that rewrites C code with a regular-expression
       substitution.
    """
    def __init__(self, pat, replacement, flags=0):
        """Compile the pattern 'pat' with the 're' flags in 'flags', and
           remember 'replacement' as the text to substitute for each match.
        """
        super().__init__()
        self._pat = re.compile(pat, flags)
        self._replacement = replacement

    def transform(self, s):
        """Return 's' with every match of the pattern replaced."""
        return self._pat.sub(self._replacement, s)

def revise(fname, filt):
    """Run 'filt' on the contents of the file in 'fname'.  If any
       changes are made, then replace the file with its new contents.
       Otherwise, leave the file alone.

       The new contents are first written to a temporary file named
       "<fname>_codetool_tmp", which is then moved over 'fname'.  If
       anything goes wrong along the way, the temporary file is removed
       (if it exists) and the original exception is re-raised.
    """
    # Read through a context manager so the input file is closed promptly.
    with open(fname, 'r') as f:
        contents = f.read()

    result = filt.transform(contents)
    if result == contents:
        return

    tmpname = "{}_codetool_tmp".format(fname)
    try:
        with open(tmpname, 'w') as f:
            f.write(result)
        # Move the file into place only after it has been flushed and
        # closed.  os.replace() overwrites an existing destination on all
        # platforms; os.rename() fails on Windows when 'fname' exists.
        os.replace(tmpname, fname)
    except BaseException:
        # Best-effort cleanup.  The temporary file may never have been
        # created, or may already have been moved; a failure to remove it
        # must not hide the exception that brought us here.
        try:
            os.unlink(tmpname)
        except OSError:
            pass
        raise

##############################
# Filtering rules.
##############################

# In a MOCK_IMPL() at the start of a line, put a newline (and nothing else)
# between the first comma and the text that follows it.
BREAK_MOCK_IMPL = RegexFilt(
    pat=r'^MOCK_IMPL\(([^,]+),\s*(\S+)',
    replacement=r'MOCK_IMPL(\1,\n\2',
    flags=re.MULTILINE)

# Keep a "}" and the loop iteration terminator after it together: exactly
# one space between the "}" and *_FOREACH_END, and one before its "(".
RESTORE_SMARTLIST_END = RegexFilt(
    pat=r'}\s*(SMARTLIST|DIGESTMAP|DIGEST256MAP|STRMAP|MAP)_FOREACH_END\s*\(',
    replacement=r'} \1_FOREACH_END (',
    flags=re.MULTILINE)

# The filter that gets run on each file: the rules above, in this order,
# applied only to code outside of comments.
F = CompoundFilt([
    IgnoreCommentsFilt(
        CompoundFilt([RESTORE_SMARTLIST_END, BREAK_MOCK_IMPL])),
])

if __name__ == "__main__":
    # Every command-line argument names a file to run through F; a file
    # is only rewritten if F changes it.
    for fname in sys.argv[1:]:
        revise(fname, F)