xref: /illumos-gate/usr/src/tools/scripts/git-pbchk.py (revision dfc115332c94a2f62058ac7f2bce7631fbd20b3d)
1#!@PYTHON@
2#
3#  This program is free software; you can redistribute it and/or modify
4#  it under the terms of the GNU General Public License version 2
5#  as published by the Free Software Foundation.
6#
7#  This program is distributed in the hope that it will be useful,
8#  but WITHOUT ANY WARRANTY; without even the implied warranty of
9#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10#  GNU General Public License for more details.
11#
12#  You should have received a copy of the GNU General Public License
13#  along with this program; if not, write to the Free Software
14#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15#
16
17#
18# Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
19# Copyright 2008, 2012 Richard Lowe
20# Copyright 2014 Garrett D'Amore <garrett@damore.org>
21# Copyright (c) 2014, Joyent, Inc.
22# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
23#
24
25import getopt
26import os
27import re
28import subprocess
29import sys
30import tempfile
31
32from cStringIO import StringIO
33
34#
35# Adjust the load path based on our location and the version of python into
36# which it is being loaded.  This assumes the normal onbld directory
37# structure, where we are in bin/ and the modules are in
38# lib/python(version)?/onbld/Scm/.  If that changes so too must this.
39#
40sys.path.insert(1, os.path.join(os.path.dirname(__file__), "..", "lib",
41                                "python%d.%d" % sys.version_info[:2]))
42
43#
44# Add the relative path to usr/src/tools to the load path, such that when run
45# from the source tree we use the modules also within the source tree.
46#
47sys.path.insert(2, os.path.join(os.path.dirname(__file__), ".."))
48
49from onbld.Scm import Ignore
50from onbld.Checks import Comments, Copyright, CStyle, HdrChk
51from onbld.Checks import JStyle, Keywords, ManLint, Mapfile, SpellCheck
52
53
class GitError(Exception):
    """Raised by git() when a git command cannot be run or exits non-zero."""
56
57def git(command):
58    """Run a command and return a stream containing its stdout (and write its
59    stderr to its stdout)"""
60
61    if type(command) != list:
62        command = command.split()
63
64    command = ["git"] + command
65
66    try:
67        tmpfile = tempfile.TemporaryFile(prefix="git-nits")
68    except EnvironmentError, e:
69        raise GitError("Could not create temporary file: %s\n" % e)
70
71    try:
72        p = subprocess.Popen(command,
73                             stdout=tmpfile,
74                             stderr=subprocess.PIPE)
75    except OSError, e:
76        raise GitError("could not execute %s: %s\n" % (command, e))
77
78    err = p.wait()
79    if err != 0:
80        raise GitError(p.stderr.read())
81
82    tmpfile.seek(0)
83    return tmpfile
84
85
def git_root():
    """Return the root of the current git workspace.

    The root is taken to be the parent of the directory reported by
    'git rev-parse --git-dir', as an absolute path.

    Raises GitError if git fails (via git()) or reports nothing."""

    p = git('rev-parse --git-dir')
    try:
        gitdir = p.readline().rstrip('\n')
    finally:
        p.close()

    # git() raises on failure, so the only thing left to guard against is
    # empty output.
    if not gitdir:
        raise GitError("Failed finding git workspace\n")

    return os.path.abspath(os.path.join(gitdir, os.path.pardir))
97
98
def git_branch():
    """Return the current git branch.

    Returns None if no branch is marked current in the output of
    'git branch', or if HEAD is detached.  Raises GitError (via git()) if
    git fails."""

    p = git('branch')
    try:
        for elt in p:
            if not elt.startswith('*'):
                continue

            # Lines read from the stream keep their newline, so strip
            # before looking at the text.
            name = elt[1:].strip()

            # A detached HEAD is shown as a parenthesised description
            # rather than a branch name, e.g. "(no branch)".
            # NOTE(review): a branch whose name really starts with '('
            # would also be treated as detached -- confirm acceptable.
            if not name or name.startswith('('):
                return None
            return name.split()[0]
    finally:
        p.close()

    return None
113
114
def git_parent_branch(branch):
    """Return the parent of the given git branch.

    If the branch tracks a remote branch, return the remote branch which is
    tracked.  If not, default to origin/master.  Returns None if 'branch'
    is false (e.g. there is no current branch).

    Raises GitError (via git()) if git fails."""

    if not branch:
        return None

    # The command must be passed as a list: the format contains a space,
    # and git() splits string commands on whitespace, which would turn
    # "%(upstream:short)" into a ref pattern rather than part of the format.
    p = git(["for-each-ref",
             "--format=%(refname:short) %(upstream:short)",
             "refs/heads/"])
    try:
        for line in p:
            # A non-tracking branch has an empty upstream (git 1.7 leaves a
            # trailing ' '), so only lines with two fields name a parent.
            fields = line.split()
            if len(fields) == 2 and fields[0] == branch:
                return fields[1]
    finally:
        p.close()

    return 'origin/master'
138
139
def git_comments(parent):
    """Return a list of any checkin comments on this git branch.

    The list holds the whitespace-stripped lines of the commit messages of
    every commit in 'parent..', in the order git log prints them.  The
    ':SEP:' lines which the log format appends to each message are dropped.

    Raises GitError (via git()) if git fails."""

    p = git('log --pretty=tformat:%%B:SEP: %s..' % parent)
    try:
        return [x.strip() for x in p if x != ':SEP:\n']
    finally:
        p.close()
150
151
def git_file_list(parent, paths=None):
    """Return the set of files which have ever changed on this branch.

    'parent' is the revision the branch is compared against ('parent..').
    'paths', if given and non-empty, is a sequence of paths to which the
    log is limited.

    NB: This includes files which no longer exist, or no longer actually
    differ.

    Raises GitError (via git()) if git fails."""

    # Build an argument list rather than a string so that paths containing
    # whitespace reach git intact, and so that paths=None is accepted.
    command = ["log", "--name-only", "--pretty=format:", "%s.." % parent]
    if paths:
        command.extend(paths)

    p = git(command)
    try:
        names = [line.strip() for line in p]
    finally:
        p.close()

    # Blank lines separate the per-commit file lists; drop them.
    return set([name for name in names if name])
171
172
def not_check(root, cmd):
    """Build the exclusion predicate for the check named by 'cmd'.

    Two ignore files are consulted if they exist: <root>/.git/<cmd>.NOT and
    <root>/exception_lists/<cmd>.  Whatever Ignore.ignore() builds from
    those files is returned; callers use it as a function which returns
    True for a file that should be excluded from the check."""

    candidates = (os.path.join(root, ".git", "%s.NOT" % cmd),
                  os.path.join(root, "exception_lists", cmd))
    present = [path for path in candidates if os.path.exists(path)]
    return Ignore.ignore(root, present)
181
182
def gen_files(root, parent, paths, exclude):
    """Return a function producing file names, relative to the current
    directory, of any file changed on this branch (limited to 'paths' if
    requested), and excluding files for which exclude returns a true value.

    The returned function takes an optional 'select' predicate; only files
    for which it returns a true value are produced.  A file is produced only
    if it currently exists as a regular file and 'git diff parent HEAD' for
    it is non-empty."""

    # Taken entirely from Python 2.6's os.path.relpath which we would use if we
    # could.
    def relpath(path, here):
        c = os.path.abspath(os.path.join(root, path)).split(os.path.sep)
        s = os.path.abspath(here).split(os.path.sep)
        l = len(os.path.commonprefix((s, c)))
        return os.path.join(*[os.path.pardir] * (len(s)-l) + c[l:])

    def ret(select=None):
        if not select:
            select = lambda x: True

        for f in git_file_list(parent, paths):
            f = relpath(f, '.')
            try:
                # An argument list keeps file names containing whitespace
                # in one piece; a string would be split by git().
                res = git(["diff", str(parent), "HEAD", f])
            except GitError:
                # This ignores all the errors that can be thrown. Usually,
                # this means that git returned non-zero because the file
                # doesn't exist, but it could also fail if git can't create
                # a new file or it can't be executed.  Such errors are
                # 1) unlikely, and 2) will be caught by other invocations
                # of git().
                continue

            try:
                empty = not res.readline()
            finally:
                res.close()

            if (os.path.isfile(f) and not empty and select(f) and
                not exclude(f)):
                yield f
    return ret
215
216
def comchk(root, parent, flist, output):
    """Check the checkin comments of this branch, relative to 'parent'.

    Writes a "Comments:" header to 'output' and returns the result of
    Comments.comchk().  'root' and 'flist' are unused; they are part of the
    signature shared by all checks."""

    output.write("Comments:\n")

    comments = git_comments(parent)
    return Comments.comchk(comments, check_db=True, output=output)
222
223
def mapfilechk(root, parent, flist, output):
    """Run Mapfile.mapfilechk() over every changed file that looks like a
    mapfile.

    'flist' is a file-name generator factory as returned by gen_files();
    'output' is the stream to which findings are written.  'root' and
    'parent' are unused; they are part of the signature shared by all
    checks.  Returns the bitwise OR of the individual results."""

    ret = 0

    # We are interested in examining any file that has the following
    # in its final path segment:
    #    - Contains the word 'mapfile'
    #    - Begins with 'map.'
    #    - Ends with '.map'
    # We don't want to match unless these things occur in final path segment
    # because directory names with these strings don't indicate a mapfile.
    # We also ignore files with suffixes that tell us that the files
    # are not mapfiles.
    #
    # The 'map.' alternative accepts either a leading directory or the very
    # start of the name, so that a 'map.*' file in the current directory
    # (whose relative name has no '/') is matched too.
    MapfileRE = re.compile(
        r'(.*mapfile[^/]*|(.*/)?map\.+[^/]*|.*\.map)$', re.IGNORECASE)
    NotMapSuffixRE = re.compile(r'.*\.[ch]$', re.IGNORECASE)

    output.write("Mapfile comments:\n")

    for f in flist(lambda x: MapfileRE.match(x) and not
                   NotMapSuffixRE.match(x)):
        fh = open(f, 'r')
        try:
            ret |= Mapfile.mapfilechk(fh, output=output)
        finally:
            # Close the file even if the checker raises.
            fh.close()
    return ret
248
249
def copyright(root, parent, flist, output):
    """Run Copyright.copyright() over every changed file.

    'flist' is a file-name generator factory as returned by gen_files();
    'output' is the stream to which findings are written.  'root' and
    'parent' are unused; they are part of the signature shared by all
    checks.  Returns the bitwise OR of the individual results."""

    ret = 0
    output.write("Copyrights:\n")
    for f in flist():
        fh = open(f, 'r')
        try:
            ret |= Copyright.copyright(fh, output=output)
        finally:
            # Close the file even if the checker raises.
            fh.close()
    return ret
258
259
def hdrchk(root, parent, flist, output):
    """Run HdrChk.hdrchk() (lenient) over every changed '.h' file.

    'flist' is a file-name generator factory as returned by gen_files();
    'output' is the stream to which findings are written.  'root' and
    'parent' are unused; they are part of the signature shared by all
    checks.  Returns the bitwise OR of the individual results."""

    ret = 0
    output.write("Header format:\n")
    for f in flist(lambda x: x.endswith('.h')):
        fh = open(f, 'r')
        try:
            ret |= HdrChk.hdrchk(fh, lenient=True, output=output)
        finally:
            # Close the file even if the checker raises.
            fh.close()
    return ret
268
269
def cstyle(root, parent, flist, output):
    """Run CStyle.cstyle() over every changed '.c' or '.h' file.

    'flist' is a file-name generator factory as returned by gen_files();
    'output' is the stream to which findings are written.  'root' and
    'parent' are unused; they are part of the signature shared by all
    checks.  Returns the bitwise OR of the individual results."""

    ret = 0
    output.write("C style:\n")
    for f in flist(lambda x: x.endswith('.c') or x.endswith('.h')):
        fh = open(f, 'r')
        try:
            ret |= CStyle.cstyle(fh, output=output, picky=True,
                                 check_posix_types=True,
                                 check_continuation=True)
        finally:
            # Close the file even if the checker raises.
            fh.close()
    return ret
280
281
def jstyle(root, parent, flist, output):
    """Run JStyle.jstyle() over every changed '.java' file.

    'flist' is a file-name generator factory as returned by gen_files();
    'output' is the stream to which findings are written.  'root' and
    'parent' are unused; they are part of the signature shared by all
    checks.  Returns the bitwise OR of the individual results."""

    ret = 0
    output.write("Java style:\n")
    for f in flist(lambda x: x.endswith('.java')):
        fh = open(f, 'r')
        try:
            ret |= JStyle.jstyle(fh, output=output, picky=True)
        finally:
            # Close the file even if the checker raises.
            fh.close()
    return ret
290
291
def manlint(root, parent, flist, output):
    """Run ManLint.manlint() and SpellCheck.spellcheck() over every changed
    file whose name ends in a manual-section suffix (a '.', a digit, then
    any letters).

    'flist' is a file-name generator factory as returned by gen_files();
    'output' is the stream to which findings are written.  'root' and
    'parent' are unused; they are part of the signature shared by all
    checks.  Returns the bitwise OR of the individual results."""

    ret = 0
    output.write("Man page format/spelling:\n")
    ManfileRE = re.compile(r'.*\.[0-9][a-z]*$', re.IGNORECASE)
    for f in flist(lambda x: ManfileRE.match(x)):
        fh = open(f, 'r')
        try:
            ret |= ManLint.manlint(fh, output=output, picky=True)
            # NOTE(review): the same handle is handed to both checkers
            # without rewinding; confirm spellcheck does not depend on the
            # file position left behind by manlint.
            ret |= SpellCheck.spellcheck(fh, output=output)
        finally:
            # Close the file even if a checker raises.
            fh.close()
    return ret
302
def keywords(root, parent, flist, output):
    """Run Keywords.keywords() over every changed file.

    'flist' is a file-name generator factory as returned by gen_files();
    'output' is the stream to which findings are written.  'root' and
    'parent' are unused; they are part of the signature shared by all
    checks.  Returns the bitwise OR of the individual results."""

    ret = 0
    output.write("SCCS Keywords:\n")
    for f in flist():
        fh = open(f, 'r')
        try:
            ret |= Keywords.keywords(fh, output=output)
        finally:
            # Close the file even if the checker raises.
            fh.close()
    return ret
311
312
def run_checks(root, parent, cmds, paths='', opts=None):
    """Run the checks given in 'cmds', expected to have well-known signatures,
    and report results for any which fail.

    Each check is called as cmd(root, parent, flist, output=stream), where
    'flist' comes from gen_files() and 'stream' is a fresh in-memory buffer;
    the buffer is printed to stdout only if the check returns non-zero.

    'paths' optionally limits the files considered.  'opts' is accepted for
    compatibility and is not used.

    Return failure if any of them did (the bitwise OR of all results).

    NB: the function name of the commands passed in is used to name the NOT
    file which excepts files from them."""

    ret = 0

    for cmd in cmds:
        s = StringIO()

        exclude = not_check(root, cmd.__name__)
        result = cmd(root, parent, gen_files(root, parent, paths, exclude),
                     output=s)
        ret |= result

        if result != 0:
            # Same output as printing the buffer: its text plus a newline.
            sys.stdout.write(s.getvalue() + "\n")

    return ret
336
337
def nits(root, parent, paths):
    """Run every check except the checkin-comment check, limited to 'paths'
    if any are given.

    Returns the result of run_checks(): non-zero if any check failed."""

    cmds = [copyright,
            cstyle,
            hdrchk,
            jstyle,
            keywords,
            manlint,
            mapfilechk]
    return run_checks(root, parent, cmds, paths)
347
348
def pbchk(root, parent, paths):
    """Run every check, including the checkin-comment check, over the whole
    workspace.

    'paths' is accepted so that the signature matches nits(), but is not
    passed on: run_checks() is called without a path limit.

    Returns the result of run_checks(): non-zero if any check failed."""

    cmds = [comchk,
            copyright,
            cstyle,
            hdrchk,
            jstyle,
            keywords,
            manlint,
            mapfilechk]
    return run_checks(root, parent, cmds)
359
360
361def main(cmd, args):
362    parent_branch = None
363
364    try:
365        opts, args = getopt.getopt(args, 'b:')
366    except getopt.GetoptError, e:
367        sys.stderr.write(str(e) + '\n')
368        sys.stderr.write("Usage: %s [-b branch] [path...]\n" % cmd)
369        sys.exit(1)
370
371    for opt, arg in opts:
372        if opt == '-b':
373            parent_branch = arg
374
375    if not parent_branch:
376        parent_branch = git_parent_branch(git_branch())
377
378    func = nits
379    if cmd == 'git-pbchk':
380        func = pbchk
381        if args:
382            sys.stderr.write("only complete workspaces may be pbchk'd\n");
383            sys.exit(1)
384
385    func(git_root(), parent_branch, args)
386
387if __name__ == '__main__':
388    try:
389        main(os.path.basename(sys.argv[0]), sys.argv[1:])
390    except GitError, e:
391        sys.stderr.write("failed to run git:\n %s\n" % str(e))
392        sys.exit(1)
393