xref: /illumos-gate/usr/src/tools/scripts/git-pbchk.py (revision 10a40e179c111088c21d8e895198ac95dcb83d14)
1#!@TOOLS_PYTHON@ -Es
2#
3#  This program is free software; you can redistribute it and/or modify
4#  it under the terms of the GNU General Public License version 2
5#  as published by the Free Software Foundation.
6#
7#  This program is distributed in the hope that it will be useful,
8#  but WITHOUT ANY WARRANTY; without even the implied warranty of
9#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10#  GNU General Public License for more details.
11#
12#  You should have received a copy of the GNU General Public License
13#  along with this program; if not, write to the Free Software
14#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15#
16
17#
18# Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
19# Copyright 2008, 2012 Richard Lowe
20# Copyright 2019 Garrett D'Amore <garrett@damore.org>
21# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
22# Copyright 2016 Nexenta Systems, Inc.
23# Copyright (c) 2019, Joyent, Inc.
24# Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
25# Copyright 2023 Bill Sommerfeld
26#
27
28from __future__ import print_function
29
30import getopt
31import io
32import os
33import re
34import subprocess
35import sys
36import tempfile
37
38if sys.version_info[0] < 3:
39    from cStringIO import StringIO
40else:
41    from io import StringIO
42
43#
44# Adjust the load path based on our location and the version of python into
45# which it is being loaded.  This assumes the normal onbld directory
46# structure, where we are in bin/ and the modules are in
47# lib/python(version)?/onbld/Scm/.  If that changes so too must this.
48#
49sys.path.insert(1, os.path.join(os.path.dirname(__file__), "..", "lib",
50                                "python%d.%d" % sys.version_info[:2]))
51
52#
53# Add the relative path to usr/src/tools to the load path, such that when run
54# from the source tree we use the modules also within the source tree.
55#
56sys.path.insert(2, os.path.join(os.path.dirname(__file__), ".."))
57
58from onbld.Scm import Ignore
59from onbld.Checks import Comments, Copyright, CStyle, HdrChk, WsCheck
60from onbld.Checks import JStyle, Keywords, ManLint, Mapfile, SpellCheck
61from onbld.Checks import ShellLint, PkgFmt
62
class GitError(Exception):
    """Error raised by git() when a git invocation cannot be set up, cannot
    be executed, or exits with a non-zero status."""
65
def git(command):
    """Run a git command and return its standard output as a list of lines.

    'command' is either a list of arguments or a single string, which is
    split on whitespace; "git" is prepended to it.

    Standard output is collected in a temporary file and returned as a list
    of text lines (line terminators retained), decoded as UTF-8 with
    undecodable bytes replaced.

    Raises GitError if the temporary file cannot be created, if git cannot
    be executed, or if git exits non-zero (in which case the message is
    whatever git wrote to its standard error)."""

    if not isinstance(command, list):
        command = command.split()

    command = ["git"] + command

    try:
        tmpfile = tempfile.TemporaryFile(prefix="git-nits", mode="w+b")
    except EnvironmentError as e:
        raise GitError("Could not create temporary file: %s\n" % e)

    # Whatever happens from here on, the temporary file must be closed.
    try:
        try:
            p = subprocess.Popen(command,
                                 stdout=tmpfile,
                                 stderr=subprocess.PIPE)
        except OSError as e:
            raise GitError("could not execute %s: %s\n" % (command, e))

        # communicate() drains stderr while waiting for the child.  A bare
        # wait() with stderr=PIPE can deadlock if git writes more to stderr
        # than the pipe buffer holds.
        errout = p.communicate()[1]
        if p.returncode != 0:
            # On Python 3 the pipe yields bytes; decode so the exception
            # message is readable text rather than a b'...' repr.
            if not isinstance(errout, str):
                errout = errout.decode('utf-8', 'replace')
            raise GitError(errout)

        tmpfile.seek(0)
        return [l.decode('utf-8', 'replace') for l in tmpfile]
    finally:
        tmpfile.close()
96
def git_root():
    """Return the absolute path of the parent of the directory reported by
    'git rev-parse --git-dir', i.e. the root of the current workspace"""

    git_dir = git('rev-parse --git-dir')[0]
    parent = os.path.join(git_dir, os.path.pardir)

    return os.path.abspath(parent)
104
def git_branch():
    """Return the name of the current git branch, taken from the line of
    'git branch' output that is marked with '*'.

    Returns None if no line is marked, or if the marked line ends with
    '(no branch)'."""

    for line in git('branch'):
        if line[0] != '*':
            continue
        # NOTE(review): lines returned by git() keep their trailing newline,
        # so this endswith() test looks like it can never match as written;
        # confirm the intended detached-HEAD handling before changing it.
        if line.endswith('(no branch)'):
            return None
        return line.split()[1]

    return None
115
def git_parent_branch(branch):
    """Return the parent of the given git branch.

    If 'branch' tracks a remote branch, the tracked remote branch is
    returned; otherwise the result defaults to origin/master.  A false
    'branch' (None or empty) yields None without running git.

    Exits with status 1 if git reports no local branches at all."""

    if not branch:
        return None

    refs = git(["for-each-ref",
                "--format=%(refname:short) %(upstream:short)",
                "refs/heads/"])

    if not refs:
        sys.stderr.write("Failed finding git parent branch\n")
        sys.exit(1)

    for ref in refs:
        # Skip entries with no upstream.  Git 1.7 leaves a trailing ' ' after
        # the name of any non-tracking branch, so those are skipped as well.
        if ' ' not in ref or ref.endswith(' \n'):
            continue
        local, remote = ref.split()
        if local == branch:
            return remote

    return 'origin/master'
139
def git_comments(parent):
    """Return the checkin comments of the commits on this branch that are not
    on 'parent', as a list of stripped lines.

    The ':SEP:' marker lines emitted after each commit body are dropped.
    Exits with status 1 if git produces no output."""

    log = git('log --pretty=tformat:%%B:SEP: %s..' % parent)

    if not log:
        sys.stderr.write("No outgoing changesets found - missing -p option?\n")
        sys.exit(1)

    comments = []
    for line in log:
        if line != ':SEP:\n':
            comments.append(line.strip())
    return comments
150
def git_file_list(parent, paths=None):
    """Return the set of files which have ever changed on this branch.

    'parent' is the revision the branch is compared against.  'paths'
    optionally limits the log to the given paths; it may be a list of path
    names, a single whitespace-separated string, or None/empty for no limit.

    Exits with status 1 if git produces no output.

    NB: This includes files which no longer exist, or no longer actually
    differ."""

    if not paths:
        # Covers the default None (which ' '.join() would reject) as well as
        # an empty list or string.
        paths = []
    elif hasattr(paths, 'split'):
        paths = paths.split()

    # Pass the arguments as a list so that a path containing whitespace
    # reaches git as a single argument instead of being split apart.
    p = git(["log", "--name-only", "--pretty=format:", "%s.." % parent] +
            list(paths))

    if not p:
        sys.stderr.write("Failed building file-list from git\n")
        sys.exit(1)

    # Blank separator lines are dropped; the set removes duplicates.
    return set(fname.strip() for fname in p
               if fname and not fname.isspace())
170
def not_check(root, cmd):
    """Return a function which returns True if a file given as an argument
    should be excluded from the check named by 'cmd'.

    The exclusions are read from whichever of '<root>/.git/info/<cmd>.NOT'
    and '<root>/exception_lists/<cmd>' exist."""

    candidates = [os.path.join(root, ".git/info", "%s.NOT" % cmd),
                  os.path.join(root, "exception_lists", cmd)]
    ignorefiles = [path for path in candidates if os.path.exists(path)]

    return Ignore.ignore(root, ignorefiles)
179
def gen_files(root, parent, paths, exclude, filter=None):
    """Return a function producing file names, relative to the current
    directory, of any file changed on this branch (limited to 'paths' if
    requested), and excluding files for which exclude returns a true value.

    'filter' is a predicate applied to each candidate path; it defaults to
    os.path.isfile.  The returned function accepts an optional 'select'
    predicate which further narrows the files it yields."""

    if filter is None:
        filter = os.path.isfile

    def ret(select=None):
        if not select:
            select = lambda x: True

        for abspath in git_file_list(parent, paths):
            # Names from git are relative to 'root'; convert them so they are
            # relative to the current directory instead.
            path = os.path.relpath(os.path.join(root, abspath), '.')
            try:
                # Use an argument list so that a path containing whitespace
                # is handed to git intact rather than split into pieces.
                res = git(["diff", parent, "HEAD", path])
            except GitError:
                # This ignores all the errors that can be thrown. Usually, this
                # means that git returned non-zero because the file doesn't
                # exist, but it could also fail if git can't create a new file
                # or it can't be executed.  Such errors are 1) unlikely, and 2)
                # will be caught by other invocations of git().
                continue
            # An empty diff means the file no longer differs from the parent.
            if (filter(path) and res and
                    select(path) and not exclude(abspath)):
                yield path
    return ret
216
def gen_links(root, parent, paths, exclude):
    """Return a function producing symbolic link names, relative to the
    current directory, of any link changed on this branch (limited to 'paths'
    if requested), and excluding those for which exclude returns a true
    value.

    This is gen_files() with os.path.islink as the file predicate."""

    return gen_files(root, parent, paths, exclude, filter=os.path.islink)
223
def comchk(root, parent, flist, output):
    """Check the checkin comments of the commits not on 'parent', writing
    any complaints to 'output', and return the result of Comments.comchk.

    A trailing Gerrit 'Change-Id:' line, together with the blank line before
    it, is removed before the comments are checked."""

    output.write("Comments:\n")

    comments = git_comments(parent)

    has_gerrit_trailer = (len(comments) > 2 and
                          re.match('^Change-Id: I[0-9a-f]+', comments[-1]) and
                          comments[-2] == '')
    if has_gerrit_trailer:
        print('Note: Gerrit Change Id present in comments')
        comments = comments[:-2]

    return Comments.comchk(comments, check_db=True, output=output)

236
237
def mapfilechk(root, parent, flist, output):
    """Run Mapfile.mapfilechk over every changed file that looks like a
    mapfile, writing complaints to 'output'.  Returns the bitwise OR of the
    individual results (0 when nothing was checked)."""

    # We are interested in examining any file that has the following
    # in its final path segment:
    #    - Contains the word 'mapfile'
    #    - Begins with 'map.'
    #    - Ends with '.map'
    # We don't want to match unless these things occur in final path segment
    # because directory names with these strings don't indicate a mapfile.
    mapfile_re = re.compile(r'.*((mapfile[^/]*)|(/map\.+[^/]*)|(\.map))$',
                            re.IGNORECASE)
    # We also ignore files with suffixes that tell us that the files
    # are not mapfiles.
    not_map_suffix_re = re.compile(r'.*\.[ch]$', re.IGNORECASE)

    def is_mapfile(name):
        if not_map_suffix_re.match(name):
            return False
        return bool(mapfile_re.match(name))

    output.write("Mapfile comments:\n")

    status = 0
    for name in flist(is_mapfile):
        with io.open(name, encoding='utf-8', errors='replace') as fh:
            status |= Mapfile.mapfilechk(fh, output=output)
    return status
261
def copyright(root, parent, flist, output):
    """Run Copyright.copyright over every changed file, writing complaints to
    'output'.  Returns the bitwise OR of the individual results."""

    output.write("Copyrights:\n")

    status = 0
    for name in flist():
        with io.open(name, encoding='utf-8', errors='replace') as fh:
            status |= Copyright.copyright(fh, output=output)
    return status
269
def hdrchk(root, parent, flist, output):
    """Run HdrChk.hdrchk (in lenient mode) over every changed '.h' file,
    writing complaints to 'output'.  Returns the bitwise OR of the
    individual results."""

    def is_header(name):
        return name.endswith('.h')

    output.write("Header format:\n")

    status = 0
    for name in flist(is_header):
        with io.open(name, encoding='utf-8', errors='replace') as fh:
            status |= HdrChk.hdrchk(fh, lenient=True, output=output)
    return status
277
def cstyle(root, parent, flist, output):
    """Run CStyle.cstyle over every changed '.c' or '.h' file, with the
    picky, POSIX-types and continuation checks enabled, writing complaints
    to 'output'.  Returns the bitwise OR of the individual results."""

    def is_c_source(name):
        return name.endswith(('.c', '.h'))

    output.write("C style:\n")

    status = 0
    for name in flist(is_c_source):
        with io.open(name, mode='rb') as fh:
            status |= CStyle.cstyle(fh, output=output, picky=True,
                                    check_posix_types=True,
                                    check_continuation=True)
    return status
287
def jstyle(root, parent, flist, output):
    """Run JStyle.jstyle (in picky mode) over every changed '.java' file,
    writing complaints to 'output'.  Returns the bitwise OR of the
    individual results."""

    def is_java(name):
        return name.endswith('.java')

    output.write("Java style:\n")

    status = 0
    for name in flist(is_java):
        with io.open(name, mode='rb') as fh:
            status |= JStyle.jstyle(fh, output=output, picky=True)
    return status
295
def manlint(root, parent, flist, output):
    """Run ManLint.manlint (in picky mode) followed by SpellCheck.spellcheck
    over every changed file whose name ends in a manual section suffix (a
    '.', a digit, then optional letters), writing complaints to 'output'.
    Returns the bitwise OR of the individual results."""

    manfile_re = re.compile(r'.*\.[0-9][a-z]*$', re.IGNORECASE)

    def is_manpage(name):
        return manfile_re.match(name)

    output.write("Man page format/spelling:\n")

    status = 0
    for name in flist(is_manpage):
        with io.open(name, mode='rb') as fh:
            status |= ManLint.manlint(fh, output=output, picky=True)
            status |= SpellCheck.spellcheck(fh, output=output)
    return status
305
def shelllint(root, parent, flist, output):
    """Run ShellLint.lint over every changed shell script, writing complaints
    to 'output'.  Returns the bitwise OR of the individual results.

    A file counts as a shell script if its extension is '.sh' or '.ksh', or
    if it has no extension and its first line is a '#' line naming sh or
    ksh."""

    def isshell(name):
        ext = os.path.splitext(name)[1]
        if ext in ('.sh', '.ksh'):
            return True
        if ext != '':
            return False
        # No extension: decide from the interpreter line, if there is one.
        with io.open(name, mode='r', errors='ignore') as fh:
            first_line = fh.readline()
        return bool(re.match(r'^#.*\bk?sh\b', first_line))

    output.write("Shell lint:\n")

    status = 0
    for name in flist(isshell):
        with io.open(name, mode='rb') as fh:
            status |= ShellLint.lint(fh, output=output)

    return status
325
def pkgfmt(root, parent, flist, output):
    """Run PkgFmt.check over every changed '.p5m' file, writing complaints to
    'output'.  Returns the bitwise OR of the individual results."""

    def is_manifest(name):
        return name.endswith('.p5m')

    output.write("Package manifests:\n")

    status = 0
    for name in flist(is_manifest):
        with io.open(name, mode='rb') as fh:
            status |= PkgFmt.check(fh, output=output)

    return status
335
def keywords(root, parent, flist, output):
    """Run Keywords.keywords over every changed file, writing complaints to
    'output'.  Returns the bitwise OR of the individual results."""

    output.write("SCCS Keywords:\n")

    status = 0
    for name in flist():
        with io.open(name, encoding='utf-8', errors='replace') as fh:
            status |= Keywords.keywords(fh, output=output)
    return status
343
def wscheck(root, parent, flist, output):
    """Run WsCheck.wscheck over every changed file, writing complaints to
    'output'.  Returns the bitwise OR of the individual results."""

    output.write("white space nits:\n")

    status = 0
    for name in flist():
        with io.open(name, encoding='utf-8', errors='replace') as fh:
            status |= WsCheck.wscheck(fh, output=output)
    return status
351
def symlinks(root, parent, flist, output):
    """List every name produced by 'flist' on 'output'.  Returns 1 if any
    name was listed, otherwise 0."""

    output.write("Symbolic links:\n")

    status = 0
    for link in flist():
        output.write("  " + link + "\n")
        # Any name at all produced by flist is reported as a failure.
        status = 1
    return status
359
def iswinreserved(name):
    """Return True if 'name', compared case-insensitively, is one of the
    reserved Windows device names, either on its own or followed by a '.'
    and anything else."""

    reserved = (
        'con', 'prn', 'aux', 'nul',
        'com1', 'com2', 'com3', 'com4', 'com5',
        'com6', 'com7', 'com8', 'com9', 'com0',
        'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5',
        'lpt6', 'lpt7', 'lpt8', 'lpt9', 'lpt0',
    )

    # None of the reserved names contains a '.', so "equal to a reserved name
    # or starting with one plus '.'" is the same as the text before the first
    # '.' being a reserved name.
    stem = name.lower().split('.', 1)[0]
    return stem in reserved
372
def haswinspecial(name):
    """Return True if 'name' contains any of the characters
    < > : " \\ | ? *"""

    specials = '<>:"\\|?*'
    return any(ch in specials for ch in name)
379
def winnames(root, parent, flist, output):
    """Report, on 'output', every changed file whose name contains a
    character rejected by haswinspecial() or has a '/'-separated component
    rejected by iswinreserved().  Each file is reported at most once.
    Returns 1 if anything was reported, otherwise 0."""

    status = 0
    output.write("Illegal filenames (Windows):\n")

    for name in flist():
        if haswinspecial(name):
            output.write("  "+name+": invalid character in name\n")
            status |= 1
        elif any(iswinreserved(part) for part in name.split('/')):
            output.write("  "+name+": reserved file name\n")
            status |= 1

    return status
397
def _run_check_group(root, parent, checks, paths, links):
    """Run each check in 'checks', print the output of any which fail, and
    return the bitwise OR of their results.

    When 'links' is true the checks are fed symbolic links (gen_links),
    otherwise regular files (gen_files)."""

    ret = 0

    for cmd in checks:
        s = StringIO()

        make_flist = gen_links if links else gen_files
        # The check's function name selects its exception (NOT) file.
        exclude = not_check(root, cmd.__name__)
        result = cmd(root, parent, make_flist(root, parent, paths, exclude),
                     output=s)
        ret |= result

        # Output is buffered per check and only shown when the check failed.
        if result != 0:
            print(s.getvalue())

    return ret


def run_checks(root, parent, cmds, scmds, paths='', opts=None):
    """Run the checks given in 'cmds' (against changed files) and then those
    in 'scmds' (against changed symbolic links), all expected to have
    well-known signatures, and report results for any which fail.

    Return failure if any of them did.

    'paths' limits the files considered.  'opts' is accepted for
    compatibility and is not used.

    NB: the function name of the commands passed in is used to name the NOT
    file which excepts files from them."""

    ret = _run_check_group(root, parent, cmds, paths, False)
    ret |= _run_check_group(root, parent, scmds, paths, True)

    return ret
432
def nits(root, parent, paths):
    """Run every file check, plus the symbolic link check, over the changed
    files limited to 'paths'.  The checkin comments are not checked."""

    file_checks = [copyright, cstyle, hdrchk, jstyle, keywords, manlint,
                   mapfilechk, shelllint, pkgfmt, winnames, wscheck]
    link_checks = [symlinks]

    run_checks(root, parent, file_checks, link_checks, paths)
447
def pbchk(root, parent, paths):
    """Run the checkin comment check, every file check and the symbolic link
    check over the whole workspace.  'paths' is accepted but not used."""

    file_checks = [comchk, copyright, cstyle, hdrchk, jstyle, keywords,
                   manlint, mapfilechk, shelllint, pkgfmt, winnames, wscheck]
    link_checks = [symlinks]

    run_checks(root, parent, file_checks, link_checks)
463
def main(cmd, args):
    """Parse the command line and run the requested check(s).

    'cmd' is the name the program was invoked as and 'args' its arguments:

        -p branch   compare against 'branch' ("-b" is accepted as an alias);
                    defaults to the parent of the current branch
        -c check    run only the named check ('pbchk', 'nits', or the name
                    of one individual check)
        path...     limit the checks to the given paths

    Without -c, 'pbchk' is run when invoked as git-pbchk and 'nits'
    otherwise.  Exits with status 1 on a usage error."""

    parent_branch = None
    checkname = None

    try:
        opts, args = getopt.getopt(args, 'b:c:p:')
    except getopt.GetoptError as e:
        sys.stderr.write(str(e) + '\n')
        sys.stderr.write("Usage: %s [-c check] [-p branch] [path...]\n" % cmd)
        sys.exit(1)

    for opt, arg in opts:
        # We accept "-b" as an alias of "-p" for backwards compatibility.
        if opt in ('-p', '-b'):
            parent_branch = arg
        elif opt == '-c':
            checkname = arg

    if not parent_branch:
        parent_branch = git_parent_branch(git_branch())

    if checkname is None:
        checkname = 'pbchk' if cmd == 'git-pbchk' else 'nits'

    if checkname == 'pbchk':
        if args:
            sys.stderr.write("only complete workspaces may be pbchk'd\n")
            sys.exit(1)
        pbchk(git_root(), parent_branch, None)
    elif checkname == 'nits':
        nits(git_root(), parent_branch, args)
    else:
        # Look the check up by name among the known checks rather than
        # eval()ing text taken from the command line.
        file_checks = dict((f.__name__, f) for f in (
            comchk, copyright, cstyle, hdrchk, jstyle, keywords, manlint,
            mapfilechk, shelllint, pkgfmt, winnames, wscheck))

        if checkname == 'symlinks':
            # symlinks must be fed links, not regular files.
            cmds, scmds = [], [symlinks]
        elif checkname in file_checks:
            cmds, scmds = [file_checks[checkname]], []
        else:
            sys.stderr.write("unknown check: %s\n" % checkname)
            sys.exit(1)

        # The path arguments go in the 'paths' position; passing them as the
        # fourth argument would make run_checks treat them as checks.
        run_checks(git_root(), parent_branch, cmds, scmds, args)
500
if __name__ == '__main__':
    # Run under the name we were invoked as; a git failure is reported on
    # stderr and turned into exit status 1.
    progname = os.path.basename(sys.argv[0])
    arguments = sys.argv[1:]
    try:
        main(progname, arguments)
    except GitError as e:
        sys.stderr.write("failed to run git:\n %s\n" % str(e))
        sys.exit(1)
507