xref: /illumos-gate/usr/src/tools/scripts/git-pbchk.py (revision e03bdbe7dd7f6bad6cde6093a1bae7c429f4fec4)
1#!@TOOLS_PYTHON@ -Es
2#
3#  This program is free software; you can redistribute it and/or modify
4#  it under the terms of the GNU General Public License version 2
5#  as published by the Free Software Foundation.
6#
7#  This program is distributed in the hope that it will be useful,
8#  but WITHOUT ANY WARRANTY; without even the implied warranty of
9#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10#  GNU General Public License for more details.
11#
12#  You should have received a copy of the GNU General Public License
13#  along with this program; if not, write to the Free Software
14#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15#
16
17#
18# Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
19# Copyright 2008, 2012 Richard Lowe
20# Copyright 2019 Garrett D'Amore <garrett@damore.org>
21# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
22# Copyright 2016 Nexenta Systems, Inc.
23# Copyright (c) 2019, Joyent, Inc.
24# Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
25# Copyright 2024 Bill Sommerfeld
26#
27
28from __future__ import print_function
29
30import getopt
31import io
32import os
33import re
34import subprocess
35import sys
36import tempfile
37
38if sys.version_info[0] < 3:
39    from cStringIO import StringIO
40else:
41    from io import StringIO
42
43#
44# Adjust the load path based on our location and the version of python into
45# which it is being loaded.  This assumes the normal onbld directory
46# structure, where we are in bin/ and the modules are in
47# lib/python(version)?/onbld/Scm/.  If that changes so too must this.
48#
49sys.path.insert(1, os.path.join(os.path.dirname(__file__), "..", "lib",
50                                "python%d.%d" % sys.version_info[:2]))
51
52#
53# Add the relative path to usr/src/tools to the load path, such that when run
54# from the source tree we use the modules also within the source tree.
55#
56sys.path.insert(2, os.path.join(os.path.dirname(__file__), ".."))
57
58from onbld.Scm import Ignore
59from onbld.Checks import Comments, Copyright, CStyle, HdrChk, WsCheck
60from onbld.Checks import JStyle, Keywords, ManLint, Mapfile, SpellCheck
61from onbld.Checks import ShellLint, PkgFmt
62
class GitError(Exception):
    """Signals that a git command could not be set up, could not be started,
    or exited unsuccessfully."""
65
def git(command):
    """Run a git command and return its standard output as a list of lines.

    'command' is either a list of arguments or a single string, which is
    split on whitespace; "git" itself is prepended in both cases.  Every
    returned line is decoded as UTF-8 (undecodable bytes are replaced) and
    keeps its line terminator.

    Raises GitError if the temporary file used to capture the output cannot
    be created, if git cannot be executed, or if git exits non-zero (in which
    case the exception carries what git wrote to its stderr)."""

    if not isinstance(command, list):
        command = command.split()

    command = ["git"] + command

    try:
        tmpfile = tempfile.TemporaryFile(prefix="git-nits", mode="w+b")
    except EnvironmentError as e:
        raise GitError("Could not create temporary file: %s\n" % e)

    # Whatever happens from here on, do not leak the temporary file.
    try:
        try:
            p = subprocess.Popen(command,
                                 stdout=tmpfile,
                                 stderr=subprocess.PIPE)
        except OSError as e:
            raise GitError("could not execute %s: %s\n" % (command, e))

        # communicate() drains stderr while waiting for git to finish; a bare
        # wait() can block forever once git has filled the stderr pipe.
        errout = p.communicate()[1]
        if p.returncode != 0:
            # Under Python 3 the pipe yields bytes; turn them into text so
            # the message is readable when the exception is printed.
            if not isinstance(errout, str):
                errout = errout.decode('utf-8', 'replace')
            raise GitError(errout)

        tmpfile.seek(0)
        return [l.decode('utf-8', 'replace') for l in tmpfile]
    finally:
        tmpfile.close()
96
def git_root():
    """Return the absolute path of the top-level directory of the current
    git workspace, as reported by 'git rev-parse --show-toplevel'"""

    toplevel = git('rev-parse --show-toplevel')[0]
    return os.path.abspath(toplevel.strip())
104
def git_branch():
    """Return the name of the current git branch.

    The current branch is the entry 'git branch' marks with a leading '*'.
    Returns None when that entry is not a real branch (HEAD is detached), or
    when no entry is marked at all."""

    for line in git('branch'):
        if not line.startswith('*'):
            continue

        # The lines returned by git() keep their trailing newline, so strip
        # before looking at how the entry ends.
        current = line[1:].strip()

        # With no branch checked out git prints a parenthesised description
        # instead of a name, e.g. "(no branch)" or "(HEAD detached at ...)".
        # A real branch name can never contain a space.
        if current.startswith('(') and ' ' in current:
            return None
        return current

    return None
115
def git_parent_branch(branch):
    """Return the parent of the git branch named by 'branch'.

    If the branch tracks a remote branch, the tracked remote branch is
    returned; otherwise the answer defaults to origin/master.  A false
    'branch' (None or empty) yields None.  The program exits with status 1
    if git lists no local branches at all."""

    if not branch:
        return None

    refs = git(["for-each-ref",
                "--format=%(refname:short) %(upstream:short)",
                "refs/heads/"])

    if not refs:
        sys.stderr.write("Failed finding git parent branch\n")
        sys.exit(1)

    for ref in refs:
        # Skip entries without an upstream.  Git 1.7 will leave a ' '
        # trailing any non-tracking branch.
        if ' ' not in ref or ref.endswith(' \n'):
            continue

        name, upstream = ref.split()
        if name == branch:
            return upstream

    return 'origin/master'
139
def git_comments(parent):
    """Return the lines of the checkin comments of every commit on this
    branch that is not on 'parent'.

    Each line is stripped of surrounding whitespace and the ':SEP:' lines
    separating one commit from the next are dropped.  Exits with status 1 if
    git reports nothing at all."""

    log = git('log --pretty=tformat:%%B:SEP: %s..' % parent)

    if not log:
        sys.stderr.write("No outgoing changesets found - missing -p option?\n")
        sys.exit(1)

    comments = []
    for line in log:
        if line != ':SEP:\n':
            comments.append(line.strip())
    return comments
150
def git_file_list(parent, paths=None):
    """Return the set of files which have ever changed on this branch.

    'parent' is the branch to compare against.  'paths', when given, limits
    the result to those paths; it may be a sequence of paths or a single
    whitespace-separated string, and None or an empty value means no limit.

    Exits with status 1 if git reports nothing at all.

    NB: This includes files which no longer exist, or no longer actually
    differ."""

    if not paths:
        paths = []
    elif isinstance(paths, str):
        paths = paths.split()
    else:
        paths = list(paths)

    # Hand git an argument list rather than one string for git() to split,
    # so that a path containing whitespace stays a single argument.
    command = ["log", "--name-only", "--pretty=format:", "%s.." % parent]
    command.extend(paths)

    p = git(command)

    if not p:
        sys.stderr.write("Failed building file-list from git\n")
        sys.exit(1)

    # Blank lines separate the per-commit file lists; drop them.
    return set(fname.strip() for fname in p
               if fname and not fname.isspace())
170
def not_check(root, cmd):
    """Return a function which returns True if a file given as an argument
    should be excluded from the check named by 'cmd'.

    The exclusions are read from whichever of <root>/.git/info/<cmd>.NOT and
    <root>/exception_lists/<cmd> exist."""

    candidates = [os.path.join(root, ".git/info", "%s.NOT" % cmd),
                  os.path.join(root, "exception_lists", cmd)]
    present = [path for path in candidates if os.path.exists(path)]

    return Ignore.ignore(root, present)
179
def gen_files(root, parent, paths, exclude, filter=None):
    """Return a function producing file names, relative to the current
    directory, of any file changed on this branch (limited to 'paths' if
    requested), and excluding files for which exclude returns a true value.

    'filter' is called with each candidate path and must return a true value
    for the path to be considered at all; it defaults to os.path.isfile.

    The returned function takes an optional 'select' predicate which further
    narrows the paths it yields.  A path is yielded only if it also still
    differs between 'parent' and HEAD."""

    if filter is None:
        filter = os.path.isfile

    def ret(select=None):
        if not select:
            select = lambda x: True

        for abspath in git_file_list(parent, paths):
            path = os.path.relpath(os.path.join(root, abspath), '.')

            # Apply the cheap tests first so that git is only run for files
            # which could be yielded.
            if not filter(path) or not select(path) or exclude(abspath):
                continue

            try:
                # Pass an argument list so a path containing whitespace is
                # not split apart, and use "--" so it is always treated as a
                # path.
                res = git(["diff", parent, "HEAD", "--", path])
            except GitError:
                # This ignores all the errors that can be thrown. Usually,
                # this means that git returned non-zero because the file
                # doesn't exist, but it could also fail if git can't create a
                # new file or it can't be executed.  Such errors are 1)
                # unlikely, and 2) will be caught by other invocations of
                # git().
                continue

            # No diff output means the file no longer differs from parent.
            if res:
                yield path
    return ret
208
def gen_links(root, parent, paths, exclude):
    """Return a function producing symbolic link names, relative to the
    current directory, of any symbolic link changed on this branch (limited
    to 'paths' if requested), and excluding those for which exclude returns
    a true value.

    This is gen_files() with os.path.islink as the file filter."""

    def is_symlink(candidate):
        return os.path.islink(candidate)

    return gen_files(root, parent, paths, exclude, is_symlink)
215
def comchk(root, parent, flist, output):
    """Check the checkin comments of the commits not on 'parent'.

    Writes a "Comments:" heading to 'output' and returns the result of
    Comments.comchk().  If the comments end with a blank line followed by a
    Gerrit 'Change-Id:' line, those two lines are dropped first and a note is
    printed on stdout.  'root' and 'flist' are not used."""

    output.write("Comments:\n")

    comments = git_comments(parent)

    gerrit_trailer = (len(comments) > 2 and
                      re.match('^Change-Id: I[0-9a-f]+', comments[-1]) and
                      comments[-2] == '')
    if gerrit_trailer:
        print('Note: Gerrit Change Id present in comments')
        comments = comments[:-2]

    return Comments.comchk(comments, check_db=True, output=output)

228
229
def mapfilechk(root, parent, flist, output):
    """Run Mapfile.mapfilechk() over every changed file that looks like a
    mapfile.

    Writes a "Mapfile comments:" heading to 'output' and returns the bitwise
    OR of the individual results.  'root' and 'parent' are not used."""

    # A file is of interest if the final segment of its path
    #    - contains the word 'mapfile',
    #    - begins with 'map.', or
    #    - ends with '.map'.
    # Only the final segment counts, because a directory name containing
    # these strings says nothing about the files below it.  Files whose
    # suffix shows they are not mapfiles (.c and .h) are ignored.
    mapfile_re = re.compile(r'.*((mapfile[^/]*)|(/map\.+[^/]*)|(\.map))$',
                            re.IGNORECASE)
    not_map_re = re.compile(r'.*\.[ch]$', re.IGNORECASE)

    def is_mapfile(name):
        return mapfile_re.match(name) and not not_map_re.match(name)

    output.write("Mapfile comments:\n")

    status = 0
    for name in flist(is_mapfile):
        with io.open(name, encoding='utf-8', errors='replace') as stream:
            status |= Mapfile.mapfilechk(stream, output=output)
    return status
253
def copyright(root, parent, flist, output):
    """Run Copyright.copyright() over every file produced by flist().

    Writes a "Copyrights:" heading to 'output' and returns the bitwise OR of
    the individual results.  Files are read as UTF-8 text with undecodable
    bytes replaced.  'root' and 'parent' are not used."""

    output.write("Copyrights:\n")

    status = 0
    for name in flist():
        with io.open(name, encoding='utf-8', errors='replace') as stream:
            status |= Copyright.copyright(stream, output=output)
    return status
261
def hdrchk(root, parent, flist, output):
    """Run HdrChk.hdrchk() in lenient mode over every changed '.h' file.

    Writes a "Header format:" heading to 'output' and returns the bitwise OR
    of the individual results.  'root' and 'parent' are not used."""

    def is_header(name):
        return name.endswith('.h')

    output.write("Header format:\n")

    status = 0
    for name in flist(is_header):
        with io.open(name, encoding='utf-8', errors='replace') as stream:
            status |= HdrChk.hdrchk(stream, lenient=True, output=output)
    return status
269
def cstyle(root, parent, flist, output):
    """Run CStyle.cstyle() over every changed '.c' and '.h' file.

    The picky, POSIX types and continuation checks are all enabled, and the
    files are opened in binary mode.  Writes a "C style:" heading to 'output'
    and returns the bitwise OR of the individual results.  'root' and
    'parent' are not used."""

    output.write("C style:\n")

    status = 0
    for name in flist(lambda n: n.endswith(('.c', '.h'))):
        with io.open(name, mode='rb') as src:
            status |= CStyle.cstyle(src, output=output, picky=True,
                                    check_posix_types=True,
                                    check_continuation=True)
    return status
279
def jstyle(root, parent, flist, output):
    """Run JStyle.jstyle() in picky mode over every changed '.java' file.

    The files are opened in binary mode.  Writes a "Java style:" heading to
    'output' and returns the bitwise OR of the individual results.  'root'
    and 'parent' are not used."""

    def is_java(name):
        return name.endswith('.java')

    output.write("Java style:\n")

    status = 0
    for name in flist(is_java):
        with io.open(name, mode='rb') as src:
            status |= JStyle.jstyle(src, output=output, picky=True)
    return status
287
def manlint(root, parent, flist, output):
    """Run ManLint.manlint() and SpellCheck.spellcheck() over every changed
    manual page.

    A manual page is a file whose name ends in '.', a digit and then any
    number of letters.  Writes a "Man page format/spelling:" heading to
    'output' and returns the bitwise OR of all the individual results.
    'root' and 'parent' are not used."""

    output.write("Man page format/spelling:\n")

    manfile_re = re.compile(r'.*\.[0-9][a-z]*$', re.IGNORECASE)

    def is_manpage(name):
        return manfile_re.match(name)

    status = 0
    for name in flist(is_manpage):
        with io.open(name, mode='rb') as page:
            # Both checkers are handed the same open handle.
            # NOTE(review): if ManLint.manlint() reads from the handle,
            # SpellCheck.spellcheck() starts where it stopped - confirm.
            status |= ManLint.manlint(page, output=output, picky=True)
            status |= SpellCheck.spellcheck(page, output=output)
    return status
297
def shelllint(root, parent, flist, output):
    """Run ShellLint.lint() over every changed shell script.

    Writes a "Shell lint:" heading to 'output' and returns the bitwise OR of
    the individual results.  'root' and 'parent' are not used."""

    output.write("Shell lint:\n")

    def isshell(name):
        # A '.sh' or '.ksh' extension is decisive; a file with any other
        # extension is never a shell script.
        ext = os.path.splitext(name)[1]
        if ext in ('.sh', '.ksh'):
            return True
        if ext != '':
            return False

        # No extension at all: decide from the first line of the file, which
        # must start with '#' and mention 'sh' or 'ksh' as a whole word.
        with io.open(name, mode='r', errors='ignore') as script:
            first = script.readline()
        return bool(re.match(r'^#.*\bk?sh\b', first))

    status = 0
    for name in flist(isshell):
        with io.open(name, mode='rb') as script:
            status |= ShellLint.lint(script, output=output)

    return status
317
def pkgfmt(root, parent, flist, output):
    """Run PkgFmt.check() over every changed '.p5m' package manifest.

    The files are opened in binary mode.  Writes a "Package manifests:"
    heading to 'output' and returns the bitwise OR of the individual
    results.  'root' and 'parent' are not used."""

    def is_manifest(name):
        return name.endswith('.p5m')

    output.write("Package manifests:\n")

    status = 0
    for name in flist(is_manifest):
        with io.open(name, mode='rb') as manifest:
            status |= PkgFmt.check(manifest, output=output)

    return status
327
def keywords(root, parent, flist, output):
    """Run Keywords.keywords() over every file produced by flist().

    Writes a "SCCS Keywords:" heading to 'output' and returns the bitwise OR
    of the individual results.  Files are read as UTF-8 text with undecodable
    bytes replaced.  'root' and 'parent' are not used."""

    output.write("SCCS Keywords:\n")

    status = 0
    for name in flist():
        with io.open(name, encoding='utf-8', errors='replace') as stream:
            status |= Keywords.keywords(stream, output=output)
    return status
335
def wscheck(root, parent, flist, output):
    """Run WsCheck.wscheck() over every file produced by flist().

    Writes a "white space nits:" heading to 'output' and returns the bitwise
    OR of the individual results.  Files are read as UTF-8 text with
    undecodable bytes replaced.  'root' and 'parent' are not used."""

    output.write("white space nits:\n")

    status = 0
    for name in flist():
        with io.open(name, encoding='utf-8', errors='replace') as stream:
            status |= WsCheck.wscheck(stream, output=output)
    return status
343
def symlinks(root, parent, flist, output):
    """Report every name produced by flist() as a symbolic link.

    Writes a "Symbolic links:" heading to 'output' followed by one indented
    line per name.  Returns 1 if any name was produced and 0 otherwise.
    'root' and 'parent' are not used."""

    output.write("Symbolic links:\n")

    found = 0
    for link in flist():
        output.write("".join(("  ", link, "\n")))
        found = 1
    return found
351
def iswinreserved(name):
    """Return True if 'name', a single path component, is one of the device
    names listed below, either on its own or followed by a '.' and anything
    else.  The comparison ignores case."""

    reserved = frozenset((
        'con', 'prn', 'aux', 'nul',
        'com1', 'com2', 'com3', 'com4', 'com5',
        'com6', 'com7', 'com8', 'com9', 'com0',
        'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5',
        'lpt6', 'lpt7', 'lpt8', 'lpt9', 'lpt0'))

    # None of the reserved names contains a '.', so only the text before the
    # first '.' can decide the answer.
    stem = name.lower().partition('.')[0]
    return stem in reserved
364
def haswinspecial(name):
    """Return True if 'name' contains any of the characters
    < > : " \\ | ? * and False otherwise."""

    specials = '<>:"\\|?*'
    return any(ch in specials for ch in name)
371
def winnames(root, parent, flist, output):
    """Report changed files whose names are rejected by haswinspecial() or
    iswinreserved().

    Writes an "Illegal filenames (Windows):" heading to 'output' and then at
    most one line per offending file: a name containing a special character
    is reported for that alone, otherwise it is reported if any of its
    '/'-separated components is a reserved name.  Returns 1 if anything was
    reported and 0 otherwise.  'root' and 'parent' are not used."""

    output.write("Illegal filenames (Windows):\n")

    status = 0
    for name in flist():
        if haswinspecial(name):
            output.write("  "+name+": invalid character in name\n")
            status |= 1
        elif any(iswinreserved(part) for part in name.split('/')):
            output.write("  "+name+": reserved file name\n")
            status |= 1

    return status
389
def run_checks(root, parent, cmds, scmds, paths='', opts={}):
    """Run the checks given in 'cmds' and 'scmds', expected to have
    well-known signatures, and report results for any which fail.

    The checks in 'cmds' are given a file lister built by gen_files(); those
    in 'scmds' are given one built by gen_links().  Each check writes to its
    own buffer, which is printed only if the check returns non-zero.  'opts'
    is not used.

    Return failure if any of them did.

    NB: the function name of the commands passed in is used to name the NOT
    file which excepts files from them."""

    def run_one(check, make_lister):
        buf = StringIO()

        skip = not_check(root, check.__name__)
        status = check(root, parent, make_lister(root, parent, paths, skip),
                       output=buf)

        if status != 0:
            print(buf.getvalue())
        return status

    failed = 0

    for check in cmds:
        failed |= run_one(check, gen_files)

    for check in scmds:
        failed |= run_one(check, gen_links)

    return failed
424
def nits(root, parent, paths):
    """Run the per-file checks, plus the symbolic link check, on the files
    changed relative to 'parent', limited to 'paths' if any are given.

    Returns the result of run_checks(), which is non-zero if any check
    reported a problem, so that callers can tell success from failure."""

    cmds = [copyright,
            cstyle,
            hdrchk,
            jstyle,
            keywords,
            manlint,
            mapfilechk,
            shelllint,
            pkgfmt,
            winnames,
            wscheck]
    scmds = [symlinks]
    return run_checks(root, parent, cmds, scmds, paths)
439
def pbchk(root, parent, paths):
    """Run the checkin comment check, the per-file checks and the symbolic
    link check on everything changed relative to 'parent'.

    'paths' is accepted for symmetry with nits() but is not used: the checks
    always cover every changed file.

    Returns the result of run_checks(), which is non-zero if any check
    reported a problem, so that callers can tell success from failure."""

    cmds = [comchk,
            copyright,
            cstyle,
            hdrchk,
            jstyle,
            keywords,
            manlint,
            mapfilechk,
            shelllint,
            pkgfmt,
            winnames,
            wscheck]
    scmds = [symlinks]
    return run_checks(root, parent, cmds, scmds)
455
def main(cmd, args):
    """Parse the command line and run the requested check(s).

    'cmd' is the name this program was invoked as: when no -c option is
    given, "git-pbchk" selects pbchk and any other name selects nits.
    'args' is the argument list.  -p (or its alias -b) names the parent
    branch, -c names what to run ("pbchk", "nits" or the name of a single
    check function), and any remaining arguments are paths limiting the
    files checked.

    Returns whatever pbchk(), nits() or run_checks() returned for the
    selected check(s).  Exits with status 1 on a usage error, on an unknown
    check name, or when no parent branch can be determined."""

    parent_branch = None
    checkname = None

    usage = "Usage: %s [-c check] [-p branch] [path...]\n" % cmd

    try:
        opts, args = getopt.getopt(args, 'b:c:p:')
    except getopt.GetoptError as e:
        sys.stderr.write(str(e) + '\n')
        sys.stderr.write(usage)
        sys.exit(1)

    for opt, arg in opts:
        # We accept "-b" as an alias of "-p" for backwards compatibility.
        if opt in ('-p', '-b'):
            parent_branch = arg
        elif opt == '-c':
            checkname = arg

    if not parent_branch:
        parent_branch = git_parent_branch(git_branch())

    if not parent_branch:
        # git_parent_branch() returns None when there is no current branch;
        # say so here rather than handing git a revision named "None".
        sys.stderr.write("Could not determine the parent branch; "
                         "specify one with -p\n")
        sys.exit(1)

    if checkname is None:
        if cmd == 'git-pbchk':
            checkname = 'pbchk'
        else:
            checkname = 'nits'

    if checkname == 'pbchk':
        if args:
            sys.stderr.write("only complete workspaces may be pbchk'd\n")
            sys.exit(1)
        return pbchk(git_root(), parent_branch, None)

    if checkname == 'nits':
        return nits(git_root(), parent_branch, args)

    # Look the check up by name rather than eval()ing text taken from the
    # command line: a mistyped name then yields a usage error instead of a
    # traceback, and nothing but a plain name is ever accepted.
    check = globals().get(checkname)
    if not callable(check):
        sys.stderr.write("unknown check: %s\n" % checkname)
        sys.stderr.write(usage)
        sys.exit(1)

    # The remaining arguments are paths, not checks.  symlinks is the one
    # check which must be fed symbolic links rather than regular files.
    if check is symlinks:
        return run_checks(git_root(), parent_branch, [], [check], args)
    return run_checks(git_root(), parent_branch, [check], [], args)
492
if __name__ == '__main__':
    # Use main()'s return value as the exit status, so that a caller can see
    # when something was reported.  A return value of None exits with 0.
    try:
        sys.exit(main(os.path.basename(sys.argv[0]), sys.argv[1:]))
    except GitError as e:
        sys.stderr.write("failed to run git:\n %s\n" % str(e))
        sys.exit(1)
499