xref: /illumos-gate/usr/src/tools/scripts/git-pbchk.py (revision d48be21240dfd051b689384ce2b23479d757f2d8)
1#!@TOOLS_PYTHON@ -Es
2#
3#  This program is free software; you can redistribute it and/or modify
4#  it under the terms of the GNU General Public License version 2
5#  as published by the Free Software Foundation.
6#
7#  This program is distributed in the hope that it will be useful,
8#  but WITHOUT ANY WARRANTY; without even the implied warranty of
9#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10#  GNU General Public License for more details.
11#
12#  You should have received a copy of the GNU General Public License
13#  along with this program; if not, write to the Free Software
14#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15#
16
17#
18# Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
19# Copyright 2008, 2012 Richard Lowe
20# Copyright 2019 Garrett D'Amore <garrett@damore.org>
21# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
22# Copyright 2016 Nexenta Systems, Inc.
23# Copyright (c) 2019, Joyent, Inc.
24# Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
25#
26
27from __future__ import print_function
28
29import getopt
30import io
31import os
32import re
33import subprocess
34import sys
35import tempfile
36
37if sys.version_info[0] < 3:
38    from cStringIO import StringIO
39else:
40    from io import StringIO
41
42#
43# Adjust the load path based on our location and the version of python into
44# which it is being loaded.  This assumes the normal onbld directory
45# structure, where we are in bin/ and the modules are in
46# lib/python(version)?/onbld/Scm/.  If that changes so too must this.
47#
48sys.path.insert(1, os.path.join(os.path.dirname(__file__), "..", "lib",
49                                "python%d.%d" % sys.version_info[:2]))
50
51#
52# Add the relative path to usr/src/tools to the load path, such that when run
53# from the source tree we use the modules also within the source tree.
54#
55sys.path.insert(2, os.path.join(os.path.dirname(__file__), ".."))
56
57from onbld.Scm import Ignore
58from onbld.Checks import Comments, Copyright, CStyle, HdrChk, WsCheck
59from onbld.Checks import JStyle, Keywords, ManLint, Mapfile, SpellCheck
60from onbld.Checks import ShellLint, PkgFmt
61
class GitError(Exception):
    """Raised when a git command cannot be started or reports failure."""
64
def git(command):
    """Run a git command and return its standard output as a list of lines.

    'command' is either a list of arguments or a single string, which is
    split on whitespace; "git" is prepended in both cases.  Standard output
    is spooled through a temporary file and every line (line terminator
    included) is decoded as UTF-8, with undecodable bytes replaced.

    Raises GitError if the temporary file cannot be created, if git cannot
    be executed, or if git exits non-zero (the message is then whatever git
    wrote to its standard error)."""

    if not isinstance(command, list):
        command = command.split()

    command = ["git"] + command

    try:
        tmpfile = tempfile.TemporaryFile(prefix="git-nits", mode="w+b")
    except EnvironmentError as e:
        raise GitError("Could not create temporary file: %s\n" % e)

    # Release the temporary file on every exit path, not only on success.
    try:
        try:
            p = subprocess.Popen(command,
                                 stdout=tmpfile,
                                 stderr=subprocess.PIPE)
        except OSError as e:
            raise GitError("could not execute %s: %s\n" % (command, e))

        # communicate() drains stderr while waiting for the child; calling
        # wait() first and reading afterwards can deadlock once git has
        # filled the stderr pipe.
        _, errout = p.communicate()
        if p.returncode != 0:
            # Python 3 hands back bytes; decode so the message is the text
            # git printed rather than a b'...' repr.  On Python 2 the value
            # is already a str and is passed through untouched.
            if not isinstance(errout, str):
                errout = errout.decode('utf-8', 'replace')
            raise GitError(errout)

        tmpfile.seek(0)
        return [line.decode('utf-8', 'replace') for line in tmpfile]
    finally:
        tmpfile.close()
95
def git_root():
    """Return the absolute path of the directory that holds the git
    directory reported by 'git rev-parse --git-dir'"""

    gitdir = git('rev-parse --git-dir')[0]
    parent = os.path.join(gitdir, os.path.pardir)

    return os.path.abspath(parent)
103
def git_branch():
    """Return the name of the branch which 'git branch' marks as current.

    Returns None if no line of the output is marked with '*', or if the
    marked line ends with '(no branch)'."""

    for line in git('branch'):
        if line[0] != '*':
            continue
        # NOTE(review): the lines handed back by git() keep their line
        # terminator, so this suffix test looks like it can never match --
        # confirm whether the line was meant to be stripped first.
        if line.endswith('(no branch)'):
            return None
        return line.split()[1]
114
def git_parent_branch(branch):
    """Return the parent of the git branch named by 'branch'.

    A false 'branch' yields None.  Otherwise the upstream that
    'git for-each-ref' reports for the local branch is returned, falling
    back to origin/master when the branch has none.  Exits with status 1
    if git lists no local branches at all."""

    if not branch:
        return None

    refs = git(["for-each-ref", "--format=%(refname:short) %(upstream:short)",
                "refs/heads/"])

    if not refs:
        sys.stderr.write("Failed finding git parent branch\n")
        sys.exit(1)

    for ref in refs:
        # Git 1.7 will leave a ' ' trailing any non-tracking branch, so
        # only lines with something after the space name an upstream.
        if ' ' not in ref or ref.endswith(' \n'):
            continue
        local, remote = ref.split()
        if local == branch:
            return remote
    return 'origin/master'
138
def git_comments(parent):
    """Return the checkin comment lines which 'git log <parent>..' reports.

    Each line is stripped of surrounding whitespace and the ':SEP:' marker
    lines emitted after every commit body are dropped.  Exits with status 1
    if git produces no output at all."""

    log = git('log --pretty=tformat:%%B:SEP: %s..' % parent)

    if not log:
        sys.stderr.write("No outgoing changesets found - missing -p option?\n")
        sys.exit(1)

    comments = []
    for line in log:
        if line != ':SEP:\n':
            comments.append(line.strip())
    return comments
149
def git_file_list(parent, paths=None):
    """Return the set of files which have ever changed on this branch.

    'paths' optionally limits the listing; it may be a list of paths, a
    whitespace-separated string of paths, or empty/None for no limit.

    NB: This includes files which no longer exist, or no longer actually
    differ.

    Exits with status 1 if git produces no output."""

    if not paths:
        paths = []
    elif isinstance(paths, str):
        paths = paths.split()

    # Build the argument list directly, rather than formatting and
    # re-splitting a string, so that a path containing whitespace survives.
    command = ["log", "--name-only", "--pretty=format:", "%s.." % parent]
    if paths:
        # "--" stops git from trying to read a path as a revision.
        command.append("--")
        command.extend(paths)

    p = git(command)

    if not p:
        sys.stderr.write("Failed building file-list from git\n")
        sys.exit(1)

    # The empty --pretty format leaves blank separator lines between
    # commits; skip those and let the set collapse repeated names.
    return set(fname.strip() for fname in p
               if fname and not fname.isspace())
169
def not_check(root, cmd):
    """Return the exclusion function for the check named by 'cmd'.

    The function is built by Ignore.ignore() from whichever of
    <root>/.git/info/<cmd>.NOT and <root>/exception_lists/<cmd> exist, and
    is expected to return True for a file which should be excluded."""

    candidates = (os.path.join(root, ".git/info", "%s.NOT" % cmd),
                  os.path.join(root, "exception_lists", cmd))
    ignorefiles = [name for name in candidates if os.path.exists(name)]
    return Ignore.ignore(root, ignorefiles)
178
def gen_files(root, parent, paths, exclude, filter=None):
    """Return a function producing file names, relative to the current
    directory, of any file changed on this branch (limited to 'paths' if
    requested), and excluding files for which exclude returns a true value

    'filter' is applied to each candidate path and defaults to
    os.path.isfile.  The returned function takes an optional 'select'
    predicate and is a generator; a path is yielded only if it passes
    'filter' and 'select', still differs between 'parent' and HEAD, and is
    not excluded."""

    if filter is None:
        filter = os.path.isfile

    def ret(select=None):
        if not select:
            select = lambda x: True

        for repo_path in git_file_list(parent, paths):
            # Names from git are relative to the workspace root; callers
            # want them relative to the current directory.
            path = os.path.relpath(os.path.join(root, repo_path))

            # Apply the cheap predicates (in their original order) before
            # paying for a git process per file.
            if not (filter(path) and select(path)):
                continue

            try:
                # An argument list plus "--" keeps paths that contain
                # whitespace, or that resemble a revision, intact.
                res = git(["diff", "%s" % parent, "HEAD", "--", path])
            except GitError:
                # This ignores all the errors that can be thrown. Usually, this
                # means that git returned non-zero because the file doesn't
                # exist, but it could also fail if git can't create a new file
                # or it can't be executed.  Such errors are 1) unlikely, and 2)
                # will be caught by other invocations of git().
                continue

            # An empty diff means the file no longer actually differs.
            if res and not exclude(repo_path):
                yield path
    return ret
215
def gen_links(root, parent, paths, exclude):
    """Return a function producing the names of symbolic links, relative to
    the current directory, among the files changed on this branch (limited
    to 'paths' if requested), excluding those for which exclude returns a
    true value.  This is gen_files() with os.path.islink as its filter."""

    return gen_files(root, parent, paths, exclude, os.path.islink)
222
def comchk(root, parent, flist, output):
    """Run Comments.comchk over the checkin comments found relative to
    'parent', writing to 'output', and return its result.  'root' and
    'flist' are not used."""

    output.write("Comments:\n")

    comments = git_comments(parent)
    return Comments.comchk(comments, check_db=True, output=output)
228
229
def mapfilechk(root, parent, flist, output):
    """Run Mapfile.mapfilechk over every file from 'flist' whose name looks
    like a mapfile, writing to 'output'.  Return the bitwise OR of the
    individual results."""

    # A file is of interest when the final segment of its path:
    #    - Contains the word 'mapfile'
    #    - Begins with 'map.'
    #    - Ends with '.map'
    # Matches elsewhere in the path are deliberately not accepted, because
    # a directory named like this says nothing about the files within it.
    # Files whose suffix shows that they are not mapfiles are ignored too.
    MapfileRE = re.compile(r'.*((mapfile[^/]*)|(/map\.+[^/]*)|(\.map))$',
        re.IGNORECASE)
    NotMapSuffixRE = re.compile(r'.*\.[ch]$', re.IGNORECASE)

    def ismapfile(name):
        return MapfileRE.match(name) and not NotMapSuffixRE.match(name)

    output.write("Mapfile comments:\n")

    status = 0
    for name in flist(ismapfile):
        with io.open(name, encoding='utf-8', errors='replace') as src:
            status |= Mapfile.mapfilechk(src, output=output)
    return status
253
def copyright(root, parent, flist, output):
    """Run Copyright.copyright over every file from 'flist', writing to
    'output'.  Return the bitwise OR of the individual results."""

    output.write("Copyrights:\n")
    status = 0
    for name in flist():
        with io.open(name, encoding='utf-8', errors='replace') as src:
            status |= Copyright.copyright(src, output=output)
    return status
261
def hdrchk(root, parent, flist, output):
    """Run HdrChk.hdrchk (leniently) over every file from 'flist' whose
    name ends in '.h', writing to 'output'.  Return the bitwise OR of the
    individual results."""

    def isheader(name):
        return name.endswith('.h')

    output.write("Header format:\n")
    status = 0
    for name in flist(isheader):
        with io.open(name, encoding='utf-8', errors='replace') as src:
            status |= HdrChk.hdrchk(src, lenient=True, output=output)
    return status
269
def cstyle(root, parent, flist, output):
    """Run CStyle.cstyle over every file from 'flist' whose name ends in
    '.c' or '.h', writing to 'output'.  Files are opened in binary mode.
    Return the bitwise OR of the individual results."""

    def iscsource(name):
        return name.endswith(('.c', '.h'))

    output.write("C style:\n")
    status = 0
    for name in flist(iscsource):
        with io.open(name, mode='rb') as src:
            status |= CStyle.cstyle(src, output=output, picky=True,
                                    check_posix_types=True,
                                    check_continuation=True)
    return status
279
def jstyle(root, parent, flist, output):
    """Run JStyle.jstyle over every file from 'flist' whose name ends in
    '.java', writing to 'output'.  Files are opened in binary mode.  Return
    the bitwise OR of the individual results."""

    def isjava(name):
        return name.endswith('.java')

    output.write("Java style:\n")
    status = 0
    for name in flist(isjava):
        with io.open(name, mode='rb') as src:
            status |= JStyle.jstyle(src, output=output, picky=True)
    return status
287
def manlint(root, parent, flist, output):
    """Run ManLint.manlint and then SpellCheck.spellcheck over every file
    from 'flist' whose name ends in a manual-section suffix (a dot, a
    digit, then optional letters), writing to 'output'.  Both checks are
    handed the same binary-mode file object.  Return the bitwise OR of all
    the individual results."""

    ManfileRE = re.compile(r'.*\.[0-9][a-z]*$', re.IGNORECASE)

    output.write("Man page format/spelling:\n")
    status = 0
    for name in flist(ManfileRE.match):
        with io.open(name, mode='rb') as src:
            status |= ManLint.manlint(src, output=output, picky=True)
            status |= SpellCheck.spellcheck(src, output=output)
    return status
297
def shelllint(root, parent, flist, output):
    """Run ShellLint.lint over every shell script from 'flist', writing to
    'output'.  Scripts are opened in binary mode.  Return the bitwise OR of
    the individual results."""

    def isshell(name):
        # A '.sh' or '.ksh' extension settles it; any other extension rules
        # the file out.  With no extension at all, the first line is
        # inspected for a '#' line naming sh or ksh.
        ext = os.path.splitext(name)[1]
        if ext in ('.sh', '.ksh'):
            return True
        if ext != '':
            return False
        with io.open(name, mode='r', errors='ignore') as src:
            firstline = src.readline()
        return bool(re.match(r'^#.*\bk?sh\b', firstline))

    output.write("Shell lint:\n")

    status = 0
    for name in flist(isshell):
        with io.open(name, mode='rb') as src:
            status |= ShellLint.lint(src, output=output)

    return status
317
def pkgfmt(root, parent, flist, output):
    """Run PkgFmt.check over every file from 'flist' whose name ends in
    '.p5m', writing to 'output'.  Files are opened in binary mode.  Return
    the bitwise OR of the individual results."""

    def ismanifest(name):
        return name.endswith('.p5m')

    output.write("Package manifests:\n")

    status = 0
    for name in flist(ismanifest):
        with io.open(name, mode='rb') as src:
            status |= PkgFmt.check(src, output=output)

    return status
327
def keywords(root, parent, flist, output):
    """Run Keywords.keywords over every file from 'flist', writing to
    'output'.  Return the bitwise OR of the individual results."""

    output.write("SCCS Keywords:\n")
    status = 0
    for name in flist():
        with io.open(name, encoding='utf-8', errors='replace') as src:
            status |= Keywords.keywords(src, output=output)
    return status
335
def wscheck(root, parent, flist, output):
    """Run WsCheck.wscheck over every file from 'flist', writing to
    'output'.  Return the bitwise OR of the individual results."""

    output.write("white space nits:\n")
    status = 0
    for name in flist():
        with io.open(name, encoding='utf-8', errors='replace') as src:
            status |= WsCheck.wscheck(src, output=output)
    return status
343
def symlinks(root, parent, flist, output):
    """List every name produced by 'flist' on 'output', one per line and
    indented by two spaces.  Return 1 if anything was listed, 0 if not."""

    output.write("Symbolic links:\n")
    found = 0
    for link in flist():
        output.write("  " + link + "\n")
        found = 1
    return found
351
def iswinreserved(name):
    """Return True if 'name', compared without regard to case, is one of
    the reserved Windows device names listed below, either alone or
    followed by a '.' and anything else."""

    reserved = frozenset([
        'con', 'prn', 'aux', 'nul',
        'com1', 'com2', 'com3', 'com4', 'com5',
        'com6', 'com7', 'com8', 'com9', 'com0',
        'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5',
        'lpt6', 'lpt7', 'lpt8', 'lpt9', 'lpt0'])

    # No reserved name contains a '.', so "equal to a reserved name, or
    # starting with one followed by '.'" is the same as "everything before
    # the first '.' is a reserved name".
    stem = name.lower().split('.', 1)[0]
    return stem in reserved
364
def haswinspecial(name):
    """Return True if 'name' contains any of the characters < > : " \\ | ?
    or *, and False if it contains none of them."""

    specials = '<>:"\\|?*'
    return any(ch in specials for ch in name)
371
def winnames(root, parent, flist, output):
    """Report on 'output' every name from 'flist' which contains a
    character rejected by haswinspecial(), or which has a '/'-separated
    component rejected by iswinreserved().  Each file is reported once at
    most.  Return 1 if anything was reported, 0 if not."""

    output.write("Illegal filenames (Windows):\n")
    status = 0
    for fname in flist():
        if haswinspecial(fname):
            output.write("  "+fname+": invalid character in name\n")
            status = 1
        elif any(iswinreserved(part) for part in fname.split('/')):
            output.write("  "+fname+": reserved file name\n")
            status = 1

    return status
389
def run_checks(root, parent, cmds, scmds, paths='', opts={}):
    """Run the checks given in 'cmds' and 'scmds', expected to have
    well-known signatures, and report results for any which fail.

    Checks in 'cmds' are handed a file generator from gen_files(); checks
    in 'scmds' are handed one from gen_links().  Each check writes to its
    own buffer, which is printed only if the check returns non-zero.
    'opts' is not used.

    Return failure if any of them did.

    NB: the function name of the commands passed in is used to name the NOT
    file which excepts files from them."""

    # Pair each list of checks with the factory for its file generator.
    # The factories are lambdas so that nothing is looked up or built
    # unless there is a check to run.
    batches = (
        (cmds, lambda excl: gen_files(root, parent, paths, excl)),
        (scmds, lambda excl: gen_links(root, parent, paths, excl)),
    )

    status = 0

    for checks, make_flist in batches:
        for check in checks:
            buf = StringIO()

            skip = not_check(root, check.__name__)
            result = check(root, parent, make_flist(skip), output=buf)
            status |= result

            if result != 0:
                print(buf.getvalue())

    return status
424
def nits(root, parent, paths):
    """Run every check except the checkin-comment check over the files
    changed relative to 'parent', limited to 'paths' if any are given.

    Return the combined result of run_checks(), which is non-zero if any
    check failed, so that the caller can tell whether anything was
    reported."""

    cmds = [copyright,
            cstyle,
            hdrchk,
            jstyle,
            keywords,
            manlint,
            mapfilechk,
            shelllint,
            pkgfmt,
            winnames,
            wscheck]
    scmds = [symlinks]
    return run_checks(root, parent, cmds, scmds, paths)
439
def pbchk(root, parent, paths):
    """Run every check, including the checkin-comment check, over all of
    the files changed relative to 'parent'.

    'paths' is accepted for symmetry with nits() but is not used: the whole
    workspace is always checked.

    Return the combined result of run_checks(), which is non-zero if any
    check failed, so that the caller can tell whether anything was
    reported."""

    cmds = [comchk,
            copyright,
            cstyle,
            hdrchk,
            jstyle,
            keywords,
            manlint,
            mapfilechk,
            shelllint,
            pkgfmt,
            winnames,
            wscheck]
    scmds = [symlinks]
    return run_checks(root, parent, cmds, scmds)
455
def main(cmd, args):
    """Parse the command line and run the requested checks.

    'cmd' is the name the program was invoked under; when no -c option is
    given it selects the default check set ('pbchk' for git-pbchk, 'nits'
    otherwise).  'args' holds the remaining command-line arguments:

        -p branch   compare against 'branch' (-b is accepted as an alias)
        -c check    run the named check, or 'pbchk' or 'nits'
        path...     limit the checks to these paths (not valid with pbchk)

    Exits with status 1 on a usage error.  Otherwise returns whatever the
    selected check set returned."""

    parent_branch = None
    checkname = None

    try:
        opts, args = getopt.getopt(args, 'b:c:p:')
    except getopt.GetoptError as e:
        sys.stderr.write(str(e) + '\n')
        sys.stderr.write("Usage: %s [-c check] [-p branch] [path...]\n" % cmd)
        sys.exit(1)

    for opt, arg in opts:
        # We accept "-b" as an alias of "-p" for backwards compatibility.
        if opt in ('-p', '-b'):
            parent_branch = arg
        elif opt == '-c':
            checkname = arg

    if not parent_branch:
        parent_branch = git_parent_branch(git_branch())

    if checkname is None:
        if cmd == 'git-pbchk':
            checkname = 'pbchk'
        else:
            checkname = 'nits'

    if checkname == 'pbchk':
        if args:
            sys.stderr.write("only complete workspaces may be pbchk'd\n")
            sys.exit(1)
        return pbchk(git_root(), parent_branch, None)

    if checkname == 'nits':
        return nits(git_root(), parent_branch, args)

    # NOTE(review): eval() turns the -c argument into a check function, and
    # will evaluate any expression it is given.  That is tolerable only
    # because the argument comes from the invoking user's own command line;
    # consider an explicit table of check names instead.
    #
    # The paths belong in run_checks()'s 'paths' parameter.  There are no
    # symlink checks to run here, so 'scmds' is the empty list.
    root = git_root()
    return run_checks(root, parent_branch, [eval(checkname)], [], args)
492
if __name__ == '__main__':
    # The name we were invoked under selects the default set of checks.
    progname = os.path.basename(sys.argv[0])
    arguments = sys.argv[1:]
    try:
        main(progname, arguments)
    except GitError as e:
        sys.stderr.write("failed to run git:\n %s\n" % str(e))
        sys.exit(1)
499