xref: /illumos-gate/usr/src/tools/scripts/git-pbchk.py (revision 3b3c8f601c5530d4928a71e238c573c623491c34)
1#!@TOOLS_PYTHON@ -Es
2#
3#  This program is free software; you can redistribute it and/or modify
4#  it under the terms of the GNU General Public License version 2
5#  as published by the Free Software Foundation.
6#
7#  This program is distributed in the hope that it will be useful,
8#  but WITHOUT ANY WARRANTY; without even the implied warranty of
9#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10#  GNU General Public License for more details.
11#
12#  You should have received a copy of the GNU General Public License
13#  along with this program; if not, write to the Free Software
14#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15#
16
17#
18# Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
19# Copyright 2008, 2012 Richard Lowe
20# Copyright 2019 Garrett D'Amore <garrett@damore.org>
21# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
22# Copyright 2016 Nexenta Systems, Inc.
23# Copyright (c) 2019, Joyent, Inc.
24# Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
25#
26
27from __future__ import print_function
28
29import getopt
30import io
31import os
32import re
33import subprocess
34import sys
35import tempfile
36
37if sys.version_info[0] < 3:
38    from cStringIO import StringIO
39else:
40    from io import StringIO
41
42#
43# Adjust the load path based on our location and the version of python into
44# which it is being loaded.  This assumes the normal onbld directory
45# structure, where we are in bin/ and the modules are in
46# lib/python(version)?/onbld/Scm/.  If that changes so too must this.
47#
48sys.path.insert(1, os.path.join(os.path.dirname(__file__), "..", "lib",
49                                "python%d.%d" % sys.version_info[:2]))
50
51#
52# Add the relative path to usr/src/tools to the load path, such that when run
53# from the source tree we use the modules also within the source tree.
54#
55sys.path.insert(2, os.path.join(os.path.dirname(__file__), ".."))
56
57from onbld.Scm import Ignore
58from onbld.Checks import Comments, Copyright, CStyle, HdrChk, WsCheck
59from onbld.Checks import JStyle, Keywords, ManLint, Mapfile, SpellCheck
60from onbld.Checks import ShellLint
61
class GitError(Exception):
    """Error raised by git() when a git command cannot be set up or run, or
    when it exits with a non-zero status."""
64
def git(command):
    """Run a git command and return its standard output as a list of lines.

    'command' is either a list of arguments or a single string, which is
    split on whitespace.  "git" is prepended to it.

    Each returned line is decoded as UTF-8 (undecodable bytes are replaced)
    and keeps its trailing newline.

    Raises GitError if the temporary file cannot be created, if git cannot
    be executed, or if git exits with a non-zero status; in the last case
    the exception carries whatever git wrote to its standard error."""

    if not isinstance(command, list):
        command = command.split()

    command = ["git"] + command

    try:
        tmpfile = tempfile.TemporaryFile(prefix="git-nits", mode="w+b")
    except EnvironmentError as e:
        raise GitError("Could not create temporary file: %s\n" % e)

    # The temporary file is closed on every path out of this function.
    try:
        try:
            p = subprocess.Popen(command,
                                 stdout=tmpfile,
                                 stderr=subprocess.PIPE)
        except OSError as e:
            raise GitError("could not execute %s: %s\n" % (command, e))

        # communicate() drains the stderr pipe while waiting for the child.
        # A bare wait() can block forever if git fills the pipe.
        _, errout = p.communicate()
        if p.returncode != 0:
            # On Python 3 stderr arrives as bytes; decode it so the message
            # is readable.  On Python 2 it is already a str.
            if not isinstance(errout, str):
                errout = errout.decode('utf-8', 'replace')
            raise GitError(errout)

        tmpfile.seek(0)
        return [l.decode('utf-8', 'replace') for l in tmpfile]
    finally:
        tmpfile.close()
95
def git_root():
    """Return the absolute path of the parent of the directory reported by
    'git rev-parse --git-dir', which is taken to be the workspace root."""

    output = git('rev-parse --git-dir')
    gitdir = output[0]
    parent = os.path.join(gitdir, os.path.pardir)

    return os.path.abspath(parent)
103
def git_branch():
    """Return the name of the current git branch.

    The branch is taken from the line of 'git branch' output that is marked
    with a leading '*'.  Returns None if there is no such line."""

    for line in git('branch'):
        if line[0] != '*':
            continue
        # NOTE(review): the lines returned by git() keep their line
        # terminator, so confirm whether this suffix test can match.
        if line.endswith('(no branch)'):
            return None
        return line.split()[1]

    return None
114
def git_parent_branch(branch):
    """Return the parent of the given git branch.

    If 'branch' tracks a remote branch, the tracked remote branch is
    returned; otherwise the result is origin/master.  A false 'branch'
    yields None.  Exits with status 1 if git reports no local branches."""

    if not branch:
        return None

    refs = git(["for-each-ref",
                "--format=%(refname:short) %(upstream:short)",
                "refs/heads/"])

    if not refs:
        sys.stderr.write("Failed finding git parent branch\n")
        sys.exit(1)

    for entry in refs:
        # Git 1.7 will leave a ' ' trailing any non-tracking branch
        if ' ' not in entry or entry.endswith(' \n'):
            continue
        local, remote = entry.split()
        if local == branch:
            return remote

    return 'origin/master'
138
def git_comments(parent):
    """Return the checkin comments of the commits on this branch which are
    not in 'parent', as a list of stripped lines.

    Exits with status 1 if git reports nothing."""

    log = git('log --pretty=tformat:%%B:SEP: %s..' % parent)

    if not log:
        sys.stderr.write("No outgoing changesets found - missing -p option?\n")
        sys.exit(1)

    comments = []
    for line in log:
        # ':SEP:' lines only delimit one commit message from the next.
        if line != ':SEP:\n':
            comments.append(line.strip())
    return comments
149
def git_file_list(parent, paths=None):
    """Return the set of files which have ever changed on this branch.

    'parent' names the revision the branch is compared against.  'paths',
    if given, is a sequence of paths to which the listing is limited; a
    single string is split on whitespace, and None or an empty value means
    no limit.

    Exits with status 1 if git reports nothing.

    NB: This includes files which no longer exist, or no longer actually
    differ."""

    if not paths:
        paths = []
    elif isinstance(paths, str):
        paths = paths.split()

    # Pass the arguments as a list so that paths containing whitespace
    # reach git intact instead of being split into several arguments.
    p = git(["log", "--name-only", "--pretty=format:", "%s.." % parent] +
            list(paths))

    if not p:
        sys.stderr.write("Failed building file-list from git\n")
        sys.exit(1)

    ret = set()
    for fname in p:
        fname = fname.strip()
        # The empty --pretty format leaves blank lines between commits.
        if fname:
            ret.add(fname)

    return ret
169
def not_check(root, cmd):
    """Build the exclusion test for the check named by 'cmd'.

    Returns the result of Ignore.ignore() for whichever of
    <root>/.git/<cmd>.NOT and <root>/exception_lists/<cmd> exist."""

    candidates = [os.path.join(root, ".git", "%s.NOT" % cmd),
                  os.path.join(root, "exception_lists", cmd)]
    ignorefiles = [path for path in candidates if os.path.exists(path)]
    return Ignore.ignore(root, ignorefiles)
178
def gen_files(root, parent, paths, exclude, filter=None):
    """Return a function producing file names, relative to the current
    directory, of any file changed on this branch (limited to 'paths' if
    requested), and excluding files for which exclude returns a true value.

    'filter' is called with each relative path and must return a true value
    for the path to be produced; it defaults to os.path.isfile.

    The returned function takes an optional 'select' predicate which is
    also applied to each relative path."""

    if filter is None:
        filter = os.path.isfile

    def ret(select=None):
        if not select:
            select = lambda x: True

        for repopath in git_file_list(parent, paths):
            # Names from git are relative to the workspace root; the checks
            # want them relative to the current directory.
            path = os.path.relpath(os.path.join(root, repopath), '.')
            try:
                # Pass a list so that names containing whitespace are not
                # split into several arguments.
                res = git(["diff", parent, "HEAD", path])
            except GitError:
                # This ignores all the errors that can be thrown. Usually, this
                # means that git returned non-zero because the file doesn't
                # exist, but it could also fail if git can't create a new file
                # or it can't be executed.  Such errors are 1) unlikely, and 2)
                # will be caught by other invocations of git().
                continue
            if not res:
                # No diff output: the file no longer differs from the parent.
                continue
            if filter(path) and select(path) and not exclude(repopath):
                yield path
    return ret
215
def gen_links(root, parent, paths, exclude):
    """Return a function producing symbolic link names, relative to the
    current directory, of any link changed on this branch (limited to
    'paths' if requested), and excluding those for which exclude returns a
    true value.

    This is gen_files() with os.path.islink as the file test."""

    is_link = os.path.islink
    return gen_files(root, parent, paths, exclude, is_link)
222
def comchk(root, parent, flist, output):
    """Check the checkin comments of this branch against 'parent'.

    'root' and 'flist' are unused; they are accepted so that this has the
    same signature as the other checks.  Returns the result of
    Comments.comchk()."""

    output.write("Comments:\n")

    comments = git_comments(parent)
    return Comments.comchk(comments, check_db=True, output=output)
228
229
def mapfilechk(root, parent, flist, output):
    """Run the Mapfile check over each changed mapfile.

    'flist' is called with a predicate selecting mapfiles by name.  Results
    are written to 'output'; the return value is the bitwise OR of the
    results of the individual files."""

    # We are interested in examining any file that has the following
    # in its final path segment:
    #    - Contains the word 'mapfile'
    #    - Begins with 'map.'
    #    - Ends with '.map'
    # We don't want to match unless these things occur in final path segment
    # because directory names with these strings don't indicate a mapfile.
    # We also ignore files with suffixes that tell us that the files
    # are not mapfiles.
    mapfile_re = re.compile(r'.*((mapfile[^/]*)|(/map\.+[^/]*)|(\.map))$',
        re.IGNORECASE)
    not_map_suffix_re = re.compile(r'.*\.[ch]$', re.IGNORECASE)

    def is_mapfile(name):
        return mapfile_re.match(name) and not not_map_suffix_re.match(name)

    output.write("Mapfile comments:\n")

    status = 0
    for name in flist(is_mapfile):
        with io.open(name, encoding='utf-8', errors='replace') as fh:
            status |= Mapfile.mapfilechk(fh, output=output)
    return status
253
def copyright(root, parent, flist, output):
    """Run the Copyright check over every file produced by 'flist'.

    Results are written to 'output'; the return value is the bitwise OR of
    the results of the individual files."""

    output.write("Copyrights:\n")

    status = 0
    for name in flist():
        with io.open(name, encoding='utf-8', errors='replace') as fh:
            status |= Copyright.copyright(fh, output=output)
    return status
261
def hdrchk(root, parent, flist, output):
    """Run the lenient header format check over every changed '.h' file.

    Results are written to 'output'; the return value is the bitwise OR of
    the results of the individual files."""

    def is_header(name):
        return name.endswith('.h')

    output.write("Header format:\n")

    status = 0
    for name in flist(is_header):
        with io.open(name, encoding='utf-8', errors='replace') as fh:
            status |= HdrChk.hdrchk(fh, lenient=True, output=output)
    return status
269
def cstyle(root, parent, flist, output):
    """Run the C style check over every changed '.c' and '.h' file.

    Files are opened in binary mode.  Results are written to 'output'; the
    return value is the bitwise OR of the results of the individual
    files."""

    def is_c_source(name):
        return name.endswith(('.c', '.h'))

    output.write("C style:\n")

    status = 0
    for name in flist(is_c_source):
        with io.open(name, mode='rb') as fh:
            status |= CStyle.cstyle(fh, output=output, picky=True,
                                    check_posix_types=True,
                                    check_continuation=True)
    return status
279
def jstyle(root, parent, flist, output):
    """Run the Java style check over every changed '.java' file.

    Files are opened in binary mode.  Results are written to 'output'; the
    return value is the bitwise OR of the results of the individual
    files."""

    def is_java(name):
        return name.endswith('.java')

    output.write("Java style:\n")

    status = 0
    for name in flist(is_java):
        with io.open(name, mode='rb') as fh:
            status |= JStyle.jstyle(fh, output=output, picky=True)
    return status
287
def manlint(root, parent, flist, output):
    """Run the man page lint and spelling checks over every changed file
    whose name ends in a man section suffix (a '.', a digit, then optional
    letters).

    Files are opened in binary mode.  Results are written to 'output'; the
    return value is the bitwise OR of the results of both checks on each
    file."""

    output.write("Man page format/spelling:\n")

    manfile_re = re.compile(r'.*\.[0-9][a-z]*$', re.IGNORECASE)

    def is_manpage(name):
        return manfile_re.match(name)

    status = 0
    for name in flist(is_manpage):
        with io.open(name, mode='rb') as fh:
            status |= ManLint.manlint(fh, output=output, picky=True)
            status |= SpellCheck.spellcheck(fh, output=output)
    return status
297
def shelllint(root, parent, flist, output):
    """Run the shell lint check over every changed shell script.

    A file counts as a shell script if its extension is '.sh' or '.ksh', or
    if it has no extension and its first line starts with '#' and mentions
    'sh' or 'ksh' as a word.  Files are opened in binary mode for the check.
    Results are written to 'output'; the return value is the bitwise OR of
    the results of the individual files."""

    output.write("Shell lint:\n")

    def isshell(name):
        ext = os.path.splitext(name)[1]
        if ext in ('.sh', '.ksh'):
            return True
        if ext != '':
            return False
        # No extension: fall back to looking at the interpreter line.
        with io.open(name, mode='r', errors='ignore') as fh:
            return bool(re.match(r'^#.*\bk?sh\b', fh.readline()))

    status = 0
    for name in flist(isshell):
        with io.open(name, mode='rb') as fh:
            status |= ShellLint.lint(fh, output=output)

    return status
317
def keywords(root, parent, flist, output):
    """Run the SCCS keywords check over every file produced by 'flist'.

    Results are written to 'output'; the return value is the bitwise OR of
    the results of the individual files."""

    output.write("SCCS Keywords:\n")

    status = 0
    for name in flist():
        with io.open(name, encoding='utf-8', errors='replace') as fh:
            status |= Keywords.keywords(fh, output=output)
    return status
325
def wscheck(root, parent, flist, output):
    """Run the white space check over every file produced by 'flist'.

    Results are written to 'output'; the return value is the bitwise OR of
    the results of the individual files."""

    output.write("white space nits:\n")

    status = 0
    for name in flist():
        with io.open(name, encoding='utf-8', errors='replace') as fh:
            status |= WsCheck.wscheck(fh, output=output)
    return status
333
def symlinks(root, parent, flist, output):
    """Report every name produced by 'flist' as a symbolic link.

    Each name is written to 'output', indented by two spaces.  Returns 1 if
    any name was produced and 0 otherwise."""

    output.write("Symbolic links:\n")

    found = 0
    for name in flist():
        output.write("  " + name + "\n")
        found = 1
    return found
341
def iswinreserved(name):
    """Return True if 'name' is one of the reserved Windows device names
    listed below, compared case-insensitively, either on its own or
    followed by a '.' and anything else."""

    reserved = (
        'con', 'prn', 'aux', 'nul',
        'com1', 'com2', 'com3', 'com4', 'com5',
        'com6', 'com7', 'com8', 'com9', 'com0',
        'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5',
        'lpt6', 'lpt7', 'lpt8', 'lpt9', 'lpt0')

    # Everything before the first '.' (or the whole name if there is none)
    # is what has to equal a reserved word.
    stem = name.lower().split('.', 1)[0]
    return stem in reserved
354
def haswinspecial(name):
    """Return True if 'name' contains any of the characters
    < > : " \\ | ? *"""

    specials = '<>:"\\|?*'
    return any(ch in specials for ch in name)
361
def winnames(root, parent, flist, output):
    """Report changed files whose names are not usable on Windows.

    A name is reported if it contains a character flagged by
    haswinspecial(), or else if any of its '/'-separated components is
    flagged by iswinreserved().  Each name is reported at most once.
    Returns 1 if anything was reported and 0 otherwise."""

    output.write("Illegal filenames (Windows):\n")

    status = 0
    for name in flist():
        if haswinspecial(name):
            output.write("  "+name+": invalid character in name\n")
            status |= 1
        elif any(iswinreserved(part) for part in name.split('/')):
            output.write("  "+name+": reserved file name\n")
            status |= 1

    return status
379
def run_checks(root, parent, cmds, scmds, paths='', opts={}):
    """Run each check in 'cmds' against the changed files and each check in
    'scmds' against the changed symbolic links, printing the output of any
    check which fails.

    Every check is called as check(root, parent, flist, output=...).
    'opts' is accepted but not used here.

    Returns the bitwise OR of the results of all the checks.

    NB: the function name of the commands passed in is used to name the NOT
    file which excepts files from them."""

    ret = 0

    for checks, want_links in ((cmds, False), (scmds, True)):
        for cmd in checks:
            if want_links:
                make_flist = gen_links
            else:
                make_flist = gen_files

            s = StringIO()

            exclude = not_check(root, cmd.__name__)
            flist = make_flist(root, parent, paths, exclude)
            result = cmd(root, parent, flist, output=s)
            ret |= result

            # Only failing checks have their output shown.
            if result != 0:
                print(s.getvalue())

    return ret
414
def nits(root, parent, paths):
    """Run every check except the checkin comment check against the files
    changed relative to 'parent', limited to 'paths' if any are given.

    Returns the result of run_checks(), so that callers can tell whether
    any check failed."""

    cmds = [copyright,
            cstyle,
            hdrchk,
            jstyle,
            keywords,
            manlint,
            mapfilechk,
            shelllint,
            winnames,
            wscheck]
    scmds = [symlinks]
    return run_checks(root, parent, cmds, scmds, paths)
428
def pbchk(root, parent, paths):
    """Run every check, including the checkin comment check, against all
    the files changed relative to 'parent'.

    'paths' is accepted for symmetry with nits() but is not used: the
    checks are never limited to a subset of the workspace.

    Returns the result of run_checks(), so that callers can tell whether
    any check failed."""

    cmds = [comchk,
            copyright,
            cstyle,
            hdrchk,
            jstyle,
            keywords,
            manlint,
            mapfilechk,
            shelllint,
            winnames,
            wscheck]
    scmds = [symlinks]
    return run_checks(root, parent, cmds, scmds)
443
def main(cmd, args):
    """Parse the command line and run the requested checks.

    'cmd' is the name the program was invoked as; when no -c option is
    given it selects the default: 'pbchk' if invoked as git-pbchk, 'nits'
    otherwise.  'args' is the argument list without the program name.

    Options:
        -c check    run 'pbchk', 'nits', or a single named check
        -p branch   compare against 'branch' (-b is accepted as an alias)

    Any remaining arguments are paths to which the checks are limited.
    Exits with status 1 on a usage error."""

    parent_branch = None
    checkname = None

    try:
        opts, args = getopt.getopt(args, 'b:c:p:')
    except getopt.GetoptError as e:
        sys.stderr.write(str(e) + '\n')
        sys.stderr.write("Usage: %s [-c check] [-p branch] [path...]\n" % cmd)
        sys.exit(1)

    for opt, arg in opts:
        # We accept "-b" as an alias of "-p" for backwards compatibility.
        if opt in ('-p', '-b'):
            parent_branch = arg
        elif opt == '-c':
            checkname = arg

    if not parent_branch:
        parent_branch = git_parent_branch(git_branch())

    if not parent_branch:
        # Without this, "None.." would be handed to git as a revision.
        sys.stderr.write("could not determine the parent branch; "
                         "use -p to specify it\n")
        sys.exit(1)

    if checkname is None:
        if cmd == 'git-pbchk':
            checkname = 'pbchk'
        else:
            checkname = 'nits'

    if checkname == 'pbchk':
        if args:
            sys.stderr.write("only complete workspaces may be pbchk'd\n")
            sys.exit(1)
        pbchk(git_root(), parent_branch, None)
    elif checkname == 'nits':
        nits(git_root(), parent_branch, args)
    else:
        # Look the check up by name rather than eval()ing a string taken
        # from the command line.
        file_checks = dict((f.__name__, f) for f in
                           (comchk, copyright, cstyle, hdrchk, jstyle,
                            keywords, manlint, mapfilechk, shelllint,
                            winnames, wscheck))
        link_checks = dict((f.__name__, f) for f in (symlinks,))

        # The paths belong in the 'paths' argument of run_checks(); its
        # fourth argument is the list of symbolic link checks.
        if checkname in file_checks:
            run_checks(git_root(), parent_branch,
                       [file_checks[checkname]], [], args)
        elif checkname in link_checks:
            run_checks(git_root(), parent_branch,
                       [], [link_checks[checkname]], args)
        else:
            sys.stderr.write("unknown check: %s\n" % checkname)
            sys.stderr.write("Usage: %s [-c check] [-p branch] [path...]\n"
                             % cmd)
            sys.exit(1)
480
if __name__ == '__main__':
    # The name we were invoked as selects the default set of checks.
    progname = os.path.basename(sys.argv[0])
    try:
        main(progname, sys.argv[1:])
    except GitError as err:
        sys.stderr.write("failed to run git:\n %s\n" % str(err))
        sys.exit(1)
487