xref: /illumos-gate/usr/src/tools/scripts/git-pbchk.py (revision 4b9db4f6425b1a08fca4390f446072c4a6aae8d5)
1#!@TOOLS_PYTHON@ -Es
2#
3#  This program is free software; you can redistribute it and/or modify
4#  it under the terms of the GNU General Public License version 2
5#  as published by the Free Software Foundation.
6#
7#  This program is distributed in the hope that it will be useful,
8#  but WITHOUT ANY WARRANTY; without even the implied warranty of
9#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10#  GNU General Public License for more details.
11#
12#  You should have received a copy of the GNU General Public License
13#  along with this program; if not, write to the Free Software
14#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15#
16
17#
18# Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
19# Copyright 2008, 2012 Richard Lowe
20# Copyright 2019 Garrett D'Amore <garrett@damore.org>
21# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
22# Copyright 2016 Nexenta Systems, Inc.
23# Copyright (c) 2019, Joyent, Inc.
24# Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
25# Copyright 2024 Bill Sommerfeld
26# Copyright 2024 Oxide Computer Company
27#
28
29from __future__ import print_function
30
31import getopt
32import io
33import os
34import re
35import subprocess
36import sys
37import tempfile
38import textwrap
39
40if sys.version_info[0] < 3:
41    from cStringIO import StringIO
42else:
43    from io import StringIO
44
45#
46# Adjust the load path based on our location and the version of python into
47# which it is being loaded.  This assumes the normal onbld directory
48# structure, where we are in bin/ and the modules are in
49# lib/python(version)?/onbld/Scm/.  If that changes so too must this.
50#
51sys.path.insert(1, os.path.join(os.path.dirname(__file__), "..", "lib",
52                                "python%d.%d" % sys.version_info[:2]))
53
54#
55# Add the relative path to usr/src/tools to the load path, such that when run
56# from the source tree we use the modules also within the source tree.
57#
58sys.path.insert(2, os.path.join(os.path.dirname(__file__), ".."))
59
60from onbld.Scm import Ignore
61from onbld.Checks import Comments, Copyright, CStyle, HdrChk, WsCheck
62from onbld.Checks import JStyle, Keywords, ManLint, Mapfile, SpellCheck
63from onbld.Checks import ShellLint, PkgFmt
64
class GitError(Exception):
    """Raised when a git command cannot be run or exits unsuccessfully."""
67
def git(command):
    """Run a git sub-command and return its standard output as a list of lines.

    'command' is either an argument list or a single string, which is split
    on whitespace.  "git" is prepended in both cases.

    Standard output is spooled through a temporary file and returned as a
    list of text lines (each keeps its trailing newline), decoded as UTF-8
    with undecodable bytes replaced.  Standard error is captured separately
    and is only used as the message of the GitError raised on failure.

    Raises GitError if the temporary file cannot be created, if git cannot
    be executed, or if git exits with a non-zero status."""

    if not isinstance(command, list):
        command = command.split()

    command = ["git"] + command

    try:
        tmpfile = tempfile.TemporaryFile(prefix="git-nits", mode="w+b")
    except EnvironmentError as e:
        raise GitError("Could not create temporary file: %s\n" % e)

    # The context manager guarantees the temporary file is closed on every
    # exit path, including the error ones.
    with tmpfile:
        try:
            p = subprocess.Popen(command,
                                 stdout=tmpfile,
                                 stderr=subprocess.PIPE)
        except OSError as e:
            raise GitError("could not execute %s: %s\n" % (command, e))

        # communicate() drains stderr while waiting for the child.  A bare
        # wait() can deadlock once the child fills the stderr pipe.
        _, errout = p.communicate()
        if p.returncode != 0:
            # On Python 3 the captured stderr is bytes; decode it so the
            # exception text is readable rather than a bytes repr.
            if not isinstance(errout, str):
                errout = errout.decode('utf-8', 'replace')
            raise GitError(errout)

        tmpfile.seek(0)
        return [l.decode('utf-8', 'replace') for l in tmpfile]
98
def git_root():
    """Return the absolute path of the top level of the current workspace,
    as reported by 'git rev-parse --show-toplevel'."""

    toplevel = git('rev-parse --show-toplevel')[0]
    return os.path.abspath(toplevel.strip())
106
def git_branch():
    """Return the name of the branch 'git branch' marks as current.

    Returns None when no line is marked current, or when the marked line
    ends with '(no branch)'."""

    for entry in git('branch'):
        # Only the line git marks with a leading '*' is the current branch.
        if not entry.startswith('*'):
            continue
        # NOTE(review): git() returns raw lines, which appear to keep their
        # trailing newline, so this suffix test looks like it cannot match
        # as written -- confirm before relying on the None result.
        if entry.endswith('(no branch)'):
            return None
        return entry.split()[1]
117
def git_parent_branch(branch):
    """Return the upstream ("parent") of the named local branch.

    Returns None if 'branch' is empty or None.  Otherwise returns the remote
    branch that 'branch' tracks, or 'origin/master' if it tracks nothing.
    Exits with status 1 if git reports no local branches at all."""

    if not branch:
        return None

    refs = git(["for-each-ref",
                "--format=%(refname:short) %(upstream:short)",
                "refs/heads/"])

    if not refs:
        sys.stderr.write("Failed finding git parent branch\n")
        sys.exit(1)

    for ref in refs:
        # Git 1.7 will leave a ' ' trailing any non-tracking branch, so such
        # lines (and lines with no separator at all) carry no upstream.
        if ' ' not in ref or ref.endswith(' \n'):
            continue
        local, remote = ref.split()
        if local == branch:
            return remote

    return 'origin/master'
141
def slices(strlist, sep):
    """Yield (start, end) index pairs for each run of 'strlist' delimited by
    elements equal to 'sep'.  'end' is exclusive, and a trailing run with no
    closing separator is yielded as well."""
    start = 0
    for pos, item in enumerate(strlist):
        if item != sep:
            continue
        yield (start, pos)
        start = pos + 1

    # Anything left after the last separator forms a final slice.
    if start < len(strlist):
        yield (start, len(strlist))
152
def git_comments(parent):
    """Return the commit messages of every commit in 'parent..', as a list
    with one entry per commit, each entry being a list of stripped lines.

    Exits with status 1 if git reports no commits in that range."""

    log = git('log --pretty=tformat:%%B:SEP: %s..' % parent)

    if not log:
        sys.stderr.write("No outgoing changesets found - missing -p option?\n")
        sys.exit(1)

    commits = []
    for start, end in slices(log, ':SEP:\n'):
        commits.append([line.strip() for line in log[start:end]])
    return commits
165
def git_file_list(parent, paths=None):
    """Return a sorted list of the files which have ever changed on this
    branch (that is, in 'parent..'), optionally limited to 'paths'.

    'paths' may be None or empty, meaning no limit.

    NB: This includes files which no longer exist, or no longer actually
    differ.

    Exits with status 1 if git produces no output at all."""

    # Build an argument list rather than a string, so that paths containing
    # whitespace reach git intact and a missing 'paths' is simply "no limit".
    command = ["log", "--name-only", "--pretty=format:", "%s.." % parent]
    command.extend(path for path in (paths or []) if path)

    p = git(command)

    if not p:
        sys.stderr.write("Failed building file-list from git\n")
        sys.exit(1)

    # The empty format leaves blank separator lines between commits.
    names = set(line.strip() for line in p)
    names.discard('')

    return sorted(names)
186
def not_check(root, cmd):
    """Build the exclusion predicate for the check named 'cmd'.

    The ignore lists consulted are <root>/.git/info/<cmd>.NOT and
    <root>/exception_lists/<cmd>; only those that exist are handed to
    Ignore.ignore(), whose result is returned."""

    candidates = (
        os.path.join(root, ".git/info", "%s.NOT" % cmd),
        os.path.join(root, "exception_lists", cmd),
    )
    ignorefiles = [path for path in candidates if os.path.exists(path)]
    return Ignore.ignore(root, ignorefiles)
195
def gen_files(root, parent, paths, exclude, filter=None):
    """Return a function producing file names, relative to the current
    directory, of any file changed on this branch (limited to 'paths' if
    requested), and excluding files for which exclude returns a true value.

    'filter' is a predicate on the relative path which decides what kind of
    filesystem object qualifies; it defaults to os.path.isfile.

    The returned generator function takes an optional 'select' predicate on
    the relative path, which narrows the result further."""

    if filter is None:
        filter = os.path.isfile

    def ret(select=None):
        if not select:
            select = lambda x: True

        for abspath in git_file_list(parent, paths):
            path = os.path.relpath(os.path.join(root, abspath), '.')
            try:
                # Pass an argument list so that a path containing whitespace
                # is not split into several arguments.
                res = git(["diff", parent, "HEAD", path])
            except GitError:
                # This ignores all the errors that can be thrown. Usually, this
                # means that git returned non-zero because the file doesn't
                # exist, but it could also fail if git can't create a new file
                # or it can't be executed.  Such errors are 1) unlikely, and 2)
                # will be caught by other invocations of git().
                continue
            # An empty diff means the file no longer differs from the parent.
            if (filter(path) and res and
                    select(path) and not exclude(abspath)):
                yield path
    return ret
224
def gen_links(root, parent, paths, exclude):
    """Return a function producing the names, relative to the current
    directory, of symbolic links changed on this branch (limited to 'paths'
    if requested), omitting those for which exclude returns a true value."""

    def is_symlink(path):
        return os.path.islink(path)

    return gen_files(root, parent, paths, exclude, is_symlink)
231
def gen_none(root, parent, paths, exclude):
    """Return a one-argument function that always returns an empty list."""
    def nothing(x):
        return []
    return nothing
235
# The list of possible checks.  Each is recorded as a two-function pair; the
# first is the actual checker, and the second is the generator which creates
# the list of things that the checker works on.
239
# Check name -> (checker function, generator factory).
checks = dict()
# Names of the checks whose generator is not gen_none, in registration order.
nits_checks = list()
# Names of every registered check, in registration order.
all_checks = list()
243
def add_check(fn, gen):
    """Register checker 'fn', paired with generator factory 'gen', under the
    function's own name, and return 'fn' unchanged.

    Every check is added to all_checks; those whose generator is not
    gen_none are also added to nits_checks.  Raises ValueError if 'fn' has
    no documentation string."""
    check_name = fn.__name__
    if fn.__doc__ is None:
        raise ValueError('Check function lacks a documentation string',
                         check_name)

    checks[check_name] = (fn, gen)

    registries = [all_checks]
    if gen != gen_none:
        registries.append(nits_checks)
    for registry in registries:
        registry.append(check_name)

    return fn
255
def filechecker(fn):
    """Decorator: register 'fn' as a check that runs over changed files."""
    registered = add_check(fn, gen_files)
    return registered
259
def linkchecker(fn):
    """Decorator: register 'fn' as a check that runs over changed symlinks."""
    registered = add_check(fn, gen_links)
    return registered
263
def wschecker(fn):
    """Decorator: register 'fn' as a workspace-wide check, which is given
    no file list."""
    registered = add_check(fn, gen_none)
    return registered
267
@wschecker
def comchk(root, parent, flist, output):
    "Check that putback comments follow the prescribed format"
    # Returns the OR of the per-commit results.  Only failing commits have
    # their report copied to 'output'.
    output.write("Comments:\n")

    all_comments = git_comments(parent)
    show_subject = len(all_comments) > 1
    seen_bugs = {}
    status = 0

    for message in all_comments:
        report = StringIO()
        rc = Comments.comchk(message, check_db=True,
                             output=report, bugs=seen_bugs)
        status |= rc

        if rc == 0:
            continue

        # With several commits, label each report with its first line.
        if show_subject:
            output.write('\n%s\n' % message[0])
        output.write(report.getvalue())

    return status
292
@filechecker
def copyright(root, parent, flist, output):
    """Check that each source file contains a copyright notice for the current
year. You don't need to fix this if you, the potential new copyright holder,
chooses not to."""
    # Returns the OR of the per-file results.
    output.write("Copyrights:\n")
    status = 0
    for path in flist():
        with io.open(path, encoding='utf-8', errors='replace') as src:
            status |= Copyright.copyright(src, output=output)
    return status
304
@filechecker
def cstyle(root, parent, flist, output):
    "Check that C source files conform to the illumos C style rules"
    # Returns the OR of the per-file results.
    def is_c_source(name):
        return name.endswith(('.c', '.h'))

    output.write("C style:\n")
    status = 0
    for path in flist(is_c_source):
        with io.open(path, mode='rb') as src:
            status |= CStyle.cstyle(src, output=output, picky=True,
                                    check_posix_types=True,
                                    check_continuation=True)
    return status
316
@filechecker
def hdrchk(root, parent, flist, output):
    "Check that C header files conform to the illumos header style rules"
    # Returns the OR of the per-file results.
    def is_header(name):
        return name.endswith('.h')

    output.write("Header format:\n")
    status = 0
    for path in flist(is_header):
        with io.open(path, encoding='utf-8', errors='replace') as src:
            status |= HdrChk.hdrchk(src, lenient=True, output=output)
    return status
326
@filechecker
def jstyle(root, parent, flist, output):
    """Check that Java source files conform to the illumos Java style rules
(which differ from the traditionally recommended Java style)"""

    # Returns the OR of the per-file results.
    def is_java(name):
        return name.endswith('.java')

    output.write("Java style:\n")
    status = 0
    for path in flist(is_java):
        with io.open(path, mode='rb') as src:
            status |= JStyle.jstyle(src, output=output, picky=True)
    return status
338
@filechecker
def keywords(root, parent, flist, output):
    """Check that no source files contain unexpanded SCCS keywords.
It is possible that this check may false positive on certain inputs.
It is generally obvious when this is the case.

This check does not check for expanded SCCS keywords, though the common
'ident'-style lines should be removed regardless of whether they are
expanded."""

    # Returns the OR of the per-file results.
    output.write("SCCS Keywords:\n")
    status = 0
    for path in flist():
        with io.open(path, encoding='utf-8', errors='replace') as src:
            status |= Keywords.keywords(src, output=output)
    return status
355
@filechecker
def manlint(root, parent, flist, output):
    "Check for problems with man pages."

    # Returns the OR of the lint and spelling results over every file whose
    # name ends in a manual-section suffix (a digit, then optional letters).
    section_suffix = re.compile(r'.*\.[0-9][a-z]*$', re.IGNORECASE)

    output.write("Man page format/spelling:\n")
    status = 0
    for path in flist(section_suffix.match):
        with io.open(path, mode='rb') as src:
            # NOTE(review): both checkers are handed the same open handle;
            # whether the second sees any data depends on how each one
            # consumes 'src' -- confirm against ManLint and SpellCheck.
            status |= ManLint.manlint(src, output=output, picky=True)
            status |= SpellCheck.spellcheck(src, output=output)
    return status
368
@filechecker
def mapfilechk(root, parent, flist, output):
    """Check that linker mapfiles contain a comment directing anyone
editing to read the directions in usr/lib/README.mapfiles."""

    # A file is of interest when its final path segment:
    #    - contains the word 'mapfile', or
    #    - begins with 'map.', or
    #    - ends with '.map'
    # Only the final segment counts, because a directory whose name contains
    # one of these strings does not indicate a mapfile.  Files with a suffix
    # showing they are not mapfiles (.c, .h) are skipped.
    looks_like_mapfile = re.compile(
        r'.*((mapfile[^/]*)|(/map\.+[^/]*)|(\.map))$', re.IGNORECASE)
    not_a_mapfile = re.compile(r'.*\.[ch]$', re.IGNORECASE)

    def is_mapfile(name):
        return looks_like_mapfile.match(name) and not not_a_mapfile.match(name)

    output.write("Mapfile comments:\n")

    status = 0
    for path in flist(is_mapfile):
        with io.open(path, encoding='utf-8', errors='replace') as src:
            status |= Mapfile.mapfilechk(src, output=output)
    return status
395
@filechecker
def shelllint(root, parent, flist, output):
    """Check shell scripts for common errors."""
    # Returns the OR of the per-file results.
    output.write("Shell lint:\n")

    def isshell(x):
        # A script is identified by a .sh/.ksh suffix or, for files with no
        # suffix at all, by a first line that starts with '#' and names sh
        # or ksh.
        ext = os.path.splitext(x)[1]
        if ext in ('.sh', '.ksh'):
            return True
        if ext != '':
            return False
        with io.open(x, mode='r', errors='ignore') as fh:
            first_line = fh.readline()
        return re.match(r'^#.*\bk?sh\b', first_line) is not None

    status = 0
    for path in flist(isshell):
        with io.open(path, mode='rb') as src:
            status |= ShellLint.lint(src, output=output)

    return status
417
@filechecker
def pkgfmt(root, parent, flist, output):
    """Check package manifests for common errors."""
    # Returns the OR of the per-file results.
    def is_manifest(name):
        return name.endswith('.p5m')

    output.write("Package manifests:\n")

    status = 0
    for path in flist(is_manifest):
        with io.open(path, mode='rb') as src:
            status |= PkgFmt.check(src, output=output)

    return status
429
def iswinreserved(name):
    """Return True if 'name' is a reserved Windows device name, or is one
    followed by a '.' and anything else.  Case is ignored."""
    devices = ['con', 'prn', 'aux', 'nul']
    # COM0-COM9 and LPT0-LPT9.
    devices += ['%s%d' % (port, unit)
                for port in ('com', 'lpt') for unit in range(10)]

    # No reserved name contains a '.', so everything before the first '.'
    # (or the whole name, if it has none) is what must match.
    stem = name.lower().split('.', 1)[0]
    return stem in devices
442
def haswinspecial(name):
    """Return True if 'name' contains any of the characters < > : " \\ | ? *"""
    specials = '<>:"\\|?*'
    return any(ch in specials for ch in name)
449
@filechecker
def winnames(root, parent, flist, output):
    "Check for filenames which can't be used in a Windows filesystem."
    # Returns 1 if any path was reported, otherwise 0.  Each path is reported
    # at most once; an invalid character takes precedence over a reserved
    # name.
    status = 0
    output.write("Illegal filenames (Windows):\n")
    for path in flist():
        if haswinspecial(path):
            output.write("  "+path+": invalid character in name\n")
            status |= 1
            continue

        if any(iswinreserved(part) for part in path.split('/')):
            output.write("  "+path+": reserved file name\n")
            status |= 1

    return status
469
@filechecker
def wscheck(root, parent, flist, output):
    "Check for whitespace issues such as mixed tabs/spaces in source files."
    # Returns the OR of the per-file results.
    output.write("white space nits:\n")
    status = 0
    for path in flist():
        with io.open(path, encoding='utf-8', errors='replace') as src:
            status |= WsCheck.wscheck(src, output=output)
    return status
479
@linkchecker
def symlinks(root, parent, flist, output):
    "Check for committed symlinks (there shouldn't be any)."
    # Every name produced by flist() is itself a finding, so the result is 1
    # as soon as there is at least one, and 0 otherwise.
    output.write("Symbolic links:\n")
    found = 0
    for link in flist():
        output.write("  "+link+"\n")
        found = 1
    return found
489
def run_checks(root, parent, checklist, paths=''):
    """Run each check named in 'checklist' and print the report of every
    check that fails.

    Returns the OR of the individual results, so it is non-zero if any
    check failed.

    NB: the check names also name the NOT file which excepts files from
    them."""

    failed = 0

    for name in checklist:
        checker, make_flist = checks[name]

        # Each check writes to its own buffer, so that the output of a
        # passing check is never shown.
        report = StringIO()

        flist = make_flist(root, parent, paths, not_check(root, name))
        rc = checker(root, parent, flist, output=report)
        failed |= rc

        if rc != 0:
            print(report.getvalue())

    return failed
515
def print_checks():
    """Print the name and description (docstring) of every registered
    check, wrapped to 78 columns with continuation lines indented."""

    for name in all_checks:
        description = checks[name][0].__doc__
        entry = "%-11s %s" % (name, description)
        wrapped = textwrap.fill(entry, width=78, subsequent_indent=' '*12)
        print(wrapped, '\n')
523
def main(cmd, args):
    """Parse the command line and run the requested checks.

    'cmd' is the name the program was invoked as; when no -c option is
    given, 'git-pbchk' selects every check and any other name selects only
    the nits checks.  'args' is the argument list without the program name.

    Options: -l lists the checks and exits; -f allows more than 5 commits
    to be checked; -c names a check to run (repeatable); -p (or -b) names
    the parent branch."""
    try:
        opts, args = getopt.getopt(args, 'lfb:c:p:')
    except getopt.GetoptError as e:
        sys.stderr.write(str(e) + '\n')
        sys.stderr.write("Usage: %s [-l] [-f] [-c check] [-p branch] "
                         "[path...]\n" % cmd)
        sys.exit(1)

    parent_branch = None
    force = False
    checklist = []

    for opt, arg in opts:
        if opt == '-l':
            print_checks()
            sys.exit(0)

        if opt == '-f':
            force = True
        # We accept "-b" as an alias of "-p" for backwards compatibility.
        elif opt in ('-p', '-b'):
            parent_branch = arg
        elif opt == '-c':
            if arg not in checks:
                sys.stderr.write("Unknown check '%s'\n" % arg)
                sys.exit(1)
            checklist.append(arg)

    if not parent_branch:
        parent_branch = git_parent_branch(git_branch())

    # Refuse to check an unexpectedly long history unless -f was given.
    ncommits = len(git_comments(parent_branch))
    if ncommits > 5 and not force:
        sys.stderr.write("Declining to check history since %s, would be %d "
                         "commits. Rerun with -f if you really mean to.\n" %
                         (parent_branch, ncommits))
        sys.exit(1)

    if not checklist:
        if cmd != 'git-pbchk':
            checklist = nits_checks
        elif args:
            sys.stderr.write("only complete workspaces may be pbchk'd\n")
            sys.exit(1)
        else:
            checklist = all_checks

    # NOTE(review): the result of run_checks() is discarded, so the exit
    # status does not reflect whether any check failed -- confirm that this
    # is intended.
    run_checks(git_root(), parent_branch, checklist, args)
573
if __name__ == '__main__':
    # The invoked name selects the default set of checks; see main().
    prog = os.path.basename(sys.argv[0])
    try:
        main(prog, sys.argv[1:])
    except GitError as err:
        sys.stderr.write("failed to run git:\n %s\n" % str(err))
        sys.exit(1)
580