xref: /illumos-gate/usr/src/tools/scripts/git-pbchk.py (revision 4d8d108f42a089b7b4441353f2ad7a75e1c7b31d)
1#!@TOOLS_PYTHON@ -Es
2#
3#  This program is free software; you can redistribute it and/or modify
4#  it under the terms of the GNU General Public License version 2
5#  as published by the Free Software Foundation.
6#
7#  This program is distributed in the hope that it will be useful,
8#  but WITHOUT ANY WARRANTY; without even the implied warranty of
9#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10#  GNU General Public License for more details.
11#
12#  You should have received a copy of the GNU General Public License
13#  along with this program; if not, write to the Free Software
14#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15#
16
17#
18# Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
19# Copyright 2008, 2012 Richard Lowe
20# Copyright 2019 Garrett D'Amore <garrett@damore.org>
21# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
22# Copyright 2016 Nexenta Systems, Inc.
23# Copyright (c) 2019, Joyent, Inc.
24# Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
25# Copyright 2024 Bill Sommerfeld
26#
27
28from __future__ import print_function
29
30import getopt
31import io
32import os
33import re
34import subprocess
35import sys
36import tempfile
37import textwrap
38
39if sys.version_info[0] < 3:
40    from cStringIO import StringIO
41else:
42    from io import StringIO
43
44#
45# Adjust the load path based on our location and the version of python into
46# which it is being loaded.  This assumes the normal onbld directory
47# structure, where we are in bin/ and the modules are in
48# lib/python(version)?/onbld/Scm/.  If that changes so too must this.
49#
50sys.path.insert(1, os.path.join(os.path.dirname(__file__), "..", "lib",
51                                "python%d.%d" % sys.version_info[:2]))
52
53#
54# Add the relative path to usr/src/tools to the load path, such that when run
55# from the source tree we use the modules also within the source tree.
56#
57sys.path.insert(2, os.path.join(os.path.dirname(__file__), ".."))
58
59from onbld.Scm import Ignore
60from onbld.Checks import Comments, Copyright, CStyle, HdrChk, WsCheck
61from onbld.Checks import JStyle, Keywords, ManLint, Mapfile, SpellCheck
62from onbld.Checks import ShellLint, PkgFmt
63
class GitError(Exception):
    """Raised by git() when a temporary file cannot be created, git cannot
    be executed, or git exits with a non-zero status."""
66
def git(command):
    """Run a git subcommand and return its standard output as a list of lines.

    'command' is either a list of arguments or a single string.  A string is
    split on whitespace, so that form cannot carry an argument which itself
    contains whitespace; pass a list for those.  "git" is prepended to the
    argument list.

    The output is spooled through a temporary file and each line is decoded
    as UTF-8, with undecodable bytes replaced.  Lines keep their trailing
    newline.

    Raises GitError if the temporary file cannot be created, if git cannot
    be executed, or if git exits non-zero (the message is then whatever git
    wrote to its standard error)."""

    if not isinstance(command, list):
        command = command.split()

    command = ["git"] + command

    try:
        tmpfile = tempfile.TemporaryFile(prefix="git-nits", mode="w+b")
    except EnvironmentError as e:
        raise GitError("Could not create temporary file: %s\n" % e)

    # The 'with' closes the temporary file on every exit path, including
    # the error paths below.
    with tmpfile:
        try:
            p = subprocess.Popen(command,
                                 stdout=tmpfile,
                                 stderr=subprocess.PIPE)
        except OSError as e:
            raise GitError("could not execute %s: %s\n" % (command, e))

        # communicate() drains stderr while waiting for git to exit; a bare
        # wait() could block forever once git filled the stderr pipe.
        (_, errout) = p.communicate()
        if p.returncode != 0:
            # On Python 3 the pipe yields bytes; decode so that the message
            # is readable when the exception is printed.  On Python 2 bytes
            # is str and is passed through untouched.
            if not isinstance(errout, str):
                errout = errout.decode('utf-8', 'replace')
            raise GitError(errout)

        tmpfile.seek(0)
        return [l.decode('utf-8', 'replace') for l in tmpfile]
97
def git_root():
    """Return the absolute path of the top level of the current git
    workspace, as reported by 'git rev-parse --show-toplevel'."""

    toplevel = git('rev-parse --show-toplevel')[0]
    return os.path.abspath(toplevel.strip())
105
def git_branch():
    """Return the name of the current git branch.

    The name is the second whitespace-separated field of the line which
    'git branch' marks with a leading '*'.  Returns None if no line is
    marked, or if the marked line ends in '(no branch)'."""

    for entry in git('branch'):
        if entry[0] != '*':
            continue
        # NOTE(review): git() returns each line with its trailing newline
        # intact, so this suffix can only match an unterminated final line.
        # A detached HEAD therefore most likely falls through to the split
        # below -- confirm whether that is intended.
        if entry.endswith('(no branch)'):
            return None
        return entry.split()[1]

    return None
116
def git_parent_branch(branch):
    """Return the parent of the git branch named 'branch'.

    Returns None when 'branch' is empty or None.  Otherwise the local
    branches are listed together with their upstreams: if 'branch' tracks
    an upstream branch that upstream is returned, and if it does not the
    result defaults to origin/master.

    Exits with status 1 if git lists no local branches at all."""

    if not branch:
        return None

    refs = git(["for-each-ref", "--format=%(refname:short) %(upstream:short)",
               "refs/heads/"])

    if not refs:
        sys.stderr.write("Failed finding git parent branch\n")
        sys.exit(1)

    for entry in refs:
        # A line without a space, or (as Git 1.7 emits for a non-tracking
        # branch) with only a trailing space, has no upstream to report.
        if ' ' not in entry or entry.endswith(' \n'):
            continue
        local, remote = entry.split()
        if local == branch:
            return remote

    return 'origin/master'
140
def slices(strlist, sep):
    """Yield (start, end) index pairs for the runs of 'strlist' which are
    delimited by elements equal to 'sep'.

    'end' is exclusive and the separators themselves are never included.
    A trailing run which is not followed by a separator is yielded too,
    provided it is not empty."""

    begin = 0
    for pos, item in enumerate(strlist):
        if item != sep:
            continue
        yield (begin, pos)
        begin = pos + 1

    total = len(strlist)
    if begin != total:
        yield (begin, total)
151
def git_comments(parent):
    """Return the checkin comments of every commit in 'parent'..HEAD as a
    list with one entry per commit, each entry being that commit's list of
    whitespace-stripped lines.

    Exits with status 1 if git reports no commits in that range."""

    # Each commit body is followed by a ':SEP:' marker line, which is what
    # slices() later splits on.
    log = git('log --pretty=tformat:%%B:SEP: %s..' % parent)

    if not log:
        sys.stderr.write("No outgoing changesets found - missing -p option?\n")
        sys.exit(1)

    commits = []
    for (first, last) in slices(log, ':SEP:\n'):
        commits.append([text.strip() for text in log[first:last]])
    return commits
164
def git_file_list(parent, paths=None):
    """Return the sorted list of files which have ever changed on this
    branch, i.e. in any commit in 'parent'..HEAD.

    'paths', if given, is a sequence of path names which limits the log to
    those paths; None or an empty sequence means no limit.

    NB: This includes files which no longer exist, or no longer actually
    differ.

    Exits with status 1 if git produces no output at all."""

    # Hand git an argument list rather than a string to be split, so that
    # a path containing whitespace survives as a single argument.
    command = ["log", "--name-only", "--pretty=format:", "%s.." % parent]
    command.extend(paths or [])

    p = git(command)

    if not p:
        sys.stderr.write("Failed building file-list from git\n")
        sys.exit(1)

    # The empty format leaves blank lines between commits; drop them.
    names = set(line.strip() for line in p)
    names.discard('')

    return sorted(names)
185
def not_check(root, cmd):
    """Return a function which returns True if a file given as an argument
    should be excluded from the check named by 'cmd'.

    The exclusions are read from whichever of <root>/.git/info/<cmd>.NOT
    and <root>/exception_lists/<cmd> exist."""

    candidates = [os.path.join(root, ".git/info", "%s.NOT" % cmd),
                  os.path.join(root, "exception_lists", cmd)]
    present = [name for name in candidates if os.path.exists(name)]
    return Ignore.ignore(root, present)
194
def gen_files(root, parent, paths, exclude, filter=None):
    """Return a function producing file names, relative to the current
    directory, of any file changed on this branch (limited to 'paths' if
    requested), and excluding files for which exclude returns a true value.

    'filter' is a predicate on the relative path deciding which kind of
    file system object qualifies; it defaults to os.path.isfile.

    The returned function takes an optional 'select' predicate on the
    relative path, and yields only paths which 'select' accepts and which
    actually differ between 'parent' and HEAD."""

    if filter is None:
        filter = os.path.isfile

    def ret(select=None):
        if not select:
            select = lambda x: True

        for abspath in git_file_list(parent, paths):
            path = os.path.relpath(os.path.join(root, abspath), '.')

            # Apply the cheap predicates before spawning git, so that a
            # check interested in only a few files does not pay for a
            # 'git diff' of every file changed on the branch.  The
            # predicates are assumed to be free of side effects.
            if not filter(path) or not select(path) or exclude(abspath):
                continue

            try:
                # An argument list keeps a path containing whitespace
                # intact, and '--' stops git from reading the path as a
                # revision.
                res = git(["diff", parent, "HEAD", "--", path])
            except GitError:
                # This ignores all the errors that can be thrown. Usually,
                # this means that git returned non-zero for this file, but
                # it could also fail if git can't create a new file or it
                # can't be executed.  Such errors are 1) unlikely, and 2)
                # will be caught by other invocations of git().
                continue

            if res:
                yield path
    return ret
223
def gen_links(root, parent, paths, exclude):
    """Return a function producing symbolic link names, relative to the
    current directory, of any link changed on this branch (limited to
    'paths' if requested), and excluding those for which exclude returns a
    true value.

    This is gen_files() with os.path.islink as the file-type filter."""

    return gen_files(root, parent, paths, exclude, os.path.islink)
230
def gen_none(root, parent, paths, exclude):
    """Return a function returning the empty list.

    This is the generator for checks which do not work on files.  All four
    arguments are ignored.  The returned function accepts the same optional
    'select' argument as the one built by gen_files(), so it can be called
    with or without it."""
    return lambda select=None: []
234
# The registry of possible checks.  Each check is recorded in 'checks' under
# the name of its function, as a two-function pair: the first is the actual
# checker, and the second is the generator which creates the list of things
# that the checker works on.
#
# 'all_checks' holds the name of every registered check, in registration
# order; 'nits_checks' holds only those whose generator is not gen_none.
# All three are filled in by add_check().

checks = {}
nits_checks = []
all_checks = []
242
def add_check(fn, gen):
    """Register the checker 'fn' under its own name, paired with the
    generator 'gen', and return 'fn' unchanged.

    Every check is added to all_checks; a check whose generator is anything
    other than gen_none is added to nits_checks as well.

    Raises ValueError if 'fn' has no documentation string."""
    label = fn.__name__
    if fn.__doc__ is None:
        raise ValueError('Check function lacks a documentation string',
                         label)

    checks[label] = (fn, gen)
    all_checks.append(label)
    if gen != gen_none:
        nits_checks.append(label)
    return fn
254
def filechecker(fn):
    """Decorator which registers 'fn' as a check which works on files
    (its generator is gen_files) and returns it unchanged."""
    registered = add_check(fn, gen_files)
    return registered
258
def linkchecker(fn):
    """Decorator which registers 'fn' as a check which works on symbolic
    links (its generator is gen_links) and returns it unchanged."""
    registered = add_check(fn, gen_links)
    return registered
262
def wschecker(fn):
    """Decorator which registers 'fn' as a workspace check, i.e. one which
    is given no files (its generator is gen_none), and returns it
    unchanged."""
    registered = add_check(fn, gen_none)
    return registered
266
@wschecker
def comchk(root, parent, flist, output):
    "Check that putback comments follow the prescribed format"
    # The docstring above doubles as the help text printed by
    # print_checks().  'root' and 'flist' are not used by this check.
    output.write("Comments:\n")

    commits = git_comments(parent)
    show_subject = len(commits) > 1
    # Handed to every Comments.comchk() call as its 'bugs' argument, so the
    # same dictionary is shared across all commits.
    seen = {}

    status = 0
    for message in commits:
        report = StringIO()

        rc = Comments.comchk(message, check_db=True,
                             output=report, bugs=seen)
        status |= rc

        if rc == 0:
            continue

        # With several commits, say which one the complaints belong to by
        # echoing its first line.
        if show_subject:
            output.write('\n%s\n' % message[0])
        output.write(report.getvalue())

    return status
291
@filechecker
def copyright(root, parent, flist, output):
    """Check that each source file contains a copyright notice for the current
year. You don't need to fix this if you, the potential new copyright holder,
chooses not to."""
    # The docstring above doubles as the help text printed by
    # print_checks().  Returns the OR of the per-file results.
    output.write("Copyrights:\n")
    status = 0
    for name in flist():
        # Undecodable bytes are replaced rather than raising.
        with io.open(name, encoding='utf-8', errors='replace') as src:
            status |= Copyright.copyright(src, output=output)
    return status
303
@filechecker
def cstyle(root, parent, flist, output):
    "Check that C source files conform to the illumos C style rules"
    # The docstring above doubles as the help text printed by
    # print_checks().  Only '.c' and '.h' files are examined; they are
    # handed to the checker opened in binary mode.
    output.write("C style:\n")
    status = 0
    for name in flist(lambda x: x.endswith(('.c', '.h'))):
        with io.open(name, mode='rb') as src:
            status |= CStyle.cstyle(src, output=output, picky=True,
                                    check_posix_types=True,
                                    check_continuation=True)
    return status
315
@filechecker
def hdrchk(root, parent, flist, output):
    "Check that C header files conform to the illumos header style rules"
    # The docstring above doubles as the help text printed by
    # print_checks().  Only '.h' files are examined.
    output.write("Header format:\n")
    status = 0
    for name in flist(lambda x: x.endswith('.h')):
        # Undecodable bytes are replaced rather than raising.
        with io.open(name, encoding='utf-8', errors='replace') as src:
            status |= HdrChk.hdrchk(src, lenient=True, output=output)
    return status
325
@filechecker
def jstyle(root, parent, flist, output):
    """Check that Java source files conform to the illumos Java style rules
(which differ from the traditionally recommended Java style)"""

    # The docstring above doubles as the help text printed by
    # print_checks().  Only '.java' files are examined; they are handed to
    # the checker opened in binary mode.
    output.write("Java style:\n")
    status = 0
    for name in flist(lambda x: x.endswith('.java')):
        with io.open(name, mode='rb') as src:
            status |= JStyle.jstyle(src, output=output, picky=True)
    return status
337
@filechecker
def keywords(root, parent, flist, output):
    """Check that no source files contain unexpanded SCCS keywords.
It is possible that this check may false positive on certain inputs.
It is generally obvious when this is the case.

This check does not check for expanded SCCS keywords, though the common
'ident'-style lines should be removed regardless of whether they are
expanded."""

    # The docstring above doubles as the help text printed by
    # print_checks().  Every changed file is examined.
    output.write("SCCS Keywords:\n")
    status = 0
    for name in flist():
        # Undecodable bytes are replaced rather than raising.
        with io.open(name, encoding='utf-8', errors='replace') as src:
            status |= Keywords.keywords(src, output=output)
    return status
354
@filechecker
def manlint(root, parent, flist, output):
    "Check for problems with man pages."

    # The docstring above doubles as the help text printed by
    # print_checks().
    output.write("Man page format/spelling:\n")
    status = 0

    # A man page is taken to be any file whose name ends in a dot, a digit
    # and optional letters.
    ManfileRE = re.compile(r'.*\.[0-9][a-z]*$', re.IGNORECASE)

    for name in flist(ManfileRE.match):
        with io.open(name, mode='rb') as src:
            # NOTE(review): both checkers are given the same open handle;
            # if either reads from the handle itself rather than from its
            # name, the second one starts at end of file -- confirm.
            status |= ManLint.manlint(src, output=output, picky=True)
            status |= SpellCheck.spellcheck(src, output=output)
    return status
367
@filechecker
def mapfilechk(root, parent, flist, output):
    """Check that linker mapfiles contain a comment directing anyone
editing to read the directions in usr/lib/README.mapfiles."""

    # The docstring above doubles as the help text printed by
    # print_checks().
    #
    # We are interested in examining any file that has the following
    # in its final path segment:
    #    - Contains the word 'mapfile'
    #    - Begins with 'map.'
    #    - Ends with '.map'
    # We don't want to match unless these things occur in final path segment
    # because directory names with these strings don't indicate a mapfile.
    # We also ignore files with suffixes that tell us that the files
    # are not mapfiles.
    MapfileRE = re.compile(r'.*((mapfile[^/]*)|(/map\.+[^/]*)|(\.map))$',
        re.IGNORECASE)
    NotMapSuffixRE = re.compile(r'.*\.[ch]$', re.IGNORECASE)

    def ismapfile(x):
        # Looks like a mapfile by name, and is not C source or a header.
        return MapfileRE.match(x) and not NotMapSuffixRE.match(x)

    status = 0
    output.write("Mapfile comments:\n")

    for name in flist(ismapfile):
        # Undecodable bytes are replaced rather than raising.
        with io.open(name, encoding='utf-8', errors='replace') as src:
            status |= Mapfile.mapfilechk(src, output=output)
    return status
394
@filechecker
def shelllint(root, parent, flist, output):
    """Check shell scripts for common errors."""
    # The docstring above doubles as the help text printed by
    # print_checks().
    output.write("Shell lint:\n")
    status = 0

    def isshell(x):
        # A file is a shell script if it is named *.sh or *.ksh, or if it
        # has no extension at all and its first line starts with '#' and
        # mentions 'sh' or 'ksh' as a whole word.
        ext = os.path.splitext(x)[1]
        if ext in ('.sh', '.ksh'):
            return True
        if ext != '':
            return False
        with io.open(x, mode='r', errors='ignore') as fh:
            return bool(re.match(r'^#.*\bk?sh\b', fh.readline()))

    for name in flist(isshell):
        with io.open(name, mode='rb') as src:
            status |= ShellLint.lint(src, output=output)

    return status
416
@filechecker
def pkgfmt(root, parent, flist, output):
    """Check package manifests for common errors."""
    # The docstring above doubles as the help text printed by
    # print_checks().  Only '.p5m' files are examined; they are handed to
    # the checker opened in binary mode.
    output.write("Package manifests:\n")
    status = 0

    for name in flist(lambda x: x.endswith('.p5m')):
        with io.open(name, mode='rb') as src:
            status |= PkgFmt.check(src, output=output)

    return status
428
def iswinreserved(name):
    """Return True if 'name' is one of the device names con, prn, aux, nul,
    com0-com9 or lpt0-lpt9, compared case-insensitively, either on its own
    or followed by a dot and anything else."""
    devices = frozenset(
        ['con', 'prn', 'aux', 'nul'] +
        ['%s%d' % (port, unit)
         for port in ('com', 'lpt') for unit in range(10)])

    # None of the device names contains a dot, so only the text before the
    # first dot can match one.
    stem = name.lower().partition('.')[0]
    return stem in devices
441
def haswinspecial(name):
    """Return True if 'name' contains any of the characters
    < > : " \\ | ? or *."""
    return any(ch in '<>:"\\|?*' for ch in name)
448
@filechecker
def winnames(root, parent, flist, output):
    "Check for filenames which can't be used in a Windows filesystem."
    # The docstring above doubles as the help text printed by
    # print_checks().  Each offending path is reported once: for an invalid
    # character if it has one, otherwise for a reserved path component.
    output.write("Illegal filenames (Windows):\n")
    status = 0
    for path in flist():
        if haswinspecial(path):
            output.write("  "+path+": invalid character in name\n")
            status |= 1
        elif any(iswinreserved(part) for part in path.split('/')):
            output.write("  "+path+": reserved file name\n")
            status |= 1

    return status
468
@filechecker
def wscheck(root, parent, flist, output):
    "Check for whitespace issues such as mixed tabs/spaces in source files."
    # The docstring above doubles as the help text printed by
    # print_checks().  Every changed file is examined.
    output.write("white space nits:\n")
    status = 0
    for name in flist():
        # Undecodable bytes are replaced rather than raising.
        with io.open(name, encoding='utf-8', errors='replace') as src:
            status |= WsCheck.wscheck(src, output=output)
    return status
478
@linkchecker
def symlinks(root, parent, flist, output):
    "Check for committed symlinks (there shouldn't be any)."
    # The docstring above doubles as the help text printed by
    # print_checks().  Any link produced by flist() counts as a failure.
    output.write("Symbolic links:\n")
    status = 0
    for link in flist():
        output.write("  "+link+"\n")
        status |= 1
    return status
488
def run_checks(root, parent, checklist, paths=''):
    """Run each check named in 'checklist' and print the output of those
    which fail.

    Returns the OR of the individual results, so the value is non-zero if
    any check failed.

    NB: the name of a check is also the name of the NOT file which excepts
    files from it."""

    status = 0

    for name in checklist:
        (checker, generator) = checks[name]

        # Collect the output privately; it is shown only on failure.
        report = StringIO()

        skip = not_check(root, name)
        rc = checker(root, parent, generator(root, parent, paths, skip),
                     output=report)
        status |= rc

        if rc != 0:
            print(report.getvalue())

    return status
514
def print_checks():
    """Print the name and documentation string of every registered check,
    wrapped to 78 columns with continuation lines indented by 12."""

    for name in all_checks:
        summary = "%-11s %s" % (name, checks[name][0].__doc__)
        wrapped = textwrap.fill(summary, width=78,
                                subsequent_indent=' '*12)
        print(wrapped, '\n')
522
def main(cmd, args):
    """Parse the command line and run the requested checks.

    'cmd' is the name the program was invoked under and 'args' the
    remaining command line arguments.  The options are:

        -l          list the available checks and exit
        -c check    run the named check; may be given more than once
        -p branch   compare against 'branch' rather than the parent found
                    by git_parent_branch() ('-b' is accepted as an alias)

    Any remaining arguments are paths to which the checks are limited.

    When no -c option is given, every check is run if 'cmd' is
    'git-pbchk' (in which case paths are not allowed), and otherwise only
    the checks in nits_checks are run.

    Returns the combined result of run_checks(), which is non-zero if any
    check failed.  Exits with status 1 on a usage error and status 0 after
    -l."""
    parent_branch = None

    checklist = []

    try:
        opts, args = getopt.getopt(args, 'lb:c:p:')
    except getopt.GetoptError as e:
        sys.stderr.write(str(e) + '\n')
        sys.stderr.write("Usage: %s [-l] [-c check] [-p branch] [path...]\n"
                         % cmd)
        sys.exit(1)

    for opt, arg in opts:
        if opt == '-l':
            print_checks()
            sys.exit(0)
        # We accept "-b" as an alias of "-p" for backwards compatibility.
        elif opt == '-p' or opt == '-b':
            parent_branch = arg
        elif opt == '-c':
            if arg not in checks:
                sys.stderr.write("Unknown check '%s'\n" % arg)
                sys.exit(1)
            checklist.append(arg)

    if not parent_branch:
        parent_branch = git_parent_branch(git_branch())

    if not checklist:
        if cmd == 'git-pbchk':
            if args:
                sys.stderr.write("only complete workspaces may be pbchk'd\n")
                sys.exit(1)
            checklist = all_checks
        else:
            checklist = nits_checks

    # Hand the aggregate status back rather than dropping it, so that a
    # caller is able to tell whether any check failed.
    return run_checks(git_root(), parent_branch, checklist, args)
562
# Script entry point.  The basename of argv[0] is passed as the command
# name because main() uses it to choose the default set of checks.
# NOTE(review): whatever main() returns is not used here, so the exit status
# is 0 unless git fails or main() itself calls sys.exit() -- confirm that
# failed checks are not meant to be reflected in the exit status.
if __name__ == '__main__':
    try:
        main(os.path.basename(sys.argv[0]), sys.argv[1:])
    except GitError as e:
        # A git failure which nothing closer to its source handled.
        sys.stderr.write("failed to run git:\n %s\n" % str(e))
        sys.exit(1)
569