#!/usr/bin/python2.6 # # CDDL HEADER START # # The contents of this file are subject to the terms of the # Common Development and Distribution License (the "License"). # You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. # See the License for the specific language governing permissions # and limitations under the License. # # When distributing Covered Code, include this CDDL HEADER in each # file and include the License file at usr/src/OPENSOLARIS.LICENSE. # If applicable, add the following below this CDDL HEADER, with the # fields enclosed by brackets "[]" replaced with your own identifying # information: Portions Copyright [yyyy] [name of copyright owner] # # CDDL HEADER END # # # Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # # Compare the content generated by a build to a set of manifests # describing how that content is to be delivered. # import getopt import os import stat import sys from pkg import actions from pkg import manifest # # Dictionary used to map action names to output format. Each entry is # indexed by action name, and consists of a list of tuples that map # FileInfo class members to output labels. # OUTPUTMAP = { "dir": [ ("group", "group="), ("mode", "mode="), ("owner", "owner="), ("path", "path=") ], "file": [ ("hash", ""), ("group", "group="), ("mode", "mode="), ("owner", "owner="), ("path", "path=") ], "link": [ ("mediator", "mediator="), ("path", "path="), ("target", "target=") ], "hardlink": [ ("path", "path="), ("hardkey", "target=") ], } # Mode checks used to validate safe file and directory permissions ALLMODECHECKS = frozenset(("m", "w", "s", "o")) DEFAULTMODECHECKS = frozenset(("m", "w", "o")) class FileInfo(object): """Base class to represent a file. Subclassed according to whether the file represents an actual filesystem object (RealFileInfo) or an IPS manifest action (ActionInfo). """ def __init__(self): self.path = None self.isdir = False self.target = None self.owner = None self.group = None self.mode = None self.hardkey = None self.hardpaths = set() self.editable = False def name(self): """Return the IPS action name of a FileInfo object. """ if self.isdir: return "dir" if self.target: return "link" if self.hardkey: return "hardlink" return "file" def checkmodes(self, modechecks): """Check for and report on unsafe permissions. Returns a potentially empty list of warning strings. """ w = [] t = self.name() if t in ("link", "hardlink"): return w m = int(self.mode, 8) o = self.owner p = self.path if "s" in modechecks and t == "file": if m & (stat.S_ISUID | stat.S_ISGID): if m & (stat.S_IRGRP | stat.S_IROTH): w.extend(["%s: 0%o: setuid/setgid file should not be " \ "readable by group or other" % (p, m)]) if "o" in modechecks and o != "root" and ((m & stat.S_ISUID) == 0): mu = (m & stat.S_IRWXU) >> 6 mg = (m & stat.S_IRWXG) >> 3 mo = m & stat.S_IRWXO e = self.editable if (((mu & 02) == 0 and (mo & mg & 04) == 04) or (t == "file" and mo & 01 == 1) or (mg, mo) == (mu, mu) or ((t == "file" and not e or t == "dir" and o == "bin") and (mg & 05 == mo & 05)) or (t == "file" and o == "bin" and mu & 01 == 01) or (m & 0105 != 0 and p.startswith("etc/security/dev/"))): w.extend(["%s: owner \"%s\" may be safely " \ "changed to \"root\"" % (p, o)]) if "w" in modechecks and t == "file" and o != "root": uwx = stat.S_IWUSR | stat.S_IXUSR if m & uwx == uwx: w.extend(["%s: non-root-owned executable should not " \ "also be writable by owner." % p]) if ("m" in modechecks and m & (stat.S_IWGRP | stat.S_IWOTH) != 0 and m & stat.S_ISVTX == 0): w.extend(["%s: 0%o: should not be writable by group or other" % (p, m)]) return w def __ne__(self, other): """Compare two FileInfo objects. Note this is the "not equal" comparison, so a return value of False indicates that the objects are functionally equivalent. """ # # Map the objects such that the lhs is always the ActionInfo, # and the rhs is always the RealFileInfo. # # It's only really important that the rhs not be an # ActionInfo; if we're comparing FileInfo the RealFileInfo, it # won't actually matter what we choose. # if isinstance(self, ActionInfo): lhs = self rhs = other else: lhs = other rhs = self # # Because the manifest may legitimately translate a relative # path from the proto area into a different path on the installed # system, we don't compare paths here. We only expect this comparison # to be invoked on items with identical relative paths in # first place. # # # All comparisons depend on type. For symlink and directory, they # must be the same. For file and hardlink, see below. # typelhs = lhs.name() typerhs = rhs.name() if typelhs in ("link", "dir"): if typelhs != typerhs: return True # # For symlinks, all that's left is the link target. # For mediated symlinks targets can differ. # if typelhs == "link": return (lhs.mediator is None) and (lhs.target != rhs.target) # # For a directory, it's important that both be directories, # the modes be identical, and the paths are identical. We already # checked all but the modes above. # # If both objects are files, then we're in the same boat. # if typelhs == "dir" or (typelhs == "file" and typerhs == "file"): return lhs.mode != rhs.mode # # For files or hardlinks: # # Since the key space is different (inodes for real files and # actual link targets for hard links), and since the proto area will # identify all N occurrences as hardlinks, but the manifests as one # file and N-1 hardlinks, we have to compare files to hardlinks. # # # If they're both hardlinks, we just make sure that # the same target path appears in both sets of # possible targets. # if typelhs == "hardlink" and typerhs == "hardlink": return len(lhs.hardpaths.intersection(rhs.hardpaths)) == 0 # # Otherwise, we have a mix of file and hardlink, so we # need to make sure that the file path appears in the # set of possible target paths for the hardlink. # # We already know that the ActionInfo, if present, is the lhs # operator. So it's the rhs operator that's guaranteed to # have a set of hardpaths. # return lhs.path not in rhs.hardpaths def __str__(self): """Return an action-style representation of a FileInfo object. We don't currently quote items with embedded spaces. If we ever decide to parse this output, we'll want to revisit that. """ name = self.name() out = name for member, label in OUTPUTMAP[name]: out += " " + label + str(getattr(self, member)) return out def protostr(self): """Return a protolist-style representation of a FileInfo object. """ target = "-" major = "-" minor = "-" mode = self.mode owner = self.owner group = self.group name = self.name() if name == "dir": ftype = "d" elif name in ("file", "hardlink"): ftype = "f" elif name == "link": ftype = "s" target = self.target mode = "777" owner = "root" group = "other" out = "%c %-30s %-20s %4s %-5s %-5s %6d %2ld - -" % \ (ftype, self.path, target, mode, owner, group, 0, 1) return out class ActionInfo(FileInfo): """Object to track information about manifest actions. This currently understands file, link, dir, and hardlink actions. """ def __init__(self, action): FileInfo.__init__(self) # # Currently, all actions that we support have a "path" # attribute. If that changes, then we'll need to # catch a KeyError from this assignment. # self.path = action.attrs["path"] if action.name == "file": self.owner = action.attrs["owner"] self.group = action.attrs["group"] self.mode = action.attrs["mode"] self.hash = action.hash if "preserve" in action.attrs: self.editable = True elif action.name == "link": target = action.attrs["target"] self.target = os.path.normpath(target) self.mediator = action.attrs.get("mediator") elif action.name == "dir": self.owner = action.attrs["owner"] self.group = action.attrs["group"] self.mode = action.attrs["mode"] self.isdir = True elif action.name == "hardlink": target = os.path.normpath(action.get_target_path()) self.hardkey = target self.hardpaths.add(target) @staticmethod def supported(action): """Indicates whether the specified IPS action time is correctly handled by the ActionInfo constructor. """ return action in frozenset(("file", "dir", "link", "hardlink")) class UnsupportedFileFormatError(Exception): """This means that the stat.S_IFMT returned something we don't support, ie a pipe or socket. If it's appropriate for such an object to be in the proto area, then the RealFileInfo constructor will need to evolve to support it, or it will need to be in the exception list. """ def __init__(self, path, mode): Exception.__init__(self) self.path = path self.mode = mode def __str__(self): return '%s: unsupported S_IFMT %07o' % (self.path, self.mode) class RealFileInfo(FileInfo): """Object to track important-to-packaging file information. This currently handles regular files, directories, and symbolic links. For multiple RealFileInfo objects with identical hardkeys, there is no way to determine which of the hard links should be delivered as a file, and which as hardlinks. """ def __init__(self, root=None, path=None): FileInfo.__init__(self) self.path = path path = os.path.join(root, path) lstat = os.lstat(path) mode = lstat.st_mode # # Per stat.py, these cases are mutually exclusive. # if stat.S_ISREG(mode): self.hash = self.path elif stat.S_ISDIR(mode): self.isdir = True elif stat.S_ISLNK(mode): self.target = os.path.normpath(os.readlink(path)) self.mediator = None else: raise UnsupportedFileFormatError(path, mode) if not stat.S_ISLNK(mode): self.mode = "%04o" % stat.S_IMODE(mode) # # Instead of reading the group and owner from the proto area after # a non-root build, just drop in dummy values. Since we don't # compare them anywhere, this should allow at least marginally # useful comparisons of protolist-style output. # self.owner = "owner" self.group = "group" # # refcount > 1 indicates a hard link # if lstat.st_nlink > 1: # # This could get ugly if multiple proto areas reside # on different filesystems. # self.hardkey = lstat.st_ino class DirectoryTree(dict): """Meant to be subclassed according to population method. """ def __init__(self, name): dict.__init__(self) self.name = name def compare(self, other): """Compare two different sets of FileInfo objects. """ keys1 = frozenset(self.keys()) keys2 = frozenset(other.keys()) common = keys1.intersection(keys2) onlykeys1 = keys1.difference(common) onlykeys2 = keys2.difference(common) if onlykeys1: print "Entries present in %s but not %s:" % \ (self.name, other.name) for path in sorted(onlykeys1): print("\t%s" % str(self[path])) print "" if onlykeys2: print "Entries present in %s but not %s:" % \ (other.name, self.name) for path in sorted(onlykeys2): print("\t%s" % str(other[path])) print "" nodifferences = True for path in sorted(common): if self[path] != other[path]: if nodifferences: nodifferences = False print "Entries that differ between %s and %s:" \ % (self.name, other.name) print("%14s %s" % (self.name, self[path])) print("%14s %s" % (other.name, other[path])) if not nodifferences: print "" class BadProtolistFormat(Exception): """This means that the user supplied a file via -l, but at least one line from that file doesn't have the right number of fields to parse as protolist output. """ def __str__(self): return 'bad proto list entry: "%s"' % Exception.__str__(self) class ProtoTree(DirectoryTree): """Describes one or more proto directories as a dictionary of RealFileInfo objects, indexed by relative path. """ def adddir(self, proto, exceptions): """Extends the ProtoTree dictionary with RealFileInfo objects describing the proto dir, indexed by relative path. """ newentries = {} pdir = os.path.normpath(proto) strippdir = lambda r, n: os.path.join(r, n)[len(pdir)+1:] for root, dirs, files in os.walk(pdir): for name in dirs + files: path = strippdir(root, name) if path not in exceptions: try: newentries[path] = RealFileInfo(pdir, path) except OSError, e: sys.stderr.write("Warning: unable to stat %s: %s\n" % (path, e)) continue else: exceptions.remove(path) if name in dirs: dirs.remove(name) # # Find the sets of paths in this proto dir that are hardlinks # to the same inode. # # It seems wasteful to store this in each FileInfo, but we # otherwise need a linking mechanism. With this information # here, FileInfo object comparison can be self contained. # # We limit this aggregation to a single proto dir, as # represented by newentries. That means we don't need to care # about proto dirs on separate filesystems, or about hardlinks # that cross proto dir boundaries. # hk2path = {} for path, fileinfo in newentries.iteritems(): if fileinfo.hardkey: hk2path.setdefault(fileinfo.hardkey, set()).add(path) for fileinfo in newentries.itervalues(): if fileinfo.hardkey: fileinfo.hardpaths.update(hk2path[fileinfo.hardkey]) self.update(newentries) def addprotolist(self, protolist, exceptions): """Read in the specified file, assumed to be the output of protolist. This has been tested minimally, and is potentially useful for comparing across the transition period, but should ultimately go away. """ try: plist = open(protolist) except IOError, exc: raise IOError("cannot open proto list: %s" % str(exc)) newentries = {} for pline in plist: pline = pline.split() # # Use a FileInfo() object instead of a RealFileInfo() # object because we want to avoid the RealFileInfo # constructor, because there's nothing to actually stat(). # fileinfo = FileInfo() try: if pline[1] in exceptions: exceptions.remove(pline[1]) continue if pline[0] == "d": fileinfo.isdir = True fileinfo.path = pline[1] if pline[2] != "-": fileinfo.target = os.path.normpath(pline[2]) fileinfo.mode = int("0%s" % pline[3]) fileinfo.owner = pline[4] fileinfo.group = pline[5] if pline[6] != "0": fileinfo.hardkey = pline[6] newentries[pline[1]] = fileinfo except IndexError: raise BadProtolistFormat(pline) plist.close() hk2path = {} for path, fileinfo in newentries.iteritems(): if fileinfo.hardkey: hk2path.setdefault(fileinfo.hardkey, set()).add(path) for fileinfo in newentries.itervalues(): if fileinfo.hardkey: fileinfo.hardpaths.update(hk2path[fileinfo.hardkey]) self.update(newentries) class ManifestParsingError(Exception): """This means that the Manifest.set_content() raised an ActionError. We raise this, instead, to tell us which manifest could not be parsed, rather than what action error we hit. """ def __init__(self, mfile, error): Exception.__init__(self) self.mfile = mfile self.error = error def __str__(self): return "unable to parse manifest %s: %s" % (self.mfile, self.error) class ManifestTree(DirectoryTree): """Describes one or more directories containing arbitrarily many manifests as a dictionary of ActionInfo objects, indexed by the relative path of the data source within the proto area. That path may or may not be the same as the path attribute of the given action. """ def addmanifest(self, root, mfile, arch, modechecks, exceptions): """Treats the specified input file as a pkg(5) package manifest, and extends the ManifestTree dictionary with entries for the actions therein. """ mfest = manifest.Manifest() try: mfest.set_content(open(os.path.join(root, mfile)).read()) except IOError, exc: raise IOError("cannot read manifest: %s" % str(exc)) except actions.ActionError, exc: raise ManifestParsingError(mfile, str(exc)) # # Make sure the manifest is applicable to the user-specified # architecture. Assumption: if variant.arch is not an # attribute of the manifest, then the package should be # installed on all architectures. # if arch not in mfest.attributes.get("variant.arch", (arch,)): return modewarnings = set() for action in mfest.gen_actions(): if "path" not in action.attrs or \ not ActionInfo.supported(action.name): continue # # The dir action is currently fully specified, in that it # lists owner, group, and mode attributes. If that # changes in pkg(5) code, we'll need to revisit either this # code or the ActionInfo() constructor. It's possible # that the pkg(5) system could be extended to provide a # mechanism for specifying directory permissions outside # of the individual manifests that deliver files into # those directories. Doing so at time of manifest # processing would mean that validate_pkg continues to work, # but doing so at time of publication would require updates. # # # See pkgsend(1) for the use of NOHASH for objects with # datastreams. Currently, that means "files," but this # should work for any other such actions. # if getattr(action, "hash", "NOHASH") != "NOHASH": path = action.hash else: path = action.attrs["path"] # # This is the wrong tool in which to enforce consistency # on a set of manifests. So instead of comparing the # different actions with the same "path" attribute, we # use the first one. # if path in self: continue # # As with the manifest itself, if an action has specified # variant.arch, we look for the target architecture # therein. # var = None # # The name of this method changed in pkg(5) build 150, we need to # work with both sets. # if hasattr(action, 'get_variants'): var = action.get_variants() else: var = action.get_variant_template() if "variant.arch" in var and arch not in var["variant.arch"]: return self[path] = ActionInfo(action) if modechecks is not None and path not in exceptions: modewarnings.update(self[path].checkmodes(modechecks)) if len(modewarnings) > 0: print "warning: unsafe permissions in %s" % mfile for w in sorted(modewarnings): print w print "" def adddir(self, mdir, arch, modechecks, exceptions): """Walks the specified directory looking for pkg(5) manifests. """ for mfile in os.listdir(mdir): if (mfile.endswith(".mog") and stat.S_ISREG(os.lstat(os.path.join(mdir, mfile)).st_mode)): try: self.addmanifest(mdir, mfile, arch, modechecks, exceptions) except IOError, exc: sys.stderr.write("warning: %s\n" % str(exc)) def resolvehardlinks(self): """Populates mode, group, and owner for resolved (ie link target is present in the manifest tree) hard links. """ for info in self.values(): if info.name() == "hardlink": tgt = info.hardkey if tgt in self: tgtinfo = self[tgt] info.owner = tgtinfo.owner info.group = tgtinfo.group info.mode = tgtinfo.mode class ExceptionList(set): """Keep track of an exception list as a set of paths to be excluded from any other lists we build. """ def __init__(self, files, arch): set.__init__(self) for fname in files: try: self.readexceptionfile(fname, arch) except IOError, exc: sys.stderr.write("warning: cannot read exception file: %s\n" % str(exc)) def readexceptionfile(self, efile, arch): """Build a list of all pathnames from the specified file that either apply to all architectures (ie which have no trailing architecture tokens), or to the specified architecture (ie which have the value of the arch arg as a trailing architecture token.) """ excfile = open(efile) for exc in excfile: exc = exc.split() if len(exc) and exc[0][0] != "#": if arch in (exc[1:] or arch): self.add(os.path.normpath(exc[0])) excfile.close() USAGE = """%s [-v] -a arch [-e exceptionfile]... [-L|-M [-X check]...] input_1 [input_2] where input_1 and input_2 may specify proto lists, proto areas, or manifest directories. For proto lists, use one or more -l file arguments. For proto areas, use one or more -p dir arguments. For manifest directories, use one or more -m dir arguments. If -L or -M is specified, then only one input source is allowed, and it should be one or more manifest directories. These two options are mutually exclusive. The -L option is used to generate a proto list to stdout. The -M option is used to check for safe file and directory modes. By default, this causes all mode checks to be performed. Individual mode checks may be turned off using "-X check," where "check" comes from the following set of checks: m check for group or other write permissions w check for user write permissions on files and directories not owned by root s check for group/other read permission on executable files that have setuid/setgid bit(s) o check for files that could be safely owned by root """ % sys.argv[0] def usage(msg=None): """Try to give the user useful information when they don't get the command syntax right. """ if msg: sys.stderr.write("%s: %s\n" % (sys.argv[0], msg)) sys.stderr.write(USAGE) sys.exit(2) def main(argv): """Compares two out of three possible data sources: a proto list, a set of proto areas, and a set of manifests. """ try: opts, args = getopt.getopt(argv, 'a:e:Ll:Mm:p:vX:') except getopt.GetoptError, exc: usage(str(exc)) if args: usage() arch = None exceptionlists = [] listonly = False manifestdirs = [] manifesttree = ManifestTree("manifests") protodirs = [] prototree = ProtoTree("proto area") protolists = [] protolist = ProtoTree("proto list") modechecks = set() togglemodechecks = set() trees = [] comparing = set() verbose = False for opt, arg in opts: if opt == "-a": if arch: usage("may only specify one architecture") else: arch = arg elif opt == "-e": exceptionlists.append(arg) elif opt == "-L": listonly = True elif opt == "-l": comparing.add("protolist") protolists.append(os.path.normpath(arg)) elif opt == "-M": modechecks.update(DEFAULTMODECHECKS) elif opt == "-m": comparing.add("manifests") manifestdirs.append(os.path.normpath(arg)) elif opt == "-p": comparing.add("proto area") protodirs.append(os.path.normpath(arg)) elif opt == "-v": verbose = True elif opt == "-X": togglemodechecks.add(arg) if listonly or len(modechecks) > 0: if len(comparing) != 1 or "manifests" not in comparing: usage("-L and -M require one or more -m args, and no -l or -p") if listonly and len(modechecks) > 0: usage("-L and -M are mutually exclusive") elif len(comparing) != 2: usage("must specify exactly two of -l, -m, and -p") if len(togglemodechecks) > 0 and len(modechecks) == 0: usage("-X requires -M") for s in togglemodechecks: if s not in ALLMODECHECKS: usage("unknown mode check %s" % s) modechecks.symmetric_difference_update((s)) if len(modechecks) == 0: modechecks = None if not arch: usage("must specify architecture") exceptions = ExceptionList(exceptionlists, arch) originalexceptions = exceptions.copy() if len(manifestdirs) > 0: for mdir in manifestdirs: manifesttree.adddir(mdir, arch, modechecks, exceptions) if listonly: manifesttree.resolvehardlinks() for info in manifesttree.values(): print "%s" % info.protostr() sys.exit(0) if modechecks is not None: sys.exit(0) trees.append(manifesttree) if len(protodirs) > 0: for pdir in protodirs: prototree.adddir(pdir, exceptions) trees.append(prototree) if len(protolists) > 0: for plist in protolists: try: protolist.addprotolist(plist, exceptions) except IOError, exc: sys.stderr.write("warning: %s\n" % str(exc)) trees.append(protolist) if verbose and exceptions: print "Entries present in exception list but missing from proto area:" for exc in sorted(exceptions): print "\t%s" % exc print "" usedexceptions = originalexceptions.difference(exceptions) harmfulexceptions = usedexceptions.intersection(manifesttree) if harmfulexceptions: print "Entries present in exception list but also in manifests:" for exc in sorted(harmfulexceptions): print "\t%s" % exc del manifesttree[exc] print "" trees[0].compare(trees[1]) if __name__ == '__main__': try: main(sys.argv[1:]) except KeyboardInterrupt: sys.exit(1) except IOError: sys.exit(1)