xref: /illumos-gate/usr/src/tools/scripts/wsdiff.py (revision 096c97d62be876a03a0a8cdb0a540e9c84ec509f)
1#!@TOOLS_PYTHON@
2#
3# CDDL HEADER START
4#
5# The contents of this file are subject to the terms of the
6# Common Development and Distribution License (the "License").
7# You may not use this file except in compliance with the License.
8#
9# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10# or http://www.opensolaris.org/os/licensing.
11# See the License for the specific language governing permissions
12# and limitations under the License.
13#
14# When distributing Covered Code, include this CDDL HEADER in each
15# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16# If applicable, add the following below this CDDL HEADER, with the
17# fields enclosed by brackets "[]" replaced with your own identifying
18# information: Portions Copyright [yyyy] [name of copyright owner]
19#
20# CDDL HEADER END
21#
22# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23# Copyright 2018 OmniOS Community Edition (OmniOSce) Association.
24#
25
26#
27# wsdiff(1) is a tool that can be used to determine which compiled objects
28# have changed as a result of a given source change. Developers backporting
29# new features, RFEs and bug fixes need to be able to identify the set of
30# patch deliverables necessary for feature/fix realization on a patched system.
31#
# The tool works by comparing objects in two trees/proto areas (one built with,
# and one without, the source changes).
34#
35# Using wsdiff(1) is fairly simple:
36#	- Bringover to a fresh workspace
37#	- Perform a full non-debug build (clobber if workspace isn't fresh)
38#	- Move the proto area aside, call it proto.old, or something.
39#	- Integrate your changes to the workspace
40#	- Perform another full non-debug clobber build.
41#	- Use wsdiff(1) to see what changed:
42#		$ wsdiff proto.old proto
43#
44# By default, wsdiff will print the list of changed objects / deliverables to
45# stdout. If a results file is specified via -r, the list of differing objects,
46# and details about why wsdiff(1) thinks they are different will be logged to
47# the results file.
48#
49# By invoking nightly(1) with the -w option to NIGHTLY_FLAGS, nightly(1) will use
50# wsdiff(1) to report on what objects changed since the last build.
51#
52# For patch deliverable purposes, it's advised to have nightly do a clobber,
53# non-debug build.
54#
55# Think about the results. Was something flagged that you don't expect? Go look
56# at the results file to see details about the differences.
57#
58# Use the -i option in conjunction with -v and -V to dive deeper and have wsdiff(1)
59# report with more verbosity.
60#
# Usage: wsdiff [-dvVst] [-r results ] [-i filelist ] old new
#
# Where "old" is the path to the proto area built without the changes, and
# "new" is the path to the proto area built with the changes. The following
# options are supported:
#
#        -d      Print debug messages about the progress
#        -v      Do not truncate observed diffs in results
#        -V      Log *all* ELF sect diffs vs. logging the first diff found
#        -t      Use onbld tools in $SRC/tools
#        -r      Log results and observed differences
#        -s      Produce sorted list of differences
#        -i      Tell wsdiff which objects to compare via an input file list
72
73from __future__ import print_function
74import datetime, fnmatch, getopt, os, profile, io, subprocess
75import re, resource, select, shutil, signal, string, struct, sys, tempfile
76import time, threading
77from stat import *
78from subprocess import Popen, PIPE
79
# Maximum length of a human readable diff written to the results file.
# Longer diffs are truncated unless -v is given on the command line.
diffs_sz_thresh = 4096

# Locks and what each one serializes:
#
#	output_lock	standard output, and the list of different files
#			collected by difference()
#	log_lock	the results file (log_difference() and the
#			message routines)
#	wset_lock	changedFiles list (workerThread())
output_lock = threading.Lock()
log_lock = threading.Lock()
wset_lock = threading.Lock()

# Variable for thread control
keep_processing = True

# Directories searched, in order, by find_tool()
wsdiff_path = [
	"/usr/bin",
	"/usr/ccs/bin",
	"/lib/svc/bin",
	"/opt/onbld/bin",
]

# Objects that wsdiff will notice look different, but will not report.
# Keeping an exceptions list at all is *dangerous*: the *only* reason for
# anything to be listed here is that the object does not build
# deterministically, and we *cannot* fix that.
#
# These perl libraries use __DATE__ and therefore always look different.
# Ideally, we would purge the use of __DATE__ from the source, but because
# this is source we wish to distribute with Solaris "unchanged", we cannot
# modify it.
#
wsdiff_exceptions = [
	"usr/perl5/5.8.4/lib/sun4-solaris-64int/CORE/libperl.so.1",
	"usr/perl5/5.6.1/lib/sun4-solaris-64int/CORE/libperl.so.1",
	"usr/perl5/5.8.4/lib/i86pc-solaris-64int/CORE/libperl.so.1",
	"usr/perl5/5.6.1/lib/i86pc-solaris-64int/CORE/libperl.so.1",
]
117
def getoutput(cmd):
	# Run "cmd" through the shell and capture its standard output
	# (standard error is not redirected).
	#
	# Returns a tuple: (exit status, output decoded to text). Bytes
	# that cannot be decoded are replaced rather than raising.
	proc = Popen(cmd, shell=True, stdout=PIPE)
	raw_output = proc.communicate()[0]
	status = proc.returncode
	return (status, raw_output.decode(errors='replace'))
122
123#####
124# Logging routines
125#
126
127# Debug message to be printed to the screen, and the log file
def debug(msg) :
	# Print a debugging message to standard output and, when a results
	# file is in use, to that file as well. Does nothing unless the
	# global "debugon" flag is set (-d).
	#
	# The locks are taken with "with" so that they are released even
	# if a write fails; otherwise one failing print would leave every
	# other thread blocked on the lock.

	if not debugon :
		return

	# Add prefix to highlight debugging message
	msg = "## " + msg

	with output_lock :
		print(msg)
		sys.stdout.flush()

	if logging :
		with log_lock :
			print(msg, file=log)
			log.flush()
142
143# Informational message to be printed to the screen, and the log file
def info(msg) :
	# Print an informational message to standard output and, when a
	# results file is in use (global "logging"), to that file as well.
	#
	# The locks are taken with "with" so that they are released even
	# if a write raises.

	with output_lock :
		print(msg)
		sys.stdout.flush()

	if logging :
		with log_lock :
			print(msg, file=log)
			log.flush()
155
156# Error message to be printed to the screen, and the log file
def error(msg) :
	# Print an error message, prefixed with "ERROR: ", to standard
	# error and, when a results file is in use (global "logging"),
	# to that file as well.
	#
	# output_lock also serializes standard error here. The locks are
	# taken with "with" so that they are released even if a write raises.

	text = "ERROR: " + msg

	with output_lock :
		print(text, file=sys.stderr)
		sys.stderr.flush()

	if logging :
		with log_lock :
			print(text, file=log)
			log.flush()
168
169# Informational message to be printed only to the log, if there is one.
def v_info(msg) :
	# Write an informational message to the results file only.
	# Nothing is written when no results file is in use.
	#
	# The lock is taken with "with" so that it is released even if
	# the write raises.

	if not logging :
		return

	with log_lock :
		print(msg, file=log)
		log.flush()
177
178#
179# Flag a detected file difference
180# Display the fileName to stdout, and log the difference
181#
def difference(f, dtype, diffs) :
	# Flag file "f" as different.
	#
	#	f	name of the differing file, as it should be reported
	#	dtype	short description of the kind of difference
	#	diffs	human readable diff text for the results file
	#
	# Files listed in wsdiff_exceptions are silently ignored.
	# Otherwise the name is either queued in differentFiles (when
	# sorted output was requested) or printed right away, and the
	# details are passed on to log_difference().

	if f in wsdiff_exceptions :
		return

	# NOTE: "sorted" is the module level flag set by args() for -s;
	# it shadows the builtin of the same name.
	with output_lock :
		if sorted :
			differentFiles.append(f)
		else:
			print(f)
			sys.stdout.flush()

	log_difference(f, dtype, diffs)
196
197#
198# Do the actual logging of the difference to the results file
199#
def log_difference(f, dtype, diffs) :
	# Record a difference in the results file (no-op without one).
	#
	#	f	name of the differing file
	#	dtype	short description of the kind of difference
	#	diffs	human readable diff text; may be empty
	#
	# Diff text longer than diffs_sz_thresh is truncated unless the
	# global "vdiffs" flag (-v) is set. The lock is taken with "with"
	# so that it is released even if a write raises.

	if not logging :
		return

	with log_lock :
		print(f, file=log)
		print("NOTE: " + dtype + " difference detected.", file=log)

		difflen = len(diffs)
		if difflen > 0 :
			print('', file=log)

			if not vdiffs and difflen > diffs_sz_thresh :
				print(diffs[:diffs_sz_thresh], file=log)
				print("... truncated due to length: " +
				      "use -v to override ...", file=log)
			else :
				print(diffs, file=log)
			print('\n', file=log)
		log.flush()
220
221
222#####
223# diff generating routines
224#
225
226#
227# Return human readable diffs from two temporary files
228#
def diffFileData(tmpf1, tmpf2) :
	# Run diff over two temporary files and return its output as text.
	#
	# If either file looks binary (see isBinary()), both are first
	# converted with od(1) into "<name>.od" files and those are
	# diffed instead. The .od files are always removed again, even
	# when the diff command fails.
	#
	# Raises whatever getoutput() raised, after logging the failure.

	od_files = []

	# Filter the data through od(1) if the data is detected
	# as being binary
	if isBinary(tmpf1) or isBinary(tmpf2) :
		tmp_od1 = tmpf1 + ".od"
		tmp_od2 = tmpf2 + ".od"
		od_files = [ tmp_od1, tmp_od2 ]

		cmd = od_cmd + " -c -t x4" + " " + tmpf1 + " > " + tmp_od1
		os.system(cmd)
		cmd = od_cmd + " -c -t x4" + " " + tmpf2 + " > " + tmp_od2
		os.system(cmd)

		tmpf1 = tmp_od1
		tmpf2 = tmp_od2

	try:
		rc, data = getoutput(diff_cmd + " " + tmpf1 + " " + tmpf2)
	except Exception:
		error("failed to get output of command: " + diff_cmd + " "
		    + tmpf1 + " " + tmpf2)

		# Send exception for the failed command up
		raise
	finally:
		# Remove the od(1) output, as it is no longer needed.
		for od_file in od_files :
			try:
				os.unlink(od_file)
			except OSError as e:
				error("diffFileData: unlink failed %s" % e)

	return data
269
270#
# Return human readable diffs between two datasets
272#
def diffData(base, ptch, d1, d2) :
	# Write the two datasets d1 and d2 to per-thread temporary files
	# and return the human readable diff of those files.
	#
	#	base, ptch	paths whose basenames name the temp files
	#	d1, d2		text to be compared
	#
	# The temp file names include the current thread's name so that
	# worker threads do not overwrite each other's files.

	# current_thread()/.name replace the deprecated
	# currentThread()/getName() spellings.
	threadName = threading.current_thread().name
	tmpFile1 = tmpDir1 + os.path.basename(base) + threadName
	tmpFile2 = tmpDir2 + os.path.basename(ptch) + threadName

	# NOTE(review): cleanup() is defined elsewhere in this file;
	# presumably it terminates the program - confirm.
	try:
		fd1 = io.open(tmpFile1, mode='w', errors='ignore')
	except Exception:
		error("failed to open: " + tmpFile1)
		cleanup(1)

	try:
		fd2 = io.open(tmpFile2, mode='w', errors='ignore')
	except Exception:
		error("failed to open: " + tmpFile2)
		cleanup(1)

	# Close both files even if one of the writes fails.
	with fd1, fd2 :
		fd1.write(d1)
		fd2.write(d2)

	return diffFileData(tmpFile1, tmpFile2)
297
298#####
299# Misc utility functions
300#
301
302# Prune off the leading prefix from string s
def str_prefix_trunc(s, prefix) :
	# Drop the first len(prefix) characters of s. Only the length of
	# "prefix" is used: s is not checked to actually start with it.
	return s[len(prefix):]
306
307#
308# Prune off leading proto path goo (if there is one) to yield
309# the deliverable's eventual path relative to root
310# e.g. proto.base/root_sparc/usr/src/cmd/prstat => usr/src/cmd/prstat
311#
def fnFormat(fn) :
	# Strip everything up to and including the "root_<arch>/" path
	# component from fn. The name is returned unchanged when it has
	# no "root_<arch>" in it, or no "/" after that marker.
	marker = "root_" + arch

	start = fn.find(marker)
	if start != -1 :
		slash = fn.find("/", start)
		if slash != -1 :
			return fn[slash + 1:]

	return fn
324
325#####
326# Usage / argument processing
327#
328
329#
330# Display usage message
331#
def usage() :
	# Print the usage message to standard error and exit with
	# status 1. Standard output is flushed first.
	usage_text = """Usage: wsdiff [-dvVst] [-r results ] [-i filelist ] old new
        -d      Print debug messages about the progress
        -v      Do not truncate observed diffs in results
        -V      Log *all* ELF sect diffs vs. logging the first diff found
        -t      Use onbld tools in $SRC/tools
        -r      Log results and observed differences
        -s      Produce sorted list of differences
        -i      Tell wsdiff which objects to compare via an input file list"""

	sys.stdout.flush()
	print(usage_text, file=sys.stderr)
	sys.exit(1)
344
345#
346# Process command line options
347#
def args() :
	# Parse sys.argv.
	#
	# Sets the global flags debugon (-d), logging (-r), sorted (-s),
	# vdiffs (-v) and reportAllSects (-V) for the options given.
	# Calls usage(), which exits, on any malformed command line.
	#
	# Returns the tuple:
	#	(baseRoot, fileNamesFile, localTools, ptchRoot, results)

	global debugon
	global logging
	global vdiffs
	global reportAllSects
	global sorted

	fileNamesFile = ""
	results = ""
	localTools = False

	# getopt.getopt() hands back the option/value pairs and the list
	# of remaining operands. A correct invocation has exactly two
	# operands: the base (old) and ptch (new) proto areas.
	try:
		parsed, operands = getopt.getopt(sys.argv[1:], 'di:r:vVst?')
	except getopt.error:
		usage()

	if len(operands) != 2 :
		usage()

	for flag, value in parsed :
		if flag == '-d' :
			debugon = True
		elif flag == '-i' :
			fileNamesFile = value
		elif flag == '-r' :
			results = value
			logging = True
		elif flag == '-s' :
			sorted = True
		elif flag == '-v' :
			vdiffs = True
		elif flag == '-V' :
			reportAllSects = True
		elif flag == '-t' :
			localTools = True
		else :
			# Only '-?' can get here
			usage()

	baseRoot = operands[0]
	ptchRoot = operands[1]

	if not baseRoot or not ptchRoot :
		usage()

	if logging and not results :
		usage()

	# -v and -V only affect what goes into the results file
	if vdiffs and not logging :
		error("The -v option requires a results file (-r)")
		sys.exit(1)

	if reportAllSects and not logging :
		error("The -V option requires a results file (-r)")
		sys.exit(1)

	# alphabetical order
	return	baseRoot, fileNamesFile, localTools, ptchRoot, results
416
417#####
418# File identification
419#
420
421#
422# Identify the file type.
423# If it's not ELF, use the file extension to identify
424# certain file types that require special handling to
425# compare. Otherwise just return a basic "ASCII" type.
426#
def getTheFileType(f) :
	# Classify file "f".
	#
	# Returns 'Error' if the file cannot be stat'ed, 'ASCII' for an
	# empty file, 'ELF' if isELF() recognizes it, one of the types
	# in the table below if the text after the last '.' in the name
	# matches, and 'ASCII' for everything else.

	extensions = { 'a'	:	'ELF Object Archive',
		       'jar'	:	'Java Archive',
		       'html'	:	'HTML',
		       'ln'	:	'Lint Library',
		       'db'	:	'Sqlite Database' }

	try:
		size = os.stat(f)[ST_SIZE]
	except:
		error("failed to stat " + f)
		return 'Error'

	if size == 0 :
		return 'ASCII'

	if isELF(f) == 1 :
		return 'ELF'

	# Test the file extension, if the name has one
	stem, dot, extension = f.rpartition('.')
	if dot :
		return extensions.get(extension, 'ASCII')

	return 'ASCII'
452
453#
454# Return non-zero if "f" is an ELF file
455#
elfmagic = b'\177ELF'
def isELF(f) :
	# Return 1 if file "f" starts with the ELF magic number, else 0.
	# A file that cannot be opened or read counts as "not ELF".
	found = 0
	try:
		with io.open(f, mode='rb') as fd:
			leading = fd.read(len(elfmagic))

		if leading == elfmagic :
			found = 1
	except:
		pass
	return found
467
468#
# Return non-zero if "f" is binary.
470# Consider the file to be binary if it contains any null characters
471#
def isBinary(f) :
	# Return 0 if file "f" could be read and contains no NUL byte,
	# and 1 otherwise. Note that a file which cannot be opened or
	# read is therefore reported as binary.
	try:
		with io.open(f, mode='rb') as fd:
			contents = fd.read()
	except:
		return 1

	if b'\0' in contents :
		return 1
	return 0
482
483#####
484# Directory traversal and file finding
485#
486
487#
# Generate the path of every file found under the specified directory
# (file names are sorted within each directory)
489#
def findFiles(d) :
	# Generate the path of every file found under directory "d".
	#
	# Both the files of a directory and its subdirectories are
	# visited in sorted order, so two runs over the same tree always
	# produce the paths in the same order. Sorting "subdirs" in
	# place is what steers the order in which os.walk() descends.
	for path, subdirs, files in os.walk(d) :
		subdirs.sort()
		files.sort()
		for name in files :
			yield os.path.join(path, name)
495
496#
497# Examine all files in base, ptch
498#
499# Return a list of files appearing in both proto areas,
500# a list of new files (files found only in ptch) and
501# a list of deleted files (files found only in base)
502#
def protoCatalog(base, ptch) :
	# Walk both proto areas and sort their files into three lists.
	#
	#	base	path of the base (old) proto area
	#	ptch	path of the patch (new) proto area
	#
	# Returns (compFiles, newFiles, deletedFiles): files found in
	# both areas, only in ptch, and only in base. Names have the
	# leading base/ptch path removed; symbolic links are ignored.
	#
	# Membership is tested against sets: the lists can hold every
	# file in a proto area, and repeated list searches and
	# list.remove() calls would make this quadratic.

	debug("Getting the list of files in the base area")
	baseFilesList = list(findFiles(base))
	baseStringLength = len(base)
	debug("Found " + str(len(baseFilesList)) + " files")

	debug("Getting the list of files in the patch area")
	ptchFilesList = list(findFiles(ptch))
	ptchStringLength = len(ptch)
	debug("Found " + str(len(ptchFilesList)) + " files")

	# Inventory files in the base proto area
	debug("Determining the list of regular files in the base area")
	baseList = [ fn[baseStringLength:] for fn in baseFilesList
	    if not os.path.islink(fn) ]
	debug("Found " + str(len(baseList)) + " files")

	# Inventory files in the patch proto area
	debug("Determining the list of regular files in the patch area")
	ptchList = [ fn[ptchStringLength:] for fn in ptchFilesList
	    if not os.path.islink(fn) ]
	debug("Found " + str(len(ptchList)) + " files")

	ptchSet = set(ptchList)

	# Deleted files appear in the base area, but not the patch area
	debug("Searching for deleted files by comparing the lists")
	deletedFiles = [ fn for fn in baseList if fn not in ptchSet ]
	debug("Found " + str(len(deletedFiles)) + " deleted files")

	# What is left of the base list appears in both proto areas
	debug("Eliminating deleted files from the list of objects")
	compFiles = [ fn for fn in baseList if fn in ptchSet ]
	debug("List for comparison reduced to " + str(len(compFiles))
	    + " files")

	# New files appear in the patch area, but not the base
	debug("Getting the list of newly added files")
	compSet = set(compFiles)
	newFiles = [ fn for fn in ptchList if fn not in compSet ]
	debug("Found " + str(len(newFiles)) + " new files")

	return compFiles, newFiles, deletedFiles
567
568#
569# Examine the files listed in the input file list
570#
571# Return a list of files appearing in both proto areas,
572# a list of new files (files found only in ptch) and
573# a list of deleted files (files found only in base)
574#
def flistCatalog(base, ptch, flist) :
	# Catalog the files named, one per line, in the file "flist".
	#
	#	base	path of the base (old) proto area
	#	ptch	path of the patch (new) proto area
	#	flist	path of the input file list
	#
	# Returns (compFiles, newFiles, deletedFiles): files found in
	# both areas, only in ptch, and only in base. Symbolic links are
	# ignored, and names found in neither tree are reported with
	# error() and skipped. Blank lines are ignored.

	compFiles = []		# List of files in both proto areas
	newFiles = []		# New files detected
	deletedFiles = []	# Deleted files

	# NOTE(review): cleanup() is defined elsewhere in this file;
	# presumably it terminates the program - confirm.
	try:
		with open(flist, "r") as fd :
			files = fd.readlines()
	except (IOError, OSError) :
		error("could not open: " + flist)
		cleanup(1)

	for f in files :
		# the fileNames have a trailing '\n'
		f = f.rstrip()
		if not f :
			continue

		ptch_present = True
		base_present = True

		# Start from the name as listed, so that "fn" is always
		# defined for this entry (and never left over from the
		# previous one) when the file is found in neither tree.
		fn = f

		# The objects in the file list have paths relative
		# to $ROOT or to the base/ptch directory specified on
		# the command line.
		# If it's relative to $ROOT, we'll need to add back the
		# root_`uname -p` goo we stripped off in fnFormat()
		if os.path.exists(base + f) :
			fn = f
		elif os.path.exists(base + "root_" + arch + "/" + f) :
			fn = "root_" + arch + "/" + f
		else :
			base_present = False

		if base_present :
			if not os.path.exists(ptch + fn) :
				ptch_present = False
		else :
			if os.path.exists(ptch + f) :
				fn = f
			elif os.path.exists(ptch + "root_" + arch + "/" + f) :
				fn = "root_" + arch + "/" + f
			else :
				ptch_present = False

		if os.path.islink(base + fn) :	# ignore links
			base_present = False
		if os.path.islink(ptch + fn) :
			ptch_present = False

		if base_present and ptch_present :
			compFiles.append(fn)
		elif base_present :
			deletedFiles.append(fn)
		elif ptch_present :
			newFiles.append(fn)
		else :
			# A link in both trees is skipped silently
			if (os.path.islink(base + fn) and
			    os.path.islink(ptch + fn)) :
				continue
			error(f + " in file list, but not in either tree. " +
			    "Skipping...")

	return compFiles, newFiles, deletedFiles
642
643
644#
645# Build a fully qualified path to an external tool/utility.
646# Consider the default system locations. For onbld tools, if
647# the -t option was specified, we'll try to use built tools in $SRC tools,
648# and otherwise, we'll fall back on /opt/onbld/
649#
def find_tool(tool) :
	# Locate an external tool and return the command to run it.
	#
	# "tool" is used as given if it names an existing path;
	# otherwise each directory in wsdiff_path is searched, both
	# directly and in its "<arch>" subdirectory. Exits with an
	# error message if the tool cannot be found.
	#
	# The result always ends in a single space, whichever way the
	# tool was found: callers build shell commands by appending
	# arguments directly to it (e.g. sqlite_cmd + base).

	# First, check what was passed
	if os.path.exists(tool) :
		return tool + " "

	# Next try in wsdiff path
	for pdir in wsdiff_path :
		location = pdir + "/" + tool
		if os.path.exists(location) :
			return location + " "

		location = pdir + "/" + arch + "/" + tool
		if os.path.exists(location) :
			return location + " "

	error("Could not find path to: " + tool)
	sys.exit(1)
668
669
670#####
671# ELF file comparison helper routines
672#
673
674#
675# Return a dictionary of ELF section types keyed by section name
676#
def get_elfheader(f) :
	# Run "elfdump -c" on file "f" and return a dictionary that maps
	# each section name to its section type.
	#
	# Raises RuntimeError, after logging the problem, if elfdump
	# produced no output or a section header has no "sh_type:".
	# (These used to be a bare "raise" with no active exception and
	# an accidental IndexError; callers catch either way.)

	header = {}

	rc, hstring = getoutput(elfdump_cmd + " -c " + f)

	if len(hstring) == 0 :
		error("Failed to dump ELF header for " + f)
		raise RuntimeError("Failed to dump ELF header for " + f)

	# elfdump(1) dumps the section headers with the section name
	# following "sh_name:", and the section type following "sh_type:"
	sections = hstring.split("Section Header")
	for sect in sections :
		datap = sect.find("sh_name:")
		if datap == -1 :
			continue
		section = sect[datap:].split()[1]

		datap = sect.find("sh_type:")
		if datap == -1 :
			error("Could not get type for sect: " + section +
			      " in " + f)
			raise RuntimeError("Could not get type for sect: " +
			    section + " in " + f)

		# The type is the third word from "sh_type:" onwards;
		# presumably the second one is an opening bracket.
		sh_type = sect[datap:].split()[2]
		header[section] = sh_type

	return header
704
705#
706# Extract data in the specified ELF section from the given file
707#
def extract_elf_section(f, section) :
	# Return the output of "dump -sn <section>" for file "f", with
	# everything up to and including the first ':' removed.
	#
	# Raises RuntimeError, after logging the problem, if dump
	# produced no output. (This used to be a bare "raise" with no
	# active exception; callers catch either way.)

	rc, data = getoutput(dump_cmd + " -sn " + section + " " + f)

	if len(data) == 0 :
		error(dump_cmd + "yielded no data on section " + section +
		    " of " + f)
		raise RuntimeError("no data for section " + section +
		    " of " + f)

	# dump(1) displays the file name to start...
	# get past it to the data itself
	dbegin = data.find(":") + 1
	data = data[dbegin:]

	return (data)
724
725#
726# Return a (hopefully meaningful) human readable set of diffs
727# for the specified ELF section between f1 and f2
728#
729# Depending on the section, various means for dumping and diffing
730# the data may be employed.
731#
text_sections = [ '.text', '.init', '.fini' ]
def diff_elf_section(f1, f2, section, sh_type) :
	# Dump one ELF section of f1 and f2 into per-thread temporary
	# files and return the human readable diff of the two dumps.
	#
	#	f1, f2		paths of the two ELF objects
	#	section		name of the section to compare
	#	sh_type		section type, as found by get_elfheader()
	#
	# The temporary files are removed again, even if producing the
	# diff fails.

	# current_thread()/.name replace the deprecated
	# currentThread()/getName() spellings.
	threadName = threading.current_thread().name
	tmpFile1 = tmpDir1 + os.path.basename(f1) + threadName
	tmpFile2 = tmpDir2 + os.path.basename(f2) + threadName

	# Sections which elfdump can display with a dedicated option
	elfdump_opts = {
		".group"	: " -g ",
		".hash"		: " -h ",
		".dynamic"	: " -d ",
		".got"		: " -G ",
		".SUNW_cap"	: " -H ",
		".interp"	: " -i ",
	}

	# Build the shell command which dumps the section of "obj"
	# into the file "out"
	def section_dump_cmd(obj, out) :
		if sh_type == "SHT_RELA" :
			return elfdump_cmd + " -r " + obj + " > " + out
		if section in elfdump_opts :
			return (elfdump_cmd + elfdump_opts[section] + obj +
			    " > " + out)
		if section == ".symtab" or section == ".dynsym" :
			return (elfdump_cmd + " -s -N " + section + " " +
			    obj + " > " + out)
		if section in text_sections :
			# dis sometimes complains when it hits something
			# it doesn't know how to disassemble. Just ignore
			# it, as the output being generated here is human
			# readable, and we've already correctly flagged
			# the difference.
			return (dis_cmd + " -t " + section + " " + obj +
			    " 2>/dev/null | grep -v disassembly > " + out)
		# Anything else: have elfdump write the section to "out"
		return (elfdump_cmd + " -w " + out + " -N " +
		    section + " " + obj)

	cmd1 = section_dump_cmd(f1, tmpFile1)
	cmd2 = section_dump_cmd(f2, tmpFile2)

	os.system(cmd1)
	os.system(cmd2)

	try:
		data = diffFileData(tmpFile1, tmpFile2)
	finally:
		# remove temp files as we no longer need them
		for tmpFile in (tmpFile1, tmpFile2) :
			try:
				os.unlink(tmpFile)
			except OSError as e:
				error("diff_elf_section: unlink failed %s" % e)

	return (data)
796
797#
798# compare the relevant sections of two ELF binaries
799# and report any differences
800#
# Returns: 1 if any differences found
802#          0 if no differences found
803#	  -1 on error
804#
805
# ELF sections which compareElfs() never compares. A difference in one
# of these is not considered significant for identifying patch
# deliverables.
sections_to_skip = [
	".SUNW_signature",
	".comment",
	".SUNW_ctf",
	".debug",
	".plt",
	".rela.bss",
	".rela.plt",
	".line",
	".note",
	".compcom",
]

# ELF sections which compareElfs() moves to the front of its list of
# sections to compare. Each one is inserted at the head of that list in
# turn, so the last entry here ends up being compared first.
sections_preferred = [
	".rodata.str1.8",
	".rodata.str1.1",
	".rodata",
	".data1",
	".data",
	".text",
]
828
def compareElfs(base, ptch, quiet) :
	# Compare two ELF objects section by section.
	#
	#	base, ptch	paths of the old and new object
	#	quiet		if true, only determine whether the objects
	#			differ; report and log nothing
	#
	# Returns 1 if a difference was found and 0 if none was found.
	# If a section header or section cannot be dumped, None is
	# returned (not -1); callers which only test the result for
	# truth treat that like "no difference". This is kept as is
	# for compatibility.
	#
	# With -V (reportAllSects) every differing section is logged,
	# and the result is still 1 when any of them differed. This
	# used to be 0, which made compareArchives() miss differing
	# archive members whenever -V was in effect.

	try:
		base_header = get_elfheader(base)
		ptch_header = get_elfheader(ptch)
	except Exception:
		return

	sections = list(base_header.keys())
	fileName = fnFormat(base)

	# Derive the list of ELF sections found only in
	# either e1 or e2.
	e1_only_sections = [ s for s in sections if s not in ptch_header ]
	e2_only_sections = [ s for s in ptch_header if s not in base_header ]

	for only, have, lack in ((e1_only_sections, base, ptch),
	    (e2_only_sections, ptch, base)) :
		if len(only) == 0 :
			continue

		if quiet :
			return 1

		data = ""
		if logging :
			slist = "".join([ sect + "\t" for sect in only ])
			data = ("ELF sections found in " +
				have + " but not in " + lack +
				"\n\n" + slist)

		difference(fileName, "ELF", data)
		return 1

	# Look for preferred sections, and put those at the
	# top of the list of sections to compare
	for psect in sections_preferred :
		if psect in sections :
			sections.remove(psect)
			sections.insert(0, psect)

	# Compare ELF sections
	differs = False
	first_section = True
	for sect in sections :

		if sect in sections_to_skip :
			continue

		try:
			s1 = extract_elf_section(base, sect)
			s2 = extract_elf_section(ptch, sect)
		except Exception:
			return

		if s1 == s2 :
			continue

		# Nothing is reported in quiet mode, so the first
		# difference settles the result.
		if quiet :
			return 1

		differs = True
		sh_type = base_header[sect]
		data = diff_elf_section(base, ptch, sect, sh_type)

		# If all ELF sections are being reported, then
		# invoke difference() to flag the file name to
		# stdout only once. Any other section differences
		# should be logged to the results file directly
		if first_section :
			difference(fileName, "ELF " + sect, data)
		else :
			log_difference(fileName, "ELF " + sect, data)

		if not reportAllSects :
			return 1
		first_section = False

	if differs :
		return 1
	return 0
935
936#####
937# recursively remove 2 directories
938#
# Used for removal of temporary directory structures (ignores any errors).
940#
def clearTmpDirs(dir1, dir2) :
	# Remove dir1 and then dir2, with everything below them.
	# A path which is not an existing directory is left alone, and
	# errors during the removal are ignored.
	for tmpdir in (dir1, dir2) :
		if os.path.isdir(tmpdir) :
			shutil.rmtree(tmpdir, True)
948
949
950#####
951# Archive object comparison
952#
953# Returns 1 if difference detected
954#         0 if no difference detected
955#        -1 on error
956#
def compareArchives(base, ptch, fileType) :
	# Compare two archives member by member.
	#
	#	base, ptch	paths of the old and new archive
	#	fileType	"Java Archive" or "ELF Object Archive"
	#
	# Returns 1 if a difference was detected (it is reported via
	# difference()), 0 if not, and -1 on error.
	#
	# The archives are unpacked into per-thread temporary
	# directories. Those are removed again on every way out of this
	# function once they have been created, including when an
	# exception passes through.

	fileName = fnFormat(base)

	# current_thread()/.name replace the deprecated
	# currentThread()/getName() spellings.
	threadName = threading.current_thread().name
	ArchTmpDir1 = tmpDir1 + os.path.basename(base) + threadName
	ArchTmpDir2 = tmpDir2 + os.path.basename(base) + threadName

	#
	# Be optimistic and first try a straight file compare
	# as it will allow us to finish up quickly.
	#
	if compareBasic(base, ptch, True, fileType) == 0 :
		return 0

	try:
		os.makedirs(ArchTmpDir1)
	except OSError as e:
		error("compareArchives: makedir failed %s" % e)
		return -1
	try:
		os.makedirs(ArchTmpDir2)
	except OSError as e:
		error("compareArchives: makedir failed %s" % e)
		# Do not leave the first directory behind
		shutil.rmtree(ArchTmpDir1, True)
		return -1

	try:
		# copy over the objects to the temp areas, and
		# unpack them
		for src, dest in ((base, ArchTmpDir1), (ptch, ArchTmpDir2)) :
			cpCmd = "cp -fp " + src + " " + dest
			rc, output = getoutput(cpCmd)
			if rc != 0 :
				error(cpCmd + " failed: " + output)
				return -1

		bname = fileName.split('/')[-1]
		if fileType == "Java Archive" :
			unpack = ("jar xf " + bname +
			    "; rm -f " + bname + " META-INF/MANIFEST.MF")
		elif fileType == "ELF Object Archive" :
			unpack = ("/usr/ccs/bin/ar x " +
			    bname + "; rm -f " + bname)
		else :
			error("unexpected file type: " + fileType)
			return -1

		os.system("cd " + ArchTmpDir1 + "; " + unpack)
		os.system("cd " + ArchTmpDir2 + "; " + unpack)

		# List the members, with the leading temp directory
		# trimmed off. The sets make the membership tests below
		# cheap for archives with many members.
		baseFlist = [ str_prefix_trunc(fn, ArchTmpDir1)
		    for fn in findFiles(ArchTmpDir1) ]
		ptchFlist = [ str_prefix_trunc(fn, ArchTmpDir2)
		    for fn in findFiles(ArchTmpDir2) ]
		baseSet = set(baseFlist)
		ptchSet = set(ptchFlist)

		for fn in ptchFlist :
			if fn not in baseSet :
				difference(fileName, fileType,
					   fn + " added to " + fileName)
				return 1

		for fn in baseFlist :
			if fn not in ptchSet :
				difference(fileName, fileType,
					   fn + " removed from " + fileName)
				return 1

			differs = compareOneFile((ArchTmpDir1 + fn),
			    (ArchTmpDir2 + fn), True)
			if differs :
				difference(fileName, fileType,
					   fn + " in " + fileName + " differs")
				return 1

		return 0
	finally:
		clearTmpDirs(ArchTmpDir1, ArchTmpDir2)
1055
1056#####
1057# (Basic) file comparison
1058#
1059# There's some special case code here for Javadoc HTML files
1060#
1061# Returns 1 if difference detected
1062#         0 if no difference detected
1063#        -1 on error
1064#
# Return "data" (bytes) without its first javadoc generation comment,
# or unchanged if it has no complete one.
def _snipJavadocStamp(data) :
	beginStr = b"<!-- Generated by javadoc"
	endStr = b"-->\n"

	begin = data.find(beginStr)
	if begin == -1 :
		return data
	end = data.find(endStr, begin)
	if end == -1 :
		return data
	return data[:begin] + data[end + len(endStr):]

def compareBasic(base, ptch, quiet, fileType) :
	# Compare the contents of two files.
	#
	#	base, ptch	paths of the old and new file
	#	quiet		if true, only determine whether the files
	#			differ; report and log nothing
	#	fileType	file type as reported; for "HTML" the
	#			javadoc generation comment is ignored
	#
	# Returns 1 if a difference was detected, 0 if not, and -1 if
	# one of the files could not be read.
	#
	# The files are compared as raw bytes. Comparing decoded text
	# would make different undecodable bytes, or different line
	# endings, look the same. The data is only decoded to text in
	# order to produce the human readable diff.

	if quiet and os.stat(base)[ST_SIZE] != os.stat(ptch)[ST_SIZE] :
		return 1

	try:
		with io.open(base, mode='rb') as baseFile :
			baseData = baseFile.read()
	except (IOError, OSError) :
		error("could not open " + base)
		return -1
	try:
		with io.open(ptch, mode='rb') as ptchFile :
			ptchData = ptchFile.read()
	except (IOError, OSError) :
		error("could not open " + ptch)
		return -1

	if fileType == "HTML" :
		# The comment is located in each file on its own, so a
		# comment of different length or position in the patch
		# file is still removed correctly.
		baseData = _snipJavadocStamp(baseData)
		ptchData = _snipJavadocStamp(ptchData)

	if baseData == ptchData :
		return 0

	if not quiet :
		diffs = diffData(base, ptch,
		    baseData.decode(errors='replace'),
		    ptchData.decode(errors='replace'))
		difference(fnFormat(base), fileType, diffs)

	return 1
1114
1115
1116#####
1117# Compare two objects by producing a data dump from
1118# each object, and then comparing the dump data
1119#
1120# Returns: 1 if a difference is detected
1121#          0 if no difference detected
1122#         -1 upon error
1123#
def compareByDumping(base, ptch, quiet, fileType) :
	"""Compare two objects by dumping each to text and comparing the dumps.

	base     -- path of the object from the old (base) tree
	ptch     -- path of the object from the new (patched) tree
	quiet    -- when true, only return the verdict; do not report
	            the difference
	fileType -- "Lint Library" (dumped with lintdump_cmd) or
	            "Sqlite Database" (dumped with sqlite_cmd)

	Returns 1 if a difference is detected, 0 if no difference is
	detected, and -1 upon error (unsupported type, or a dump file
	could not be opened).
	"""

	fileName = fnFormat(base)

	# The thread name is appended so that concurrent workers handling
	# objects with the same basename do not share a dump file.
	threadName = threading.current_thread().name
	tmpFile1 = tmpDir1 + os.path.basename(base) + threadName
	tmpFile2 = tmpDir2 + os.path.basename(ptch) + threadName

	if fileType == "Lint Library" :
		baseCmd = (lintdump_cmd + " -ir " + base +
			  " | egrep -v '(LINTOBJ|LINTMOD):'" +
			  " | grep -v PASS[1-3]:" +
			  " > " + tmpFile1)
		ptchCmd = (lintdump_cmd + " -ir " + ptch +
			  " | egrep -v '(LINTOBJ|LINTMOD):'" +
			  " | grep -v PASS[1-3]:" +
			  " > " + tmpFile2)
	elif fileType == "Sqlite Database" :
		baseCmd = ("echo .dump | " + sqlite_cmd + base + " > " +
			  tmpFile1)
		ptchCmd = ("echo .dump | " + sqlite_cmd + ptch + " > " +
			  tmpFile2)
	else :
		# Without this branch the commands would be unbound below
		error("compareByDumping: unsupported file type " + fileType)
		return -1

	os.system(baseCmd)
	os.system(ptchCmd)

	# Whatever the outcome, the dump files are removed on the way out.
	try:
		dumps = []
		for tmpFile in (tmpFile1, tmpFile2) :
			try:
				# errors='replace' keeps undecodable bytes in a
				# dump from aborting the comparison
				with io.open(tmpFile, errors='replace') as dumpFile :
					dumps.append(dumpFile.read())
			except (IOError, OSError):
				error("could not open: " + tmpFile)
				return -1
		baseData, ptchData = dumps

		if baseData == ptchData :
			return 0

		if not quiet :
			# The diff is produced from the dump files themselves,
			# so it has to happen before they are unlinked.
			data = diffFileData(tmpFile1, tmpFile2)
			difference(fileName, fileType, data)
		return 1
	finally:
		_removeDumpFile(tmpFile1)
		_removeDumpFile(tmpFile2)

def _removeDumpFile(path) :
	"""Unlink one temporary dump file, reporting (not raising) failures."""
	if not os.path.exists(path) :
		# Never created (e.g. the dump itself failed); nothing to do
		return
	try:
		os.unlink(path)
	except OSError as e:
		error("compareByDumping: unlink failed %s" % e)
1191
1192#####
1193#
1194# SIGINT signal handler. Changes thread control variable to tell the threads
1195# to finish their current job and exit.
1196#
def discontinue_processing(signl, frme):
	"""SIGINT handler: announce the interrupt and clear keep_processing.

	signl and frme are the signal number and stack frame handed to
	every signal handler; neither is used. Always returns 0.
	"""
	global keep_processing

	sys.stderr.write("Caught Ctrl-C, stopping the threads" + "\n")

	# The worker loop tests this flag before taking another file
	keep_processing = False
	return 0
1204
1205#####
1206#
1207# worker thread for changedFiles processing
1208#
class workerThread(threading.Thread) :
	"""Worker that drains the shared changedFiles list.

	Each pass takes one relative file name off changedFiles while
	holding wset_lock, then compares the copy under baseRoot with the
	copy under ptchRoot. The thread ends when the list is empty or
	when keep_processing has been cleared.
	"""
	def run(self):
		while keep_processing :
			# Only the removal from the shared list is done under
			# the lock; the comparison itself runs unlocked.
			with wset_lock :
				try :
					relPath = changedFiles.pop()
				except IndexError :
					# there is nothing more to do
					return

			compareOneFile(baseRoot + relPath, ptchRoot + relPath,
			    False)
1233
1234
1235#####
1236# Compare two objects. Detect type changes.
1237# Vector off to the appropriate type specific
1238# compare routine based on the type.
1239#
def compareOneFile(base, ptch, quiet) :
	"""Compare one object from the base tree with its patched counterpart.

	The type of each file is determined first. A type lookup that
	yields 'Error' gives -1; a type change gives 1 (and is reported
	unless quiet is set). Otherwise the result of the type-specific
	comparison routine is returned as is.
	"""

	baseType = getTheFileType(base)
	ptchType = getTheFileType(ptch)

	if 'Error' in (baseType, ptchType) :
		return -1

	fileName = fnFormat(base)

	# A change of type is itself a difference; nothing else to compare
	if baseType != ptchType :
		if not quiet :
			difference(fileName, "file type",
			    baseType + " to " + ptchType)
		return 1

	fileType = baseType

	if fileType == 'ELF' :
		return compareElfs(base, ptch, quiet)

	if fileType in ('Java Archive', 'ELF Object Archive') :
		return compareArchives(base, ptch, fileType)

	if fileType in ('Lint Library', 'Sqlite Database') :
		return compareByDumping(base, ptch, quiet, fileType)

	# HTML, or else it has to be some variety of text file
	return compareBasic(base, ptch, quiet, fileType)
1277
1278# Cleanup and self-terminate
def cleanup(ret) :
	"""Remove the temporary directories, close the log and exit.

	ret is passed to sys.exit() as the exit status, so this function
	does not return.
	"""

	debug("Performing cleanup (" + str(ret) + ")")

	# Either directory may already be gone; only remove what exists
	if os.path.isdir(tmpDir1) :
		shutil.rmtree(tmpDir1)
	if os.path.isdir(tmpDir2) :
		shutil.rmtree(tmpDir2)

	# The log handle is only touched when logging is enabled
	if logging :
		log.close()

	sys.exit(ret)
1292
def main() :
	"""Run one wsdiff comparison from start to finish.

	Parses the command line via args(), opens the results file when
	logging is enabled, locates the external tools, catalogs the new,
	deleted and to-be-compared objects, compares the latter with a
	pool of workerThread instances, and finally calls cleanup(),
	which exits the process (status 1 if interrupted, else 0).
	"""

	# Log file handle
	global log

	# Globals relating to command line options
	global logging, vdiffs, reportAllSects

	# Named temporary files / directories
	global tmpDir1, tmpDir2

	# Command paths
	global lintdump_cmd, elfdump_cmd, dump_cmd, dis_cmd, od_cmd, diff_cmd, sqlite_cmd

	# Default search path
	global wsdiff_path

	# Essentially "uname -p"
	global arch

	# changed files for worker thread processing
	global changedFiles
	global baseRoot
	global ptchRoot

	# Sort the list of files from a temporary file.
	# NOTE: this module-level flag shadows the sorted() builtin, so
	# the builtin must not be called anywhere in this function.
	global sorted
	global differentFiles

	# Debugging indicator
	global debugon

	# Some globals need to be initialized
	debugon = logging = vdiffs = reportAllSects = sorted = False


	# Process command line arguments
	# Return values are returned from args() in alpha order
	# Note that args() also sets the globals:
	#	logging to True if verbose logging (to a file) was enabled
	#	vdiffs to True if logged differences aren't to be truncated
	#	reportAllSects to True if all ELF section differences are to be reported
	#
	baseRoot, fileNamesFile, localTools, ptchRoot, results = args()

	#
	# Set up the results/log file
	#
	if logging :
		try:
			log = open(results, "w")
		except (IOError, OSError):
			# Logging is switched off before reporting, as there
			# is no usable log handle at this point.
			logging = False
			# Report the path that failed; "log" is not bound here
			error("failed to open log file: " + results)
			sys.exit(1)

		dateTimeStr= "# %04d-%02d-%02d at %02d:%02d:%02d" % time.localtime()[:6]
		v_info("# This file was produced by wsdiff")
		v_info(dateTimeStr)

	# Changed files (used only for the sorted case)
	if sorted :
		differentFiles = []

	#
	# Build paths to the required tools
	#
	# Try to look for tools in $SRC/tools if the "-t" option
	# was specified
	#
	rc, arch = getoutput("uname -p")
	arch = arch.rstrip()
	if localTools :
		try:
			src = os.environ['SRC']
		except KeyError:
			error("-t specified, but $SRC not set. Cannot find $SRC/tools")
			src = ""
		if len(src) > 0 :
			wsdiff_path.insert(0, src + "/tools/proto/opt/onbld/bin")

	lintdump_cmd = find_tool("lintdump")
	elfdump_cmd = find_tool("elfdump")
	dump_cmd = find_tool("dump")
	od_cmd = find_tool("od")
	dis_cmd = find_tool("dis")
	diff_cmd = find_tool("diff")
	sqlite_cmd = find_tool("sqlite")

	#
	# Set resource limit for number of open files as high as possible.
	# This might get handy with big number of threads.
	#
	(nofile_soft, nofile_hard) = resource.getrlimit(resource.RLIMIT_NOFILE)
	try:
		resource.setrlimit(resource.RLIMIT_NOFILE,
		    (nofile_hard, nofile_hard))
	except (ValueError, OSError, resource.error):
		error("cannot set resource limits for number of open files")
		sys.exit(1)

	#
	# validate the base and patch paths
	#
	if baseRoot[-1] != '/' :
		baseRoot += '/'

	if ptchRoot[-1] != '/' :
		ptchRoot += '/'

	if not os.path.exists(baseRoot) :
		error("old proto area: " + baseRoot + " does not exist")
		sys.exit(1)

	if not os.path.exists(ptchRoot) :
		error("new proto area: " + ptchRoot + " does not exist")
		sys.exit(1)

	#
	# log some information identifying the run
	#
	v_info("Old proto area: " + baseRoot)
	v_info("New proto area: " + ptchRoot)
	v_info("Results file: " + results + "\n")

	#
	# Set up the temporary directories / files
	# Could use python's tmpdir routines, but these should
	# be easier to identify / keep around for debugging
	pid = os.getpid()
	tmpDir1 = "/tmp/wsdiff_tmp1_" + str(pid) + "/"
	tmpDir2 = "/tmp/wsdiff_tmp2_" + str(pid) + "/"
	try:
		os.makedirs(tmpDir1)
	except OSError as e:
		error("main: makedir failed %s" % e)
	try:
		os.makedirs(tmpDir2)
	except OSError as e:
		error("main: makedir failed %s" % e)

	# Derive a catalog of new, deleted, and to-be-compared objects
	# either from the specified base and patch proto areas, or
	# from an input file list
	newOrDeleted = False

	if fileNamesFile != "" :
		changedFiles, newFiles, deletedFiles = \
			      flistCatalog(baseRoot, ptchRoot, fileNamesFile)
	else :
		changedFiles, newFiles, deletedFiles = \
				protoCatalog(baseRoot, ptchRoot)

	if len(newFiles) > 0 :
		newOrDeleted = True
		info("\nNew objects found: ")

		if sorted :
			newFiles.sort()
		for fn in newFiles :
			info(fnFormat(fn))

	if len(deletedFiles) > 0 :
		newOrDeleted = True
		info("\nObjects removed: ")

		if sorted :
			deletedFiles.sort()
		for fn in deletedFiles :
			info(fnFormat(fn))

	if newOrDeleted :
		info("\nChanged objects: ")
	if sorted :
		debug("The list will appear after the processing is done")

	# Here's where all the heavy lifting happens
	# Perform a comparison on each object appearing in
	# both proto areas. compareOneFile will examine the
	# file types of each object, and will vector off to
	# the appropriate comparison routine, where the compare
	# will happen, and any differences will be reported / logged

	# determine maximum number of worker threads by using
	# DMAKE_MAX_JOBS environment variable set by nightly(1)
	# or get number of CPUs in the system
	try:
		max_threads = int(os.environ['DMAKE_MAX_JOBS'])
	except (KeyError, ValueError):
		try:
			max_threads = os.sysconf("SC_NPROCESSORS_ONLN")
		except (ValueError, OSError, AttributeError):
			max_threads = -1
		# If we cannot get number of online CPUs in the system
		# run unparallelized otherwise bump the number up 20%
		# to achieve best results.
		if max_threads == -1 :
			max_threads = 1
		else :
			# Floor division keeps this an int; range() below
			# rejects the float that "/" yields on Python 3.
			max_threads += max_threads // 5

	# With no worker at all nothing would ever be compared
	if max_threads < 1 :
		max_threads = 1

	# Set signal handler to attempt graceful exit
	debug("Setting signal handler")
	signal.signal( signal.SIGINT, discontinue_processing )

	# Create and unleash the threads
	# Only at most max_threads must be running at any moment
	mythreads = []
	debug("Spawning " + str(max_threads) + " threads")
	for i in range(max_threads) :
		thread = workerThread()
		mythreads.append(thread)
		thread.start()

	# Wait for the threads to finish and do cleanup if interrupted.
	# The loop polls once a second until no worker is alive.
	debug("Waiting for the threads to finish")
	while True in [thread.is_alive() for thread in mythreads] :
		# Some threads are still going
		time.sleep(1)

	# Interrupted by SIGINT
	if not keep_processing :
		cleanup(1)

	# If the list of differences was sorted it is stored in an array
	if sorted :
		differentFiles.sort()
		for f in differentFiles :
			info(fnFormat(f))

	# We're done, cleanup.
	cleanup(0)
1526
if __name__ == "__main__" :
	# Run the tool only when executed as a script. An interrupt that
	# surfaces as KeyboardInterrupt goes through the normal cleanup
	# path with a failure status.
	try:
		main()
	except KeyboardInterrupt :
		cleanup(1)
1532
1533