xref: /illumos-gate/usr/src/tools/scripts/wsdiff.py (revision c559157643fef9f9afb0414e00a3579407ba3052)
1#!@TOOLS_PYTHON@
2#
3# CDDL HEADER START
4#
5# The contents of this file are subject to the terms of the
6# Common Development and Distribution License (the "License").
7# You may not use this file except in compliance with the License.
8#
9# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10# or http://www.opensolaris.org/os/licensing.
11# See the License for the specific language governing permissions
12# and limitations under the License.
13#
14# When distributing Covered Code, include this CDDL HEADER in each
15# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16# If applicable, add the following below this CDDL HEADER, with the
17# fields enclosed by brackets "[]" replaced with your own identifying
18# information: Portions Copyright [yyyy] [name of copyright owner]
19#
20# CDDL HEADER END
21#
22# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23# Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
24#
25
26#
27# wsdiff(1) is a tool that can be used to determine which compiled objects
28# have changed as a result of a given source change. Developers backporting
29# new features, RFEs and bug fixes need to be able to identify the set of
30# patch deliverables necessary for feature/fix realization on a patched system.
31#
32# The tool works by comparing objects in two trees/proto areas (one built with,
33# and one without, the source changes).
34#
35# Using wsdiff(1) is fairly simple:
36#	- Bringover to a fresh workspace
37#	- Perform a full non-debug build (clobber if workspace isn't fresh)
38#	- Move the proto area aside, call it proto.old, or something.
39#	- Integrate your changes to the workspace
40#	- Perform another full non-debug clobber build.
41#	- Use wsdiff(1) to see what changed:
42#		$ wsdiff proto.old proto
43#
44# By default, wsdiff will print the list of changed objects / deliverables to
45# stdout. If a results file is specified via -r, the list of differing objects,
46# and details about why wsdiff(1) thinks they are different will be logged to
47# the results file.
48#
49# By invoking nightly(1) with the -w option to NIGHTLY_FLAGS, nightly(1) will
50# use wsdiff(1) to report on what objects changed since the last build.
51#
52# For patch deliverable purposes, it's advised to have nightly do a clobber,
53# non-debug build.
54#
55# Think about the results. Was something flagged that you don't expect? Go look
56# at the results file to see details about the differences.
57#
58# Use the -i option in conjunction with -v and -V to dive deeper and have
59# wsdiff(1) report with more verbosity.
60#
61# Usage: wsdiff [-vVt] [-r results ] [-i filelist ] old new
62#
63# Where "old" is the path to the proto area build without the changes, and
64# "new" is the path to the proto area built with the changes. The following
65# options are supported:
66#
67#        -v      Do not truncate observed diffs in results
68#        -V      Log *all* ELF sect diffs vs. logging the first diff found
69#        -t      Use onbld tools in $SRC/tools
70#        -r      Log results and observed differences
71#        -i      Tell wsdiff which objects to compare via an input file list
72
73from __future__ import print_function
74import datetime, fnmatch, getopt, os, profile, io, subprocess
75import re, resource, select, shutil, signal, string, struct, sys, tempfile
76import time, threading
77from stat import *
78
# True when running under a Python 3 interpreter; used below to pick
# between the Python 2 and Python 3 code paths.
PY3 = (sys.version_info[0] == 3)

# The "commands" module is only imported when not running Python 3.
if not PY3:
	import commands
84# Human readable diffs truncated by default if longer than this
85# Specifying -v on the command line will override
diffs_sz_thresh = 4096

# Locks serialising access to shared resources:
#   output_lock	- standard output or temporary file (difference())
#   log_lock	- the results file (log_difference())
#   wset_lock	- changedFiles list (workerThread())
output_lock, log_lock, wset_lock = (threading.Lock() for _ in range(3))

# Thread control flag; worker threads keep looping while it is True
keep_processing = True
99
100# Default search path for wsdiff
wsdiff_path = [
	"/usr/bin",
	"/usr/ccs/bin",
	"/lib/svc/bin",
	"/opt/onbld/bin",
]

# Objects that wsdiff will notice look different, but will not report.
# Having an exceptions list at all, and adding to it, is *dangerous*;
# the *only* reason for anything to be listed here is that the object
# does not build deterministically and we *cannot* fix that.
#
# These perl libraries use __DATE__ and therefore always look different.
# Ideally, we would purge the use of __DATE__ from the source, but because
# this is source we wish to distribute with Solaris "unchanged", we cannot
# modify it.
#
wsdiff_exceptions = [
	"usr/perl5/5.8.4/lib/sun4-solaris-64int/CORE/libperl.so.1",
	"usr/perl5/5.6.1/lib/sun4-solaris-64int/CORE/libperl.so.1",
	"usr/perl5/5.8.4/lib/i86pc-solaris-64int/CORE/libperl.so.1",
	"usr/perl5/5.6.1/lib/i86pc-solaris-64int/CORE/libperl.so.1",
]
121
if sys.version_info[0] == 3:
	def getoutput(cmd):
		"""Run a shell command and capture its combined output.

		The command is run through os.system() with stdout and stderr
		redirected into a temporary file, which is then read back.

		Returns a (returncode, output) tuple, where output has one
		trailing newline (if any) removed.
		"""
		import shlex, tempfile
		fd, fpath = tempfile.mkstemp()
		try:
			# Wrap the descriptor first so that it is closed even
			# if running the command or reading the output fails.
			with os.fdopen(fd, "r") as tfile:
				status = os.system("{ " + cmd + "; } >" +
				    shlex.quote(fpath) + " 2>&1")
				returncode = os.WEXITSTATUS(status)
				output = tfile.read()
		finally:
			# Never leave the temporary file behind
			os.unlink(fpath)
		if output[-1:] == '\n':
			output = output[:-1]
		return returncode, output
else:
	getoutput = commands.getstatusoutput
137
138#####
139# Logging routines
140#
141
142# Debug message to be printed to the screen, and the log file
def debug(msg) :
	"""Print a debugging message to stdout and, if enabled, the log.

	Nothing is emitted unless the global debugon flag is set. The
	message is prefixed with "## " to make it stand out. The log file
	is only written when the global logging flag is set.
	"""
	if not debugon :
		return

	# Add prefix to highlight debugging message
	msg = "## " + msg

	# "with" guarantees the lock is released even if the write fails
	with output_lock :
		print(msg)
		sys.stdout.flush()

	if logging :
		with log_lock :
			print(msg, file=log)
			log.flush()
157
158# Informational message to be printed to the screen, and the log file
def info(msg) :
	"""Print an informational message to stdout and, if enabled, the log.

	The log file is only written when the global logging flag is set.
	"""
	# "with" guarantees the lock is released even if the write fails
	with output_lock :
		print(msg)
		sys.stdout.flush()

	if logging :
		with log_lock :
			print(msg, file=log)
			log.flush()
170
171# Error message to be printed to the screen, and the log file
def error(msg) :
	"""Print an error message to stderr and, if enabled, the log.

	The message is prefixed with "ERROR: ". The log file is only
	written when the global logging flag is set.
	"""
	text = "ERROR: " + msg

	# "with" guarantees the lock is released even if the write fails
	with output_lock :
		print(text, file=sys.stderr)
		sys.stderr.flush()

	if logging :
		with log_lock :
			print(text, file=log)
			log.flush()
183
184# Informational message to be printed only to the log, if there is one.
def v_info(msg) :
	"""Write an informational message to the log only, if there is one.

	Nothing happens unless the global logging flag is set.
	"""
	if not logging :
		return

	# "with" guarantees the lock is released even if the write fails
	with log_lock :
		print(msg, file=log)
		log.flush()
192
193#
194# Flag a detected file difference
195# Display the fileName to stdout, and log the difference
196#
def difference(f, dtype, diffs) :
	"""Flag a detected file difference.

	f	- name of the differing file
	dtype	- description of the kind of difference (e.g. "ELF")
	diffs	- human readable diff text passed on to log_difference()

	Files listed in wsdiff_exceptions are silently ignored. When the
	global "sorted" flag is set the name is appended to differentFiles,
	otherwise it is printed to stdout right away. The difference is
	then handed to log_difference().
	"""
	if f in wsdiff_exceptions :
		return

	# "with" guarantees the lock is released even if the write fails
	with output_lock :
		if sorted :
			differentFiles.append(f)
		else :
			print(f)
			sys.stdout.flush()

	log_difference(f, dtype, diffs)
211
212#
213# Do the actual logging of the difference to the results file
214#
def log_difference(f, dtype, diffs) :
	"""Record a difference in the results file.

	f	- name of the differing file
	dtype	- description of the kind of difference
	diffs	- human readable diff text (may be empty)

	Nothing is written unless the global logging flag is set. Diff
	text longer than diffs_sz_thresh is truncated unless the global
	vdiffs flag is set.
	"""
	if not logging :
		return

	# "with" guarantees the lock is released even if a write fails
	with log_lock :
		print(f, file=log)
		print("NOTE: " + dtype + " difference detected.", file=log)

		if len(diffs) > 0 :
			print('', file=log)

			if not vdiffs and len(diffs) > diffs_sz_thresh :
				print(diffs[:diffs_sz_thresh], file=log)
				print("... truncated due to length: " +
				      "use -v to override ...", file=log)
			else :
				print(diffs, file=log)
			print('\n', file=log)
		log.flush()
235
236
237#####
238# diff generating routines
239#
240
241#
242# Return human readable diffs from two files
243#
def diffFileData(tmpf1, tmpf2) :
	"""Return human readable diffs between two files.

	If either file is detected as binary by isBinary(), both are first
	filtered through od(1) into "<name>.od" files and those are diffed
	instead. The ".od" files are removed before returning, including
	when the diff command fails.

	Returns the output of the diff command. Any exception raised while
	running it is logged and re-raised.
	"""
	binaries = isBinary(tmpf1) or isBinary(tmpf2)

	# Filter the data through od(1) if the data is detected
	# as being binary
	if binaries :
		tmp_od1 = tmpf1 + ".od"
		tmp_od2 = tmpf2 + ".od"

		os.system(od_cmd + " -c -t x4" + " " + tmpf1 + " > " + tmp_od1)
		os.system(od_cmd + " -c -t x4" + " " + tmpf2 + " > " + tmp_od2)

		tmpf1, tmpf2 = tmp_od1, tmp_od2

	try :
		rc, data = getoutput(diff_cmd + " " + tmpf1 + " " + tmpf2)
	except :
		error("failed to get output of command: " + diff_cmd + " "
		    + tmpf1 + " " + tmpf2)

		# Send exception for the failed command up
		raise
	finally :
		# Remove the od(1) output as we no longer need it.
		if binaries :
			for od_file in (tmpf1, tmpf2) :
				try :
					os.unlink(od_file)
				except OSError as e :
					error("diffFileData: unlink failed %s" % e)

	return data
284
285#####
286# Misc utility functions
287#
288
289# Prune off the leading prefix from string s
def str_prefix_trunc(s, prefix) :
	"""Return s with its first len(prefix) characters removed.

	Only the length of prefix is used; s is not checked to actually
	start with prefix.
	"""
	return s[len(prefix):]
293
294#
295# Prune off leading proto path goo (if there is one) to yield
296# the deliverable's eventual path relative to root
297# e.g. proto.base/root_sparc/usr/src/cmd/prstat => usr/src/cmd/prstat
298#
def fnFormat(fn) :
	"""Strip the leading proto area path from fn, if there is one.

	Everything up to and including the first "/" that follows the
	first occurrence of "root_<arch>" is removed. If either the marker
	or a following "/" cannot be found, fn is returned unchanged.
	"""
	marker = "root_" + arch

	try :
		start = fn.index(marker)
		slash = fn.index("/", start)
	except ValueError :
		# No proto area prefix to strip
		return fn

	return fn[slash + 1:]
311
312#####
313# Usage / argument processing
314#
315
316#
317# Display usage message
318#
def usage() :
	"""Print the usage message to stderr and exit with status 1."""
	usage_text = """Usage: wsdiff [-dvVst] [-r results ] [-i filelist ] old new
        -d      Print debug messages about the progress
        -v      Do not truncate observed diffs in results
        -V      Log *all* ELF sect diffs vs. logging the first diff found
        -t      Use onbld tools in $SRC/tools
        -r      Log results and observed differences
        -s      Produce sorted list of differences
        -i      Tell wsdiff which objects to compare via an input file list"""

	# Push out anything pending on stdout before writing to stderr
	sys.stdout.flush()
	print(usage_text, file=sys.stderr)
	sys.exit(1)
331
332#
333# Process command line options
334#
def args() :
	"""Process the command line options.

	Sets the global flags debugon, logging, vdiffs, reportAllSects and
	sorted for the options that were given, and returns the tuple
	(baseRoot, fileNamesFile, localTools, ptchRoot, results).

	Any malformed invocation ends in usage(). Using -v or -V without
	-r reports an error and exits with status 1.
	"""
	global debugon, logging, vdiffs, reportAllSects, sorted

	fileNamesFile = ""
	results = ""
	localTools = False

	# getopt.getopt() returns:
	#	an option/value tuple
	#	a list of remaining non-option arguments
	try :
		optlist, operands = getopt.getopt(sys.argv[1:], 'di:r:vVst?')
	except getopt.error :
		usage()

	# A correct wsdiff invocation will have exactly two non option
	# arguments, the paths to the base (old), ptch (new) proto areas
	if len(operands) != 2 :
		usage()

	for opt, val in optlist :
		if opt == '-d' :
			debugon = True
		elif opt == '-i' :
			fileNamesFile = val
		elif opt == '-r' :
			results = val
			logging = True
		elif opt == '-s' :
			sorted = True
		elif opt == '-v' :
			vdiffs = True
		elif opt == '-V' :
			reportAllSects = True
		elif opt == '-t' :
			localTools = True
		else :
			usage()

	baseRoot, ptchRoot = operands[0], operands[1]

	if not baseRoot or not ptchRoot :
		usage()

	if logging and not results :
		usage()

	# -v and -V only affect what is written to the results file
	for enabled, flag in ((vdiffs, "-v"), (reportAllSects, "-V")) :
		if enabled and not logging :
			error("The " + flag +
			    " option requires a results file (-r)")
			sys.exit(1)

	# alphabetical order
	return baseRoot, fileNamesFile, localTools, ptchRoot, results
403
404#####
405# File identification
406#
407
408#
409# Identify the file type.
410# If it's not ELF, use the file extension to identify
411# certain file types that require special handling to
412# compare. Otherwise just return a basic "ASCII" type.
413#
def getTheFileType(f) :
	"""Identify the type of file f.

	Returns 'Error' if the file cannot be stat'ed, 'ASCII' for an
	empty file, 'ELF' if isELF() recognises it, one of the special
	types below based on the text after the last '.' in the path, and
	'ASCII' otherwise.
	"""
	extensions = { 'a'	:	'ELF Object Archive',
		       'jar'	:	'Java Archive',
		       'html'	:	'HTML',
		       'ln'	:	'Lint Library',
		       'db'	:	'Sqlite Database' }

	# Only a failing stat(2) means "Error"; anything else is a real
	# bug and must not be hidden.
	try :
		size = os.stat(f).st_size
	except OSError :
		error("failed to stat " + f)
		return 'Error'

	if size == 0 :
		return 'ASCII'

	if isELF(f) == 1 :
		return 'ELF'

	# Test the file extension, if the name has one
	fnamelist = f.split('.')
	if len(fnamelist) > 1 :
		return extensions.get(fnamelist[-1], 'ASCII')

	return 'ASCII'
439
440#
441# Return non-zero if "f" is an ELF file
442#
elfmagic = b'\177ELF'
def isELF(f) :
	"""Return 1 if file f starts with the ELF magic number, else 0.

	A file that cannot be opened or read is reported as not ELF.
	"""
	try :
		with open(f, mode='rb') as fd :
			magic = fd.read(len(elfmagic))
	except (IOError, OSError) :
		# Unreadable: deliberately treated as "not ELF"
		return 0

	if magic == elfmagic :
		return 1
	return 0
454
455#
456# Return non-zero if "f" is binary.
457# Consider the file to be binary if it contains any null characters
458#
def isBinary(f) :
	"""Return 1 if file f is considered binary, else 0.

	A file is binary if it contains any null byte. A file that cannot
	be opened or read is also reported as binary.
	"""
	try :
		with open(f, mode='rb') as fd :
			# Scan in chunks so that large objects are not
			# pulled into memory in one go; stop at the first
			# null byte.
			for chunk in iter(lambda: fd.read(65536), b'') :
				if b'\0' in chunk :
					return 1
	except (IOError, OSError) :
		return 1
	return 0
469
470#####
471# Directory traversal and file finding
472#
473
474#
475# Generate the paths of all files found under the specified directory
476#
def findFiles(d) :
	"""Generate the path of every file found under directory d.

	Paths are produced in a deterministic order: the files of a
	directory sorted by name, followed by its sub-directories, also
	visited in sorted order.
	"""
	for path, subdirs, files in os.walk(d) :
		# Sorting subdirs in place controls the order in which
		# os.walk() descends into them.
		subdirs.sort()
		files.sort()
		for name in files :
			yield os.path.join(path, name)
482
483#
484# Examine all files in base, ptch
485#
486# Return a list of files appearing in both proto areas,
487# a list of new files (files found only in ptch) and
488# a list of deleted files (files found only in base)
489#
def protoCatalog(base, ptch) :
	"""Examine all files in the base and ptch proto areas.

	Symbolic links are ignored. File names are recorded with the
	leading len(base) / len(ptch) characters removed.

	Returns a tuple of three lists:
		files appearing in both proto areas,
		new files (found only in ptch),
		deleted files (found only in base)
	"""
	debug("Getting the list of files in the base area");
	baseFilesList = list(findFiles(base))
	baseStringLength = len(base)
	debug("Found " + str(len(baseFilesList)) + " files")

	debug("Getting the list of files in the patch area");
	ptchFilesList = list(findFiles(ptch))
	ptchStringLength = len(ptch)
	debug("Found " + str(len(ptchFilesList)) + " files")

	# Inventory files in the base proto area
	debug("Determining the list of regular files in the base area");
	baseList = [ fn[baseStringLength:] for fn in baseFilesList
	    if not os.path.islink(fn) ]
	debug("Found " + str(len(baseList)) + " files")

	# Inventory files in the patch proto area
	debug("Determining the list of regular files in the patch area");
	ptchList = [ fn[ptchStringLength:] for fn in ptchFilesList
	    if not os.path.islink(fn) ]
	debug("Found " + str(len(ptchList)) + " files")

	# Sets give constant time membership tests; the lists keep the
	# original ordering of the results.
	baseSet = set(baseList)
	ptchSet = set(ptchList)

	# Deleted files appear in the base area, but not the patch area
	debug("Searching for deleted files by comparing the lists")
	deletedFiles = [ fn for fn in baseList if fn not in ptchSet ]
	debug("Found " + str(len(deletedFiles)) + " deleted files")

	# Eliminate "deleted" files from the list of objects appearing
	# in both the base and patch proto areas
	debug("Eliminating deleted files from the list of objects")
	compFiles = [ fn for fn in baseList if fn in ptchSet ]
	debug("List for comparison reduced to " + str(len(compFiles))
	    + " files")

	# New files appear in the patch area, but not the base
	debug("Getting the list of newly added files")
	newFiles = [ fn for fn in ptchList if fn not in baseSet ]
	debug("Found " + str(len(newFiles)) + " new files")

	return compFiles, newFiles, deletedFiles
554
555#
556# Examine the files listed in the input file list
557#
558# Return a list of files appearing in both proto areas,
559# a list of new files (files found only in ptch) and
560# a list of deleted files (files found only in base)
561#
def flistCatalog(base, ptch, flist) :
	"""Examine the files listed in the input file list flist.

	Each non-blank line names an object, relative either to the
	base/ptch directory or to the "root_<arch>/" directory below it.
	Symbolic links are ignored.

	Returns a tuple of three lists:
		files appearing in both proto areas,
		new files (found only in ptch),
		deleted files (found only in base)
	"""
	compFiles = []		# List of files in both proto areas
	newFiles = []		# New files detected
	deletedFiles = []	# Deleted files

	try :
		fd = open(flist, "r")
	except (IOError, OSError) :
		error("could not open: " + flist)
		cleanup(1)
		# NOTE(review): cleanup() is presumably expected not to
		# return; if it does there is nothing to catalog.
		return compFiles, newFiles, deletedFiles

	with fd :
		files = fd.readlines()

	for line in files :
		# the fileNames have a trailing '\n'
		f = line.rstrip()
		if not f :
			continue

		ptch_present = True
		base_present = True

		# The objects in the file list have paths relative
		# to $ROOT or to the base/ptch directory specified on
		# the command line.
		# If it's relative to $ROOT, we'll need to add back the
		# root_`uname -p` goo we stripped off in fnFormat()
		#
		# Default to the name as listed so that fn is always
		# defined, even when the object is in neither tree.
		fn = f
		if os.path.exists(base + f) :
			fn = f
		elif os.path.exists(base + "root_" + arch + "/" + f) :
			fn = "root_" + arch + "/" + f
		else :
			base_present = False

		if base_present :
			if not os.path.exists(ptch + fn) :
				ptch_present = False
		elif os.path.exists(ptch + f) :
			fn = f
		elif os.path.exists(ptch + "root_" + arch + "/" + f) :
			fn = "root_" + arch + "/" + f
		else :
			ptch_present = False

		# ignore links
		base_is_link = os.path.islink(base + fn)
		ptch_is_link = os.path.islink(ptch + fn)
		if base_is_link :
			base_present = False
		if ptch_is_link :
			ptch_present = False

		if base_present and ptch_present :
			compFiles.append(fn)
		elif base_present :
			deletedFiles.append(fn)
		elif ptch_present :
			newFiles.append(fn)
		elif base_is_link and ptch_is_link :
			# A link in both trees is quietly skipped
			continue
		else :
			error(f + " in file list, but not in either tree. " +
			    "Skipping...")

	return compFiles, newFiles, deletedFiles
629
630
631#
632# Build a fully qualified path to an external tool/utility.
633# Consider the default system locations. For onbld tools, if
634# the -t option was specified, we'll try to use built tools in $SRC tools,
635# and otherwise, we'll fall back on /opt/onbld/
636#
def find_tool(tool) :
	"""Build a path to an external tool/utility.

	The name is tried as given first, and then under each directory
	of wsdiff_path, both directly and in its "<arch>" sub-directory.

	Returns the path followed by a single space, so that callers can
	build a command line by appending arguments directly (as
	compareByDumping() does with sqlite_cmd). If the tool cannot be
	found an error is reported and the program exits with status 1.
	"""
	# First, check what was passed
	if os.path.exists(tool) :
		return tool + " "

	# Next try in wsdiff path
	for pdir in wsdiff_path :
		location = pdir + "/" + tool
		if os.path.exists(location) :
			return location + " "

		location = pdir + "/" + arch + "/" + tool
		if os.path.exists(location) :
			return location + " "

	error("Could not find path to: " + tool)
	sys.exit(1)
655
656
657#####
658# ELF file comparison helper routines
659#
660
661#
662# Return a dictionary of ELF section types keyed by section name
663#
def get_elfheader(f) :
	"""Return a dictionary of ELF section types keyed by section name.

	The section headers of f are obtained by running elfdump_cmd with
	the -c option and parsing its output.

	Raises RuntimeError if no output is produced, or if the type of a
	section cannot be found in it.
	"""
	header = {}

	rc, hstring = getoutput(elfdump_cmd + " -c " + f)

	if len(hstring) == 0 :
		error("Failed to dump ELF header for " + f)
		raise RuntimeError("Failed to dump ELF header for " + f)

	# elfdump(1) dumps the section headers with the section name
	# following "sh_name:", and the section type following "sh_type:"
	for sect in hstring.split("Section Header") :
		datap = sect.find("sh_name:")
		if datap == -1 :
			continue
		section = sect[datap:].split()[1]

		datap = sect.find("sh_type:")
		if datap == -1 :
			error("Could not get type for sect: " + section +
			      " in " + f)
			# Fail explicitly rather than by indexing past
			# the end of the split output below.
			raise RuntimeError("Could not get type for sect: " +
			    section + " in " + f)
		header[section] = sect[datap:].split()[2]

	return header
691
692#
693# Extract data in the specified ELF section from the given file
694#
def extract_elf_section(f, section) :
	"""Extract the data in the specified ELF section of file f.

	The section is dumped by running dump_cmd with the -sn option.
	Everything up to and including the first ":" of the output is
	dropped before the data is returned.

	Raises RuntimeError if the command produces no output.
	"""
	rc, data = getoutput(dump_cmd + " -sn " + section + " " + f)

	if len(data) == 0 :
		# Do not depend on dump_cmd ending in a space here
		msg = (dump_cmd.rstrip() + " yielded no data on section " +
		    section + " of " + f)
		error(msg)
		raise RuntimeError(msg)

	# dump(1) displays the file name to start...
	# get past it to the data itself
	dbegin = data.find(":") + 1

	return data[dbegin:]
711
712#
713# Return a (hopefully meaningful) human readable set of diffs
714# for the specified ELF section between f1 and f2
715#
716# Depending on the section, various means for dumping and diffing
717# the data may be employed.
718#
text_sections = [ '.text', '.init', '.fini' ]

# elfdump(1) option used to display each of the sections that has a
# dedicated option
_elfdump_section_opts = {
	".group"	: " -g ",
	".hash"		: " -h ",
	".dynamic"	: " -d ",
	".got"		: " -G ",
	".SUNW_cap"	: " -H ",
	".interp"	: " -i ",
}

# Build the shell command that dumps one ELF section of f into tmpFile
def _elf_section_dump_cmd(f, tmpFile, section, sh_type) :
	if sh_type == "SHT_RELA" :
		return elfdump_cmd + " -r " + f + " > " + tmpFile

	if section in _elfdump_section_opts :
		return (elfdump_cmd + _elfdump_section_opts[section] + f +
		    " > " + tmpFile)

	if section == ".symtab" or section == ".dynsym" :
		return (elfdump_cmd + " -s -N " + section + " " + f +
		    " > " + tmpFile)

	if section in text_sections :
		# dis sometimes complains when it hits something it doesn't
		# know how to disassemble. Just ignore it, as the output
		# being generated here is human readable, and we've already
		# correctly flagged the difference.
		return (dis_cmd + " -t " + section + " " + f +
		    " 2>/dev/null | grep -v disassembly > " + tmpFile)

	return (elfdump_cmd + " -w " + tmpFile + " -N " +
	    section + " " + f)

def diff_elf_section(f1, f2, section, sh_type) :
	"""Return human readable diffs for one ELF section of f1 and f2.

	The section of each file is dumped into a per-thread temporary
	file, using a command chosen from the section name and sh_type,
	and the two dumps are compared with diffFileData(). The temporary
	files are removed before returning, including when the comparison
	raises.
	"""
	tname = threading.current_thread().name
	tmpFile1 = tmpDir1 + os.path.basename(f1) + tname
	tmpFile2 = tmpDir2 + os.path.basename(f2) + tname

	os.system(_elf_section_dump_cmd(f1, tmpFile1, section, sh_type))
	os.system(_elf_section_dump_cmd(f2, tmpFile2, section, sh_type))

	try :
		data = diffFileData(tmpFile1, tmpFile2)
	finally :
		# remove temp files as we no longer need them
		for tmpFile in (tmpFile1, tmpFile2) :
			try :
				os.unlink(tmpFile)
			except OSError as e :
				error("diff_elf_section: unlink failed %s" % e)

	return data
783
784#
785# compare the relevant sections of two ELF binaries
786# and report any differences
787#
788# Returns: 1 if any differences found
789#          0 if no differences found
790#	  -1 on error
791#
792
793# Sections deliberately not considered when comparing two ELF
794# binaries. Differences observed in these sections are not considered
795# significant where patch deliverable identification is concerned.
sections_to_skip = [
	".SUNW_signature",
	".comment",
	".SUNW_ctf",
	".debug",
	".plt",
	".rela.bss",
	".rela.plt",
	".line",
	".note",
	".compcom",
]

# Sections that compareElfs() moves to the front of the list of
# sections to compare.
sections_preferred = [
	".rodata.str1.8",
	".rodata.str1.1",
	".rodata",
	".data1",
	".data",
	".text",
]
815
def compareElfs(base, ptch, quiet) :
	"""Compare the relevant sections of two ELF binaries.

	base, ptch	- paths of the two ELF objects
	quiet		- if set, only detect differences; do not report

	Sections named in sections_to_skip are not compared. Unless the
	global reportAllSects flag is set, comparison stops at the first
	differing section.

	Returns 1 if any differences were found and 0 if none were.
	NOTE(review): when a section header or section cannot be read
	this returns None rather than -1; callers may depend on that
	being false, so it is left unchanged.
	"""
	try :
		base_header = get_elfheader(base)
		ptch_header = get_elfheader(ptch)
	except Exception :
		return

	sections = list(base_header.keys())
	ptch_sections = list(ptch_header.keys())

	fileName = fnFormat(base)

	# Derive the list of ELF sections found only in
	# either base or ptch.
	base_only = [ s for s in sections if s not in ptch_header ]
	ptch_only = [ s for s in ptch_sections if s not in base_header ]

	for only, present_in, missing_from in ((base_only, base, ptch),
	    (ptch_only, ptch, base)) :
		if len(only) == 0 :
			continue
		if quiet :
			return 1

		data = ""
		if logging :
			slist = "".join([ sect + "\t" for sect in only ])
			data = ("ELF sections found in " +
				present_in + " but not in " + missing_from +
				"\n\n" + slist)

		difference(fileName, "ELF", data)
		return 1

	# Look for preferred sections, and put those at the
	# top of the list of sections to compare
	for psect in sections_preferred :
		if psect in sections :
			sections.remove(psect)
			sections.insert(0, psect)

	# Compare ELF sections
	differs = 0
	first_section = True
	for sect in sections :

		if sect in sections_to_skip :
			continue

		try :
			s1 = extract_elf_section(base, sect)
			s2 = extract_elf_section(ptch, sect)
		except Exception :
			return

		if s1 == s2 :
			continue

		# Nothing is reported in quiet mode, so the first
		# difference settles the answer.
		if quiet :
			return 1

		data = diff_elf_section(base, ptch, sect, base_header[sect])

		# If all ELF sections are being reported, then
		# invoke difference() to flag the file name to
		# stdout only once. Any other section differences
		# should be logged to the results file directly
		if first_section :
			difference(fileName, "ELF " + sect, data)
		else :
			log_difference(fileName, "ELF " + sect, data)

		if not reportAllSects :
			return 1
		first_section = False
		differs = 1

	# Differences logged while reporting all sections still count
	return differs
922
923#####
924# recursively remove 2 directories
925#
926# Used for removal of temporary directory structures (ignores any errors).
927#
def clearTmpDirs(dir1, dir2) :
	"""Recursively remove dir1 and dir2, ignoring any errors.

	A path that is not an existing directory is left alone.
	"""
	for tmpdir in (dir1, dir2) :
		if os.path.isdir(tmpdir) :
			# second argument: ignore_errors
			shutil.rmtree(tmpdir, True)
935
936
937#####
938# Archive object comparison
939#
940# Returns 1 if difference detected
941#         0 if no difference detected
942#        -1 on error
943#
def compareArchives(base, ptch, fileType) :
	"""Compare two archive objects by unpacking and comparing members.

	base, ptch	- paths of the two archives
	fileType	- "Java Archive" or "ELF Object Archive"

	A straight file comparison is tried first. If that does not show
	the files to be the same, both are unpacked into per-thread
	temporary directories and compared member by member. The
	temporary directories are always removed before returning.

	Returns 1 if a difference was detected, 0 if not, -1 on error.
	"""
	fileName = fnFormat(base)
	tname = threading.current_thread().name
	ArchTmpDir1 = tmpDir1 + os.path.basename(base) + tname
	ArchTmpDir2 = tmpDir2 + os.path.basename(base) + tname

	#
	# Be optimistic and first try a straight file compare
	# as it will allow us to finish up quickly.
	#
	if compareBasic(base, ptch, True, fileType) == 0 :
		return 0

	try :
		return _compareUnpackedArchives(fileName, base, ptch,
		    fileType, ArchTmpDir1, ArchTmpDir2)
	finally :
		clearTmpDirs(ArchTmpDir1, ArchTmpDir2)

# Unpack base into dir1 and ptch into dir2, then compare the contents
def _compareUnpackedArchives(fileName, base, ptch, fileType, dir1, dir2) :

	for tmpdir in (dir1, dir2) :
		try :
			os.makedirs(tmpdir)
		except OSError as e :
			error("compareArchives: makedir failed %s" % e)
			return -1

	# copy over the objects to the temp areas, and
	# unpack them
	for src, dst in ((base, dir1), (ptch, dir2)) :
		cmd = "cp -fp " + src + " " + dst
		rc, output = getoutput(cmd)
		if rc != 0 :
			error(cmd + " failed: " + output)
			return -1

	bname = fileName.split('/')[-1]
	if fileType == "Java Archive" :
		unpack = ("jar xf " + bname +
		    "; rm -f " + bname + " META-INF/MANIFEST.MF")
	elif fileType == "ELF Object Archive" :
		unpack = ("/usr/ccs/bin/ar x " +
		    bname + "; rm -f " + bname)
	else :
		error("unexpected file type: " + fileType)
		return -1

	os.system("cd " + dir1 + "; " + unpack)
	os.system("cd " + dir2 + "; " + unpack)

	# Trim leading path off base/ptch file lists
	baseFlist = [ str_prefix_trunc(fn, dir1) for fn in findFiles(dir1) ]
	ptchFlist = [ str_prefix_trunc(fn, dir2) for fn in findFiles(dir2) ]

	# Sets give constant time membership tests
	baseSet = set(baseFlist)
	ptchSet = set(ptchFlist)

	for fn in ptchFlist :
		if fn not in baseSet :
			difference(fileName, fileType,
				   fn + " added to " + fileName)
			return 1

	for fn in baseFlist :
		if fn not in ptchSet :
			difference(fileName, fileType,
				   fn + " removed from " + fileName)
			return 1

		if compareOneFile(dir1 + fn, dir2 + fn, True) :
			difference(fileName, fileType,
				   fn + " in " + fileName + " differs")
			return 1

	return 0
1042
1043#####
1044# (Basic) file comparison
1045#
1046# Returns 1 if difference detected
1047#         0 if no difference detected
1048#        -1 on error
1049#
def compareBasic(base, ptch, quiet, fileType) :
	"""Compare two files byte for byte.

	base, ptch	- paths of the two files
	quiet		- if set, only detect a difference; do not report
	fileType	- type description passed on to difference()

	Returns 1 if a difference was detected, 0 if not, and -1 if
	either file could not be examined.
	"""
	# In quiet mode a size mismatch settles it without reading data
	if quiet :
		try :
			if os.stat(base).st_size != os.stat(ptch).st_size :
				return 1
		except OSError as e :
			error("could not stat %s or %s: %s" % (base, ptch, e))
			return -1

	try :
		with open(base, 'rb') as fh :
			baseData = fh.read()
	except (IOError, OSError) :
		error("could not open " + base)
		return -1

	try :
		with open(ptch, 'rb') as fh :
			ptchData = fh.read()
	except (IOError, OSError) :
		error("could not open " + ptch)
		return -1

	if baseData == ptchData :
		return 0

	if not quiet :
		diffs = diffFileData(base, ptch)
		difference(fnFormat(base), fileType, diffs)
	return 1
1080
1081
1082#####
1083# Compare two objects by producing a data dump from
1084# each object, and then comparing the dump data
1085#
1086# Returns: 1 if a difference is detected
1087#          0 if no difference detected
1088#         -1 upon error
1089#
def compareByDumping(base, ptch, quiet, fileType) :
	"""Compare two objects by dumping each one and comparing the dumps.

	base, ptch	- paths of the two objects
	quiet		- if set, only detect a difference; do not report
	fileType	- "Lint Library" or "Sqlite Database"

	The dumps are written to per-thread temporary files, which are
	removed before returning. When a difference is found and quiet is
	not set, it is reported through difference().

	Returns 1 if a difference is detected and 0 if not.
	NOTE(review): the error paths return None rather than -1;
	callers may depend on that being false, so it is left unchanged.
	"""
	fileName = fnFormat(base)
	tname = threading.current_thread().name
	tmpFile1 = tmpDir1 + os.path.basename(base) + tname
	tmpFile2 = tmpDir2 + os.path.basename(ptch) + tname

	if fileType == "Lint Library" :
		baseCmd = (lintdump_cmd + " -ir " + base +
			  " | egrep -v '(LINTOBJ|LINTMOD):'" +
			  " | grep -v PASS[1-3]:" +
			  " > " + tmpFile1)
		ptchCmd = (lintdump_cmd + " -ir " + ptch +
			  " | egrep -v '(LINTOBJ|LINTMOD):'" +
			  " | grep -v PASS[1-3]:" +
			  " > " + tmpFile2)
	elif fileType == "Sqlite Database" :
		baseCmd = ("echo .dump | " + sqlite_cmd + base + " > " +
			  tmpFile1)
		ptchCmd = ("echo .dump | " + sqlite_cmd + ptch + " > " +
			  tmpFile2)
	else :
		# Without a dump command there is nothing to compare
		error("unexpected file type: " + fileType)
		return

	os.system(baseCmd)
	os.system(ptchCmd)

	try :
		try :
			with open(tmpFile1, 'rb') as fh :
				baseData = fh.read()
		except (IOError, OSError) :
			error("could not open: " + tmpFile1)
			return

		try :
			with open(tmpFile2, 'rb') as fh :
				ptchData = fh.read()
		except (IOError, OSError) :
			error("could not open: " + tmpFile2)
			return

		ret = 0
		if baseData != ptchData :
			ret = 1
			if not quiet :
				data = diffFileData(tmpFile1, tmpFile2)
				# Report the difference, as the other
				# compare*() routines do.
				difference(fileName, fileType, data)
		return ret
	finally :
		# Remove the temporary files now.
		for tmpFile in (tmpFile1, tmpFile2) :
			if not os.path.exists(tmpFile) :
				continue
			try :
				os.unlink(tmpFile)
			except OSError as e :
				error("compareByDumping: unlink failed %s" % e)
1147
1148#####
1149#
1150# SIGINT signal handler. Changes thread control variable to tell the threads
1151# to finish their current job and exit.
1152#
def discontinue_processing(signl, frme):
	"""
	Signal handler: announce the interrupt on stderr and clear the
	keep_processing flag that the worker threads poll.

	signl, frme - signal number and stack frame supplied by the signal
	              machinery; neither is used.

	Always returns 0.
	"""
	global keep_processing

	sys.stderr.write("Caught Ctrl-C, stopping the threads" + "\n")
	keep_processing = False
	return 0
1160
1161#####
1162#
1163# worker thread for changedFiles processing
1164#
class workerThread(threading.Thread) :
	"""
	Worker that drains the shared changedFiles list.

	Each pass takes one relative file name off changedFiles while
	holding wset_lock and compares that file between baseRoot and
	ptchRoot.  The thread ends when the list is empty or when
	keep_processing has been cleared.
	"""
	def run(self):
		while keep_processing :
			# Hold the lock only while taking a name off the list.
			# "with" guarantees the lock is released on every path,
			# including an unexpected exception from pop().
			with wset_lock :
				try :
					fn = changedFiles.pop()
				except IndexError :
					# there is nothing more to do
					return

			base = baseRoot + fn
			ptch = ptchRoot + fn

			compareOneFile(base, ptch, False)
1189
1190
1191#####
1192# Compare two objects. Detect type changes.
1193# Vector off to the appropriate type specific
1194# compare routine based on the type.
1195#
def compareOneFile(base, ptch, quiet) :
	"""
	Compare one object from the old tree against its counterpart in
	the new tree, choosing the comparison routine by file type.

	base, ptch - paths of the old and the new object
	quiet      - if true, a file type change is not reported through
	             difference(); also handed to the routines that take it

	Returns -1 if either file type is 'Error', 1 if the file types
	differ, otherwise whatever the selected comparison routine returns.
	"""

	# Determine both types before looking at either result.
	baseType = getTheFileType(base)
	ptchType = getTheFileType(ptch)

	if 'Error' in (baseType, ptchType) :
		return -1

	shownName = fnFormat(base)

	# A type change is a difference by itself; no content compare needed.
	if baseType != ptchType :
		if not quiet :
			difference(shownName, "file type", baseType + " to " + ptchType)
		return 1

	kind = baseType

	if kind == 'ELF' :
		return compareElfs(base, ptch, quiet)

	if kind in ('Java Archive', 'ELF Object Archive') :
		return compareArchives(base, ptch, kind)

	if kind in ('Lint Library', 'Sqlite Database') :
		return compareByDumping(base, ptch, quiet, kind)

	# HTML and everything else (some variety of text file) get the
	# plain byte comparison.
	return compareBasic(base, ptch, quiet, kind)
1233
1234# Cleanup and self-terminate
def cleanup(ret) :
	"""
	Remove the temporary directories, close the log file if logging is
	enabled, and terminate the process with exit status ret.
	"""

	def removeTree(path) :
		# Delete the directory tree if it is there.
		if os.path.isdir(path) :
			shutil.rmtree(path)

	debug("Performing cleanup (%s)" % (ret,))

	removeTree(tmpDir1)
	removeTree(tmpDir2)

	if logging :
		log.close()

	sys.exit(ret)
1248
def main() :
	"""
	Program entry point: process the command line, catalog the two
	proto areas, compare every object present in both using a pool of
	worker threads, then clean up and exit.

	Does not return.  A run ends in cleanup(), with status 0 after a
	complete run or 1 if it was interrupted by SIGINT.  sys.exit(1) is
	called directly when the log file cannot be opened, the open file
	limit cannot be raised, or either proto area does not exist.
	"""

	# Log file handle
	global log

	# Globals relating to command line options
	global logging, vdiffs, reportAllSects

	# Named temporary files / directories
	global tmpDir1, tmpDir2

	# Command paths
	global lintdump_cmd, elfdump_cmd, dump_cmd, dis_cmd, od_cmd, diff_cmd, sqlite_cmd

	# Default search path
	global wsdiff_path

	# Essentially "uname -p"
	global arch

	# changed files for worker thread processing
	global changedFiles
	global baseRoot
	global ptchRoot

	# Sort the list of files from a temporary file
	global sorted
	global differentFiles

	# Debugging indicator
	global debugon

	# Some globals need to be initialized
	debugon = logging = vdiffs = reportAllSects = sorted = False


	# Process command line arguments
	# Return values are returned from args() in alpha order
	# Note that args() also sets the globals:
	#	logging to True if verbose logging (to a file) was enabled
	#	vdiffs to True if logged differences aren't to be truncated
	#	reportAllSects to True if all ELF section differences are to be reported
	#
	baseRoot, fileNamesFile, localTools, ptchRoot, results = args()

	#
	# Set up the results/log file
	#
	if logging :
		try:
			log = open(results, "w")
		except (IOError, OSError, TypeError):
			# Turn logging off before reporting, since there is
			# no log file to write to.
			logging = False
			error("failed to open log file: " + str(results))
			sys.exit(1)

		dateTimeStr= "# %04d-%02d-%02d at %02d:%02d:%02d" % time.localtime()[:6]
		v_info("# This file was produced by wsdiff")
		v_info(dateTimeStr)

	# Changed files (used only for the sorted case)
	if sorted :
		differentFiles = []

	#
	# Build paths to the required tools
	#
	# Try to look for tools in $SRC/tools if the "-t" option
	# was specified
	#
	rc, arch = getoutput("uname -p")
	arch = arch.rstrip()
	if localTools :
		try:
			src = os.environ['SRC']
		except KeyError:
			error("-t specified, but $SRC not set. Cannot find $SRC/tools")
			src = ""
		if len(src) > 0 :
			wsdiff_path.insert(0, src + "/tools/proto/opt/onbld/bin")

	lintdump_cmd = find_tool("lintdump")
	elfdump_cmd = find_tool("elfdump")
	dump_cmd = find_tool("dump")
	od_cmd = find_tool("od")
	dis_cmd = find_tool("dis")
	diff_cmd = find_tool("diff")
	sqlite_cmd = find_tool("sqlite")

	#
	# Set resource limit for number of open files as high as possible.
	# This might get handy with big number of threads.
	#
	(nofile_soft, nofile_hard) = resource.getrlimit(resource.RLIMIT_NOFILE)
	try:
		resource.setrlimit(resource.RLIMIT_NOFILE,
		    (nofile_hard, nofile_hard))
	except (ValueError, OSError, resource.error):
		error("cannot set resource limits for number of open files")
		sys.exit(1)

	#
	# validate the base and patch paths
	#
	if baseRoot[-1] != '/' :
		baseRoot += '/'

	if ptchRoot[-1] != '/' :
		ptchRoot += '/'

	if not os.path.exists(baseRoot) :
		error("old proto area: " + baseRoot + " does not exist")
		sys.exit(1)

	if not os.path.exists(ptchRoot) :
		error("new proto area: " + ptchRoot + " does not exist")
		sys.exit(1)

	#
	# log some information identifying the run
	#
	v_info("Old proto area: " + baseRoot)
	v_info("New proto area: " + ptchRoot)
	v_info("Results file: " + results + "\n")

	#
	# Set up the temporary directories / files
	# Could use python's tmpdir routines, but these should
	# be easier to identify / keep around for debugging
	pid = os.getpid()
	tmpDir1 = "/tmp/wsdiff_tmp1_" + str(pid) + "/"
	tmpDir2 = "/tmp/wsdiff_tmp2_" + str(pid) + "/"
	try:
		os.makedirs(tmpDir1)
	except OSError as e:
		error("main: makedir failed %s" % e)
	try:
		os.makedirs(tmpDir2)
	except OSError as e:
		error("main: makedir failed %s" % e)

	# Derive a catalog of new, deleted, and to-be-compared objects
	# either from the specified base and patch proto areas, or
	# from an input file list
	newOrDeleted = False

	if fileNamesFile != "" :
		changedFiles, newFiles, deletedFiles = \
			      flistCatalog(baseRoot, ptchRoot, fileNamesFile)
	else :
		changedFiles, newFiles, deletedFiles = \
				protoCatalog(baseRoot, ptchRoot)

	if len(newFiles) > 0 :
		newOrDeleted = True
		info("\nNew objects found: ")

		if sorted :
			newFiles.sort()
		for fn in newFiles :
			info(fnFormat(fn))

	if len(deletedFiles) > 0 :
		newOrDeleted = True
		info("\nObjects removed: ")

		if sorted :
			deletedFiles.sort()
		for fn in deletedFiles :
			info(fnFormat(fn))

	if newOrDeleted :
		info("\nChanged objects: ")
	if sorted :
		debug("The list will appear after the processing is done")

	# Here's where all the heavy lifting happens
	# Perform a comparison on each object appearing in
	# both proto areas. compareOneFile will examine the
	# file types of each object, and will vector off to
	# the appropriate comparison routine, where the compare
	# will happen, and any differences will be reported / logged

	# determine maximum number of worker threads by using
	# DMAKE_MAX_JOBS environment variable set by nightly(1)
	# or get number of CPUs in the system
	try:
		max_threads = int(os.environ['DMAKE_MAX_JOBS'])
	except (KeyError, ValueError):
		max_threads = os.sysconf("SC_NPROCESSORS_ONLN")
		# If we cannot get number of online CPUs in the system
		# run unparallelized otherwise bump the number up 20%
		# to achieve best results.
		if max_threads == -1 :
			max_threads = 1
		else :
			max_threads += int(max_threads/5)

	# With no worker at all nothing would be compared, so a zero or
	# negative job count falls back to a single thread.
	if max_threads < 1 :
		max_threads = 1

	# Set signal handler to attempt graceful exit
	debug("Setting signal handler")
	signal.signal( signal.SIGINT, discontinue_processing )

	# Create and unleash the threads
	# Only at most max_threads must be running at any moment
	mythreads = []
	debug("Spawning " + str(max_threads) + " threads")
	for i in range(max_threads) :
		thread = workerThread()
		mythreads.append(thread)
		thread.start()

	# Wait for the threads to finish and do cleanup if interrupted.
	# NOTE(review): polling instead of join() presumably keeps the main
	# thread able to run the SIGINT handler while waiting - confirm.
	debug("Waiting for the threads to finish")
	while any(thread.is_alive() for thread in mythreads) :
		# Some threads are still going
		time.sleep(1)

	# Interrupted by SIGINT
	if not keep_processing :
		cleanup(1)

	# If the list of differences was sorted it is stored in an array
	if sorted :
		differentFiles.sort()
		for f in differentFiles :
			info(fnFormat(f))

	# We're done, cleanup.
	cleanup(0)
1482
# Script entry point.  A KeyboardInterrupt that escapes main() still goes
# through cleanup() and exits with status 1.
if __name__ == '__main__' :
	try:
		main()
	except KeyboardInterrupt :
		cleanup(1)
1488
1489