xref: /illumos-gate/usr/src/tools/scripts/wsdiff.py (revision c938dc67d93d6388d698ee0d599fb7fce2963f92)
1#!@TOOLS_PYTHON@
2#
3# CDDL HEADER START
4#
5# The contents of this file are subject to the terms of the
6# Common Development and Distribution License (the "License").
7# You may not use this file except in compliance with the License.
8#
9# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10# or http://www.opensolaris.org/os/licensing.
11# See the License for the specific language governing permissions
12# and limitations under the License.
13#
14# When distributing Covered Code, include this CDDL HEADER in each
15# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16# If applicable, add the following below this CDDL HEADER, with the
17# fields enclosed by brackets "[]" replaced with your own identifying
18# information: Portions Copyright [yyyy] [name of copyright owner]
19#
20# CDDL HEADER END
21#
22# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23# Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
24#
25
26#
27# wsdiff(1) is a tool that can be used to determine which compiled objects
28# have changed as a result of a given source change. Developers backporting
29# new features, RFEs and bug fixes need to be able to identify the set of
30# patch deliverables necessary for feature/fix realization on a patched system.
31#
# The tool works by comparing objects in two trees/proto areas (one built
# with, and one built without, the source changes).
34#
35# Using wsdiff(1) is fairly simple:
36#	- Bringover to a fresh workspace
37#	- Perform a full non-debug build (clobber if workspace isn't fresh)
38#	- Move the proto area aside, call it proto.old, or something.
39#	- Integrate your changes to the workspace
40#	- Perform another full non-debug clobber build.
41#	- Use wsdiff(1) to see what changed:
42#		$ wsdiff proto.old proto
43#
44# By default, wsdiff will print the list of changed objects / deliverables to
45# stdout. If a results file is specified via -r, the list of differing objects,
46# and details about why wsdiff(1) thinks they are different will be logged to
47# the results file.
48#
49# By invoking nightly(1) with the -w option to NIGHTLY_FLAGS, nightly(1) will
50# use wsdiff(1) to report on what objects changed since the last build.
51#
52# For patch deliverable purposes, it's advised to have nightly do a clobber,
53# non-debug build.
54#
55# Think about the results. Was something flagged that you don't expect? Go look
56# at the results file to see details about the differences.
57#
58# Use the -i option in conjunction with -v and -V to dive deeper and have
59# wsdiff(1) report with more verbosity.
60#
# Usage: wsdiff [-dvVst] [-r results ] [-i filelist ] old new
#
# Where "old" is the path to the proto area built without the changes, and
# "new" is the path to the proto area built with the changes. The following
# options are supported:
#
#        -d      Print debug messages about the progress
#        -v      Do not truncate observed diffs in results
#        -V      Log *all* ELF sect diffs vs. logging the first diff found
#        -t      Use onbld tools in $SRC/tools
#        -r      Log results and observed differences
#        -s      Produce sorted list of differences
#        -i      Tell wsdiff which objects to compare via an input file list
72
73from __future__ import print_function
74import datetime, fnmatch, getopt, os, profile, io, subprocess
75import re, resource, select, shutil, signal, string, struct, sys, tempfile
76import time, threading
77from stat import *
78
# True when running under Python 3. getoutput() uses this to pick the
# module that provides getstatusoutput().
PY3 = sys.version_info[0] == 3

# The commands module only exists in Python 2.
if not PY3:
	import commands

# Human readable diffs truncated by default if longer than this
# (measured with len() on the diff text in log_difference()).
# Specifying -v on the command line will override
diffs_sz_thresh = 4096

# Lock name	 Provides exclusive access to
# --------------+------------------------------------------------
# output_lock	 standard output or temporary file (difference())
# log_lock	 the results file (log_difference())
# wset_lock	 changedFiles list (workerThread())
output_lock = threading.Lock()
log_lock = threading.Lock()
wset_lock = threading.Lock()

# Variable for thread control: worker threads keep taking work while this
# is True; the SIGINT handler discontinue_processing() sets it to False.
keep_processing = True

# Default search path for wsdiff; find_tool() looks for each external
# tool in these directories, in order.
wsdiff_path = [ "/usr/bin",
		"/usr/ccs/bin",
		"/lib/svc/bin",
		"/opt/onbld/bin" ]

# These are objects that wsdiff will notice look different, but will not report.
# Existence of an exceptions list, and adding things here is *dangerous*,
# and therefore the *only* reasons why anything would be listed here is because
# the objects do not build deterministically, yet we *cannot* fix this.
#
# These perl libraries use __DATE__ and therefore always look different.
# Ideally, we would purge the use of __DATE__ from the source, but because
# this is source we wish to distribute with Solaris "unchanged", we cannot modify.
#
wsdiff_exceptions = [
	"usr/perl5/5.8.4/lib/sun4-solaris-64int/CORE/libperl.so.1",
	"usr/perl5/5.6.1/lib/sun4-solaris-64int/CORE/libperl.so.1",
	"usr/perl5/5.8.4/lib/i86pc-solaris-64int/CORE/libperl.so.1",
	"usr/perl5/5.6.1/lib/i86pc-solaris-64int/CORE/libperl.so.1"
]
121
def getoutput(cmd):
	"""Run cmd and return the (status, output) pair from getstatusoutput().

	Python 3 provides getstatusoutput() in subprocess, Python 2 in the
	commands module; PY3 selects between the two.
	"""
	provider = subprocess if PY3 else commands
	return provider.getstatusoutput(cmd)
127
128#####
129# Logging routines
130#
131
132# Debug message to be printed to the screen, and the log file
def debug(msg) :
	"""Print a debug message to stdout and, when logging, to the results file.

	Nothing is emitted unless the debugon global is set. The message is
	prefixed with "## " so that it stands out from regular output.
	"""
	if not debugon :
		return

	# Add prefix to highlight debugging message
	msg = "## " + msg

	# "with" guarantees the lock is released even if print/flush raises;
	# a lock left held would block every other worker thread.
	with output_lock :
		print(msg)
		sys.stdout.flush()
	if logging :
		with log_lock :
			print(msg, file=log)
			log.flush()
147
148# Informational message to be printed to the screen, and the log file
def info(msg) :
	"""Print an informational message to stdout and, when logging, to the
	results file."""

	# "with" guarantees the lock is released even if print/flush raises;
	# a lock left held would block every other worker thread.
	with output_lock :
		print(msg)
		sys.stdout.flush()
	if logging :
		with log_lock :
			print(msg, file=log)
			log.flush()
160
161# Error message to be printed to the screen, and the log file
def error(msg) :
	"""Print msg, prefixed with "ERROR: ", to stderr and, when logging, to
	the results file."""

	# "with" guarantees the lock is released even if print/flush raises;
	# a lock left held would block every other worker thread.
	with output_lock :
		print("ERROR: " + msg, file=sys.stderr)
		sys.stderr.flush()
	if logging :
		with log_lock :
			print("ERROR: " + msg, file=log)
			log.flush()
173
174# Informational message to be printed only to the log, if there is one.
def v_info(msg) :
	"""Write an informational message to the results file only.

	Does nothing when no results file is in use (logging is false).
	"""
	if not logging :
		return

	# "with" guarantees the lock is released even if print/flush raises.
	with log_lock :
		print(msg, file=log)
		log.flush()
182
183#
184# Flag a detected file difference
185# Display the fileName to stdout, and log the difference
186#
def difference(f, dtype, diffs) :
	"""Flag a detected difference for file f.

	f	name of the differing object, as it should be reported
	dtype	short description of the kind of difference (e.g. "ELF")
	diffs	human readable diff text for the results file (may be empty)

	Objects listed in wsdiff_exceptions are silently ignored. Otherwise
	the name is either collected in differentFiles (when the "sorted"
	global is set) or printed to stdout right away, and the details are
	passed on to log_difference().
	"""
	if f in wsdiff_exceptions :
		return

	# "with" guarantees the lock is released even if print/flush raises;
	# a lock left held would block every other worker thread.
	with output_lock :
		if sorted :
			differentFiles.append(f)
		else:
			print(f)
			sys.stdout.flush()

	log_difference(f, dtype, diffs)
201
202#
203# Do the actual logging of the difference to the results file
204#
def log_difference(f, dtype, diffs) :
	"""Record a difference for file f in the results file.

	Does nothing when no results file is in use. The diff text is cut
	off after diffs_sz_thresh characters unless -v (vdiffs) was given.
	"""
	if not logging :
		return

	# "with" guarantees the lock is released even if a write fails.
	with log_lock :
		print(f, file=log)
		print("NOTE: " + dtype + " difference detected.", file=log)

		difflen = len(diffs)
		if difflen > 0 :
			print('', file=log)

			if not vdiffs and difflen > diffs_sz_thresh :
				print(diffs[:diffs_sz_thresh], file=log)
				print("... truncated due to length: " +
				      "use -v to override ...", file=log)
			else :
				print(diffs, file=log)
			print('\n', file=log)
		log.flush()
225
226
227#####
228# diff generating routines
229#
230
231#
232# Return human readable diffs from two files
233#
def diffFileData(tmpf1, tmpf2) :
	"""Return the output of diff_cmd run on the two files.

	If either file looks binary (see isBinary()), both are first run
	through od(1) and the od output is what gets diffed. The
	intermediate ".od" files are always removed again, including when
	running the diff command raises. Such an exception is logged and
	passed on to the caller.
	"""
	od_files = []

	# Filter the data through od(1) if the data is detected
	# as being binary
	if isBinary(tmpf1) or isBinary(tmpf2) :
		tmp_od1 = tmpf1 + ".od"
		tmp_od2 = tmpf2 + ".od"
		od_files = [ tmp_od1, tmp_od2 ]

		os.system(od_cmd + " -c -t x4" + " " + tmpf1 + " > " + tmp_od1)
		os.system(od_cmd + " -c -t x4" + " " + tmpf2 + " > " + tmp_od2)

		tmpf1 = tmp_od1
		tmpf2 = tmp_od2

	try:
		rc, data = getoutput(diff_cmd + " " + tmpf1 + " " + tmpf2)
	except:
		error("failed to get output of command: " + diff_cmd + " "
		    + tmpf1 + " " + tmpf2)

		# Send exception for the failed command up
		raise
	finally:
		# Remove the temp files as we no longer need them.
		for od_file in od_files :
			try:
				os.unlink(od_file)
			except OSError as e:
				error("diffFileData: unlink failed %s" % e)

	return data
274
275#####
276# Misc utility functions
277#
278
279# Prune off the leading prefix from string s
def str_prefix_trunc(s, prefix) :
	"""Return s without its first len(prefix) characters.

	Only the length of prefix is used; s is not checked to actually
	start with it.
	"""
	return s[len(prefix):]
283
284#
285# Prune off leading proto path goo (if there is one) to yield
286# the deliverable's eventual path relative to root
287# e.g. proto.base/root_sparc/usr/src/cmd/prstat => usr/src/cmd/prstat
288#
def fnFormat(fn) :
	"""Return fn relative to the proto area root.

	Everything up to and including the first "/" that follows the first
	occurrence of "root_<arch>" is dropped. fn is returned unchanged when
	it contains no such component, or no "/" after it.
	"""
	marker = "root_" + arch

	start = fn.find(marker)
	if start != -1 :
		slash = fn.find("/", start)
		if slash != -1 :
			return fn[slash + 1:]

	return fn
301
302#####
303# Usage / argument processing
304#
305
306#
307# Display usage message
308#
def usage() :
	"""Print the usage message to stderr and exit with status 1."""
	text = """Usage: wsdiff [-dvVst] [-r results ] [-i filelist ] old new
        -d      Print debug messages about the progress
        -v      Do not truncate observed diffs in results
        -V      Log *all* ELF sect diffs vs. logging the first diff found
        -t      Use onbld tools in $SRC/tools
        -r      Log results and observed differences
        -s      Produce sorted list of differences
        -i      Tell wsdiff which objects to compare via an input file list"""

	# Push out anything still pending on stdout before the message
	sys.stdout.flush()
	print(text, file=sys.stderr)
	sys.exit(1)
321
322#
323# Process command line options
324#
def args() :
	"""Parse sys.argv and return the settings needed by the caller.

	The option flags (-d, -r, -s, -v, -V) are recorded in the debugon,
	logging, sorted, vdiffs and reportAllSects globals. Any usage error
	prints the usage message and exits.

	Returns the tuple
	(baseRoot, fileNamesFile, localTools, ptchRoot, results).
	"""
	global debugon
	global logging
	global vdiffs
	global reportAllSects
	global sorted

	fileNamesFile = ""
	results = ""
	localTools = False

	# getopt.getopt() hands back the (option, value) pairs and the list
	# of remaining non-option arguments.
	try:
		optlist, operands = getopt.getopt(sys.argv[1:], 'di:r:vVst?')
	except getopt.error:
		usage()

	# A correct wsdiff invocation has exactly two non-option arguments:
	# the paths to the base (old) and ptch (new) proto areas.
	if len(operands) != 2 :
		usage()

	for opt, val in optlist :
		if opt == '-d' :
			debugon = True
		elif opt == '-i' :
			fileNamesFile = val
		elif opt == '-r' :
			results = val
			logging = True
		elif opt == '-s' :
			sorted = True
		elif opt == '-v' :
			vdiffs = True
		elif opt == '-V' :
			reportAllSects = True
		elif opt == '-t':
			localTools = True
		else:
			# this is where '-?' ends up
			usage()

	baseRoot, ptchRoot = operands

	if not baseRoot or not ptchRoot :
		usage()

	if logging and not results :
		usage()

	# -v and -V only influence what goes to the results file
	if vdiffs and not logging :
		error("The -v option requires a results file (-r)")
		sys.exit(1)

	if reportAllSects and not logging :
		error("The -V option requires a results file (-r)")
		sys.exit(1)

	# alphabetical order
	return baseRoot, fileNamesFile, localTools, ptchRoot, results
393
394#####
395# File identification
396#
397
398#
399# Identify the file type.
400# If it's not ELF, use the file extension to identify
401# certain file types that require special handling to
402# compare. Otherwise just return a basic "ASCII" type.
403#
def getTheFileType(f) :
	"""Classify file f for the purpose of choosing a compare routine.

	Returns 'Error' if f cannot be stat'ed, 'ASCII' for an empty file,
	'ELF' if isELF() recognises it, the type associated with the text
	after the last '.' in f if that is one of the known extensions, and
	'ASCII' for everything else.
	"""
	typeByExtension = { 'a'		:	'ELF Object Archive',
			    'jar'	:	'Java Archive',
			    'html'	:	'HTML',
			    'ln'	:	'Lint Library',
			    'db'	:	'Sqlite Database' }

	try:
		size = os.stat(f)[ST_SIZE]
	except:
		# any failure at all to stat the file is reported the same way
		error("failed to stat " + f)
		return 'Error'

	if size == 0 :
		return 'ASCII'

	if isELF(f) == 1 :
		return 'ELF'

	# Test the file extension, i.e. whatever follows the last '.'
	stem, dot, extension = f.rpartition('.')
	if dot and extension in typeByExtension :
		return typeByExtension[extension]

	return 'ASCII'
429
430#
431# Return non-zero if "f" is an ELF file
432#
elfmagic = b'\177ELF'
def isELF(f) :
	"""Return 1 if file f starts with the ELF magic number, else 0.

	A file that cannot be opened or read is reported as not ELF (0).
	"""
	try:
		with open(f, mode='rb') as fd:
			magic = fd.read(len(elfmagic))
	except EnvironmentError:
		# Only I/O failures mean "not ELF"; anything else is a
		# programming error and must not be swallowed silently.
		return 0

	if magic == elfmagic :
		return 1
	return 0
444
445#
# Return non-zero if "f" is binary.
447# Consider the file to be binary if it contains any null characters
448#
def isBinary(f) :
	"""Return 1 if file f contains a null byte, else 0.

	A file that cannot be opened or read is reported as binary (1).
	The file is scanned in fixed-size chunks so that large objects are
	never held in memory as a whole.
	"""
	chunk_size = 64 * 1024

	try:
		with open(f, mode='rb') as fd:
			while True :
				chunk = fd.read(chunk_size)
				if not chunk :
					# reached EOF without seeing a null byte
					return 0
				if b'\0' in chunk :
					return 1
	except EnvironmentError:
		# Only I/O failures are treated as "binary"
		return 1
459
460#####
461# Directory traversal and file finding
462#
463
464#
465# Return a sorted list of files found under the specified directory
466#
def findFiles(d) :
	"""Generate the paths of all files found under directory d.

	Within each directory the files are produced in sorted order, and
	sub-directories are visited in sorted order, so the sequence is the
	same from run to run regardless of the order in which the file
	system lists the entries. Nothing is produced if d cannot be walked.
	"""
	for path, subdirs, files in os.walk(d) :
		# os.walk() descends into subdirs in the order we leave the
		# list in, so sorting it in place fixes the traversal order.
		subdirs.sort()
		for name in sorted(files) :
			yield os.path.join(path, name)
472
473#
474# Examine all files in base, ptch
475#
476# Return a list of files appearing in both proto areas,
477# a list of new files (files found only in ptch) and
478# a list of deleted files (files found only in base)
479#
def protoCatalog(base, ptch) :
	"""Catalog the files found in the base and ptch proto areas.

	Symbolic links are ignored. File names are reported with the
	leading base/ptch path removed.

	Returns (compFiles, newFiles, deletedFiles):
	  compFiles	files present in both proto areas
	  newFiles	files present only in ptch
	  deletedFiles	files present only in base
	Each list preserves the order in which the files were found.
	"""
	debug("Getting the list of files in the base area");
	baseFilesList = list(findFiles(base))
	baseStringLength = len(base)
	debug("Found " + str(len(baseFilesList)) + " files")

	debug("Getting the list of files in the patch area");
	ptchFilesList = list(findFiles(ptch))
	ptchStringLength = len(ptch)
	debug("Found " + str(len(ptchFilesList)) + " files")

	# Inventory files in the base proto area
	debug("Determining the list of regular files in the base area");
	baseList = [ fn[baseStringLength:] for fn in baseFilesList
	    if not os.path.islink(fn) ]
	debug("Found " + str(len(baseList)) + " files")

	# Inventory files in the patch proto area
	debug("Determining the list of regular files in the patch area");
	ptchList = [ fn[ptchStringLength:] for fn in ptchFilesList
	    if not os.path.islink(fn) ]
	debug("Found " + str(len(ptchList)) + " files")

	# Proto areas hold a great many files; sets keep each membership
	# test constant time instead of a scan of the whole list.
	baseSet = set(baseList)
	ptchSet = set(ptchList)

	# Deleted files appear in the base area, but not the patch area
	debug("Searching for deleted files by comparing the lists")
	deletedFiles = [ fn for fn in baseList if fn not in ptchSet ]
	debug("Found " + str(len(deletedFiles)) + " deleted files")

	# Eliminate "deleted" files from the list of objects appearing
	# in both the base and patch proto areas
	debug("Eliminating deleted files from the list of objects")
	compFiles = [ fn for fn in baseList if fn in ptchSet ]
	debug("List for comparison reduced to " + str(len(compFiles))
	    + " files")

	# New files appear in the patch area, but not the base
	debug("Getting the list of newly added files")
	newFiles = [ fn for fn in ptchList if fn not in baseSet ]
	debug("Found " + str(len(newFiles)) + " new files")

	return compFiles, newFiles, deletedFiles
544
545#
546# Examine the files listed in the input file list
547#
548# Return a list of files appearing in both proto areas,
549# a list of new files (files found only in ptch) and
550# a list of deleted files (files found only in base)
551#
def flistCatalog(base, ptch, flist) :
	"""Catalog the files named in the input file list flist.

	Each non-blank line of flist names an object, either relative to the
	base/ptch directory or relative to $ROOT (in which case the
	"root_<arch>/" component is added back). Symbolic links are ignored.

	Returns (compFiles, newFiles, deletedFiles):
	  compFiles	files present in both proto areas
	  newFiles	files present only in ptch
	  deletedFiles	files present only in base
	Objects found in neither tree are reported via error() and skipped.
	"""
	compFiles = []		# List of files in both proto areas
	newFiles = []		# New files detected
	deletedFiles = []	# Deleted files

	try:
		with open(flist, "r") as fd :
			files = fd.readlines()
	except EnvironmentError:
		error("could not open: " + flist)
		cleanup(1)
		# NOTE(review): cleanup() is defined elsewhere and presumably
		# exits; return empty results should it ever come back.
		return compFiles, newFiles, deletedFiles

	for f in files :
		if f == '\n' :
			continue

		# the fileNames have a trailing '\n'
		f = f.rstrip()

		ptch_present = True
		base_present = True

		# Start out with the name as listed, so that fn is defined
		# (and belongs to this entry rather than the previous one)
		# even when the object is found in neither tree.
		fn = f

		# The objects in the file list have paths relative
		# to $ROOT or to the base/ptch directory specified on
		# the command line.
		# If it's relative to $ROOT, we'll need to add back the
		# root_`uname -p` goo we stripped off in fnFormat()
		if os.path.exists(base + f) :
			fn = f
		elif os.path.exists(base + "root_" + arch + "/" + f) :
			fn = "root_" + arch + "/" + f
		else :
			base_present = False

		if base_present :
			if not os.path.exists(ptch + fn) :
				ptch_present = False
		else :
			if os.path.exists(ptch + f) :
				fn = f
			elif os.path.exists(ptch + "root_" + arch + "/" + f) :
				fn = "root_" + arch + "/" + f
			else :
				ptch_present = False

		if os.path.islink(base + fn) :	# ignore links
			base_present = False
		if os.path.islink(ptch + fn) :
			ptch_present = False

		if base_present and ptch_present :
			compFiles.append(fn)
		elif base_present :
			deletedFiles.append(fn)
		elif ptch_present :
			newFiles.append(fn)
		else :
			# links in both trees are skipped without complaint
			if (os.path.islink(base + fn) and
			    os.path.islink(ptch + fn)) :
				continue
			error(f + " in file list, but not in either tree. " +
			    "Skipping...")

	return compFiles, newFiles, deletedFiles
619
620
621#
622# Build a fully qualified path to an external tool/utility.
623# Consider the default system locations. For onbld tools, if
624# the -t option was specified, we'll try to use built tools in $SRC tools,
625# and otherwise, we'll fall back on /opt/onbld/
626#
def find_tool(tool) :
	"""Return the command to use for running tool.

	If tool itself names an existing path it is returned unchanged.
	Otherwise each directory in wsdiff_path is searched, first directly
	and then in its <arch> sub-directory, and the first hit is returned
	with a trailing blank appended. If nothing is found an error is
	printed and the program exits.

	NOTE(review): only results found through wsdiff_path carry the
	trailing blank; callers that append arguments without a separator
	of their own depend on it.
	"""
	# First, check what was passed
	if os.path.exists(tool) :
		return tool

	# Next try in wsdiff path
	for pdir in wsdiff_path :
		candidates = (pdir + "/" + tool,
		    pdir + "/" + arch + "/" + tool)
		for location in candidates :
			if os.path.exists(location) :
				return location + " "

	error("Could not find path to: " + tool)
	sys.exit(1)
645
646
647#####
648# ELF file comparison helper routines
649#
650
651#
652# Return a dictionary of ELF section types keyed by section name
653#
def get_elfheader(f) :
	"""Return a dictionary mapping ELF section names of f to section types.

	The information is taken from the output of "elfdump -c".

	Raises RuntimeError if elfdump produced no output at all, or if a
	section is listed without a type.
	"""
	header = {}

	rc, hstring = getoutput(elfdump_cmd + " -c " + f)

	if len(hstring) == 0 :
		msg = "Failed to dump ELF header for " + f
		error(msg)
		# A bare "raise" is not valid here as no exception is active
		raise RuntimeError(msg)

	# elfdump(1) dumps the section headers with the section name
	# following "sh_name:", and the section type following "sh_type:"
	for sect in hstring.split("Section Header") :
		datap = sect.find("sh_name:")
		if datap == -1 :
			continue
		section = sect[datap:].split()[1]

		datap = sect.find("sh_type:")
		if datap == -1 :
			msg = ("Could not get type for sect: " + section +
			      " in " + f)
			error(msg)
			# Fail deliberately rather than by running into an
			# index error while slicing from position -1
			raise RuntimeError(msg)
		header[section] = sect[datap:].split()[2]

	return header
681
682#
683# Extract data in the specified ELF section from the given file
684#
def extract_elf_section(f, section) :
	"""Return the contents of the named ELF section of f as dumped by
	"dump -sn".

	The leading part of the output, up to and including the first ":"
	(dump(1) starts by displaying the file name), is removed.

	Raises RuntimeError if dump produced no output.
	"""
	rc, data = getoutput(dump_cmd + " -sn " + section + " " + f)

	if len(data) == 0 :
		msg = (dump_cmd + "yielded no data on section " + section +
		    " of " + f)
		error(msg)
		# A bare "raise" is not valid here as no exception is active
		raise RuntimeError(msg)

	# dump(1) displays the file name to start...
	# get past it to the data itself
	dbegin = data.find(":") + 1
	return data[dbegin:]
701
702#
703# Return a (hopefully meaningful) human readable set of diffs
704# for the specified ELF section between f1 and f2
705#
706# Depending on the section, various means for dumping and diffing
707# the data may be employed.
708#
text_sections = [ '.text', '.init', '.fini' ]

# Sections that have an elfdump(1) option of their own for dumping them
_elfdump_section_opts = {
	".group"	: "-g",
	".hash"		: "-h",
	".dynamic"	: "-d",
	".got"		: "-G",
	".SUNW_cap"	: "-H",
	".interp"	: "-i",
}

def diff_elf_section(f1, f2, section, sh_type) :
	"""Return the diff of the given ELF section between f1 and f2.

	f1, f2		the two ELF objects
	section		name of the section to compare
	sh_type		section type, as found by get_elfheader()

	Each object is dumped to a temporary file (named after the object
	and the current thread), by a command chosen according to the
	section, and the two dumps are handed to diffFileData(). The
	temporary files are removed again whether or not that succeeds.
	"""
	tname = threading.current_thread().name
	tmpFile1 = tmpDir1 + os.path.basename(f1) + tname
	tmpFile2 = tmpDir2 + os.path.basename(f2) + tname

	# Build the shell command that dumps the section of f into tmpFile
	def build_cmd(f, tmpFile) :
		if sh_type == "SHT_RELA" :
			return elfdump_cmd + " -r " + f + " > " + tmpFile
		if section in _elfdump_section_opts :
			return (elfdump_cmd + " " +
			    _elfdump_section_opts[section] + " " + f +
			    " > " + tmpFile)
		if section == ".symtab" or section == ".dynsym" :
			return (elfdump_cmd + " -s -N " + section + " " + f +
			    " > " + tmpFile)
		if section in text_sections :
			# dis sometimes complains when it hits something it
			# doesn't know how to disassemble. Just ignore it, as
			# the output being generated here is human readable,
			# and we've already correctly flagged the difference.
			return (dis_cmd + " -t " + section + " " + f +
			    " 2>/dev/null | grep -v disassembly > " + tmpFile)
		# anything else: have elfdump write out the section itself
		return (elfdump_cmd + " -w " + tmpFile + " -N " +
		    section + " " + f)

	os.system(build_cmd(f1, tmpFile1))
	os.system(build_cmd(f2, tmpFile2))

	try:
		data = diffFileData(tmpFile1, tmpFile2)
	finally:
		# remove temp files as we no longer need them
		for tmpFile in (tmpFile1, tmpFile2) :
			try:
				os.unlink(tmpFile)
			except OSError as e:
				error("diff_elf_section: unlink failed %s" % e)

	return data
773
774#
775# compare the relevant sections of two ELF binaries
776# and report any differences
777#
# Returns: 1 if any differences found
779#          0 if no differences found
780#	  -1 on error
781#
782
783# Sections deliberately not considered when comparing two ELF
784# binaries. Differences observed in these sections are not considered
785# significant where patch deliverable identification is concerned.
# Names of the ELF sections that compareElfs() leaves out of the
# section by section comparison.
sections_to_skip = [ ".SUNW_signature",
		     ".comment",
		     ".SUNW_ctf",
		     ".debug",
		     ".plt",
		     ".rela.bss",
		     ".rela.plt",
		     ".line",
		     ".note",
		     ".compcom",
		     ]

# Sections that compareElfs() moves to the front of its list of sections
# to compare. Each one present is re-inserted at the head of that list in
# turn, so the entry listed last here ends up being compared first.
sections_preferred = [ ".rodata.str1.8",
		       ".rodata.str1.1",
		       ".rodata",
		       ".data1",
		       ".data",
		       ".text",
		       ]
805
def compareElfs(base, ptch, quiet) :
	"""Compare the relevant sections of two ELF objects.

	base, ptch	paths of the two objects
	quiet		if true, only determine whether the objects differ;
			do not report or log anything

	Returns 1 if a difference was found and 0 if not. This also holds
	when all section differences are being logged (reportAllSects).
	None is returned if the section headers or the contents of a section
	could not be obtained.
	"""
	try:
		base_header = get_elfheader(base)
	except Exception:
		return
	sections = list(base_header.keys())

	try:
		ptch_header = get_elfheader(ptch)
	except Exception:
		return

	fileName = fnFormat(base)

	# Derive the list of ELF sections found only in
	# either e1 or e2.
	e1_only_sections = [ s for s in sections if s not in ptch_header ]
	e2_only_sections = [ s for s in ptch_header if s not in base_header ]

	# A section missing from either object is a difference in itself;
	# sections missing from ptch are looked at (and reported) first.
	for only, here, there in ((e1_only_sections, base, ptch),
	    (e2_only_sections, ptch, base)) :
		if len(only) == 0 :
			continue
		if quiet :
			return 1

		data = ""
		if logging :
			slist = "".join([ sect + "\t" for sect in only ])
			data = ("ELF sections found in " +
				here + " but not in " + there +
				"\n\n" + slist)

		difference(fileName, "ELF", data)
		return 1

	# Look for preferred sections, and put those at the
	# top of the list of sections to compare
	for psect in sections_preferred :
		if psect in sections :
			sections.remove(psect)
			sections.insert(0, psect)

	# Compare ELF sections
	differs = 0
	first_section = True
	for sect in sections :

		if sect in sections_to_skip :
			continue

		try:
			s1 = extract_elf_section(base, sect)
			s2 = extract_elf_section(ptch, sect)
		except Exception:
			return

		if s1 == s2 :
			continue

		if quiet :
			# Nothing gets logged in quiet mode, so the first
			# difference found settles the matter.
			return 1

		sh_type = base_header[sect]
		data = diff_elf_section(base, ptch, sect, sh_type)

		# If all ELF sections are being reported, then
		# invoke difference() to flag the file name to
		# stdout only once. Any other section differences
		# should be logged to the results file directly
		if first_section :
			difference(fileName, "ELF " + sect, data)
		else :
			log_difference(fileName, "ELF " + sect, data)

		if not reportAllSects :
			return 1

		# Keep going to log the remaining sections, but remember
		# that the objects differ so the caller is told so.
		differs = 1
		first_section = False

	return differs
912
913#####
914# recursively remove 2 directories
915#
# Used for removal of temporary directory structures (ignores any errors).
917#
def clearTmpDirs(dir1, dir2) :
	"""Recursively remove the directories dir1 and dir2.

	A path that is not an existing directory is left alone, and errors
	encountered while removing are ignored.
	"""
	for tmpdir in (dir1, dir2) :
		if os.path.isdir(tmpdir) :
			# second argument: ignore_errors
			shutil.rmtree(tmpdir, True)
925
926
927#####
928# Archive object comparison
929#
930# Returns 1 if difference detected
931#         0 if no difference detected
932#        -1 on error
933#
def compareArchives(base, ptch, fileType) :
	"""Compare two archives member by member.

	base, ptch	paths of the two archives
	fileType	"Java Archive" or "ELF Object Archive"

	The archives are first compared as plain files. Only if that does
	not show them to be identical are they unpacked into per-thread
	temporary directories and their members compared. The first member
	that was added, removed or changed is reported via difference().

	Returns 1 if a difference was detected, 0 if not, -1 on error.
	"""
	fileName = fnFormat(base)
	suffix = os.path.basename(base) + threading.current_thread().name
	ArchTmpDir1 = tmpDir1 + suffix
	ArchTmpDir2 = tmpDir2 + suffix

	#
	# Be optimistic and first try a straight file compare
	# as it will allow us to finish up quickly.
	#
	if compareBasic(base, ptch, True, fileType) == 0 :
		return 0

	for tmpdir in (ArchTmpDir1, ArchTmpDir2) :
		try:
			os.makedirs(tmpdir)
		except OSError as e:
			error("compareArchives: makedir failed %s" % e)
			return -1

	# From here on the temporary directories are removed on every
	# way out of the function.
	try:
		# copy over the objects to the temp areas, and
		# unpack them
		for src, dst in ((base, ArchTmpDir1), (ptch, ArchTmpDir2)) :
			cpCmd = "cp -fp " + src + " " + dst
			rc, output = getoutput(cpCmd)
			if rc != 0:
				error(cpCmd + " failed: " + output)
				return -1

		bname = fileName.split('/')[-1]
		if fileType == "Java Archive" :
			unpack = ("jar xf " + bname +
				  "; rm -f " + bname + " META-INF/MANIFEST.MF")
		elif fileType == "ELF Object Archive" :
			unpack = ("/usr/ccs/bin/ar x " +
				  bname + "; rm -f " + bname)
		else :
			error("unexpected file type: " + fileType)
			return -1

		os.system("cd " + ArchTmpDir1 + "; " + unpack)
		os.system("cd " + ArchTmpDir2 + "; " + unpack)

		# Member names, with the leading temporary path trimmed off
		baseFlist = [ str_prefix_trunc(fn, ArchTmpDir1)
		    for fn in findFiles(ArchTmpDir1) ]
		ptchFlist = [ str_prefix_trunc(fn, ArchTmpDir2)
		    for fn in findFiles(ArchTmpDir2) ]

		# sets make the membership tests below constant time
		baseSet = set(baseFlist)
		ptchSet = set(ptchFlist)

		for fn in ptchFlist :
			if fn not in baseSet :
				difference(fileName, fileType,
					   fn + " added to " + fileName)
				return 1

		for fn in baseFlist :
			if fn not in ptchSet :
				difference(fileName, fileType,
					   fn + " removed from " + fileName)
				return 1

			differs = compareOneFile((ArchTmpDir1 + fn),
			    (ArchTmpDir2 + fn), True)
			if differs :
				difference(fileName, fileType,
					   fn + " in " + fileName + " differs")
				return 1

		return 0
	finally:
		clearTmpDirs(ArchTmpDir1, ArchTmpDir2)
1032
1033#####
1034# (Basic) file comparison
1035#
1036# Returns 1 if difference detected
1037#         0 if no difference detected
1038#        -1 on error
1039#
def compareBasic(base, ptch, quiet, fileType) :
	"""Compare two files byte for byte.

	base, ptch	paths of the two files
	quiet		if true, only determine whether the files differ;
			otherwise a difference is also reported via
			difference(), along with the diffs
	fileType	type to report the difference as

	Returns 1 if a difference was detected, 0 if not, and -1 if either
	file could not be examined.
	"""
	if quiet :
		# A size mismatch settles it without reading any data
		try:
			if os.stat(base).st_size != os.stat(ptch).st_size :
				return 1
		except OSError as e:
			error("could not stat: %s" % e)
			return -1

	try:
		with open(base, 'rb') as fh:
			baseData = fh.read()
	except EnvironmentError:
		error("could not open " + base)
		return -1

	try:
		with open(ptch, 'rb') as fh:
			ptchData = fh.read()
	except EnvironmentError:
		error("could not open " + ptch)
		return -1

	if baseData == ptchData :
		return 0

	if not quiet :
		diffs = diffFileData(base, ptch)
		difference(fnFormat(base), fileType, diffs)
	return 1
1070
1071
1072#####
1073# Compare two objects by producing a data dump from
1074# each object, and then comparing the dump data
1075#
1076# Returns: 1 if a difference is detected
1077#          0 if no difference detected
1078#         -1 upon error
1079#
def compareByDumping(base, ptch, quiet, fileType) :
	"""Compare two objects by dumping each to text and comparing the dumps.

	base, ptch	paths of the two objects
	quiet		if true, only determine whether the objects differ;
			otherwise a difference is also reported via
			difference(), along with the diffs of the dumps
	fileType	"Lint Library" or "Sqlite Database"

	Returns 1 if a difference is detected, 0 if not, and -1 upon error
	(unsupported fileType, or a dump that cannot be read back).
	"""
	fileName = fnFormat(base)
	tname = threading.current_thread().name
	tmpFile1 = tmpDir1 + os.path.basename(base) + tname
	tmpFile2 = tmpDir2 + os.path.basename(ptch) + tname

	if fileType == "Lint Library" :
		lintFilter = (" | egrep -v '(LINTOBJ|LINTMOD):'" +
			      " | grep -v PASS[1-3]:" +
			      " > ")
		baseCmd = lintdump_cmd + " -ir " + base + lintFilter + tmpFile1
		ptchCmd = lintdump_cmd + " -ir " + ptch + lintFilter + tmpFile2
	elif fileType == "Sqlite Database" :
		# Explicit blank: sqlite_cmd need not end in one
		baseCmd = ("echo .dump | " + sqlite_cmd + " " + base + " > " +
			  tmpFile1)
		ptchCmd = ("echo .dump | " + sqlite_cmd + " " + ptch + " > " +
			  tmpFile2)
	else :
		error("unexpected file type: " + fileType)
		return -1

	os.system(baseCmd)
	os.system(ptchCmd)

	try:
		dumps = []
		for tmpFile in (tmpFile1, tmpFile2) :
			try:
				with open(tmpFile, 'rb') as fh:
					dumps.append(fh.read())
			except EnvironmentError:
				error("could not open: " + tmpFile)
				return -1
		baseData, ptchData = dumps

		if baseData == ptchData :
			return 0

		if not quiet :
			# the temporary files must still exist at this point
			data = diffFileData(tmpFile1, tmpFile2)
			difference(fileName, fileType, data)
		return 1
	finally:
		# Remove the temporary files now.
		for tmpFile in (tmpFile1, tmpFile2) :
			if not os.path.exists(tmpFile) :
				continue
			try:
				os.unlink(tmpFile)
			except OSError as e:
				error("compareByDumping: unlink failed %s" % e)
1137
1138#####
1139#
1140# SIGINT signal handler. Changes thread control variable to tell the threads
1141# to finish their current job and exit.
1142#
def discontinue_processing(signl, frme):
	"""SIGINT handler: tell the worker threads to wind down.

	Prints a notice on stderr and clears the module-level keep_processing
	flag that each worker thread checks before taking its next file.
	signl and frme are the standard signal-handler arguments and are not
	used. Always returns 0.
	"""
	global keep_processing

	notice = "Caught Ctrl-C, stopping the threads"
	print(notice, file=sys.stderr)

	keep_processing = False
	return 0
1150
1151#####
1152#
1153# worker thread for changedFiles processing
1154#
class workerThread(threading.Thread) :
	"""Worker that drains the shared changedFiles list.

	Each pass pops one relative file name from changedFiles while holding
	wset_lock, then compares baseRoot + name against ptchRoot + name with
	compareOneFile() in non-quiet mode. The thread exits when the list is
	empty or when keep_processing has been cleared.
	"""
	def run(self):
		while keep_processing :
			# Take one entry under the lock. The with-statement
			# releases it on every exit path, so an unexpected
			# exception cannot leave the other workers blocked.
			with wset_lock :
				try :
					fn = changedFiles.pop()
				except IndexError :
					# there is nothing more to do
					return

			# The comparison runs outside the lock so that the
			# workers only serialize on the list access.
			compareOneFile(baseRoot + fn, ptchRoot + fn, False)
1180
1181#####
1182# Compare two objects. Detect type changes.
1183# Vector off to the appropriate type specific
1184# compare routine based on the type.
1185#
def compareOneFile(base, ptch, quiet) :
	"""Compare one object from each tree, dispatching on its file type.

	base, ptch -- paths of the old and new object.
	quiet      -- passed through to the type-specific comparison; it also
	              suppresses the report when the two file types differ.

	Returns -1 if either file type could not be determined, 1 if the
	types differ, otherwise whatever the type-specific routine returns.
	"""

	# Classify both sides first; a type change is itself a difference.
	baseType = getTheFileType(base)
	ptchType = getTheFileType(ptch)

	if 'Error' in (baseType, ptchType) :
		return -1

	fileName = fnFormat(base)

	if baseType != ptchType :
		if not quiet :
			difference(fileName, "file type", baseType + " to " + ptchType)
		return 1

	if baseType == 'ELF' :
		return compareElfs(base, ptch, quiet)

	if baseType in ('Java Archive', 'ELF Object Archive') :
		return compareArchives(base, ptch, baseType)

	if baseType in ('Lint Library', 'Sqlite Database') :
		return compareByDumping(base, ptch, quiet, baseType)

	# HTML and every remaining type (some variety of text file) get the
	# plain byte comparison.
	return compareBasic(base, ptch, quiet, baseType)
1223
1224# Cleanup and self-terminate
def cleanup(ret) :
	"""Remove the scratch directories, close the log and exit.

	ret -- process exit status handed to sys.exit().

	Does not return: it always ends with sys.exit(ret).
	"""

	debug("Performing cleanup (" + str(ret) + ")")

	# Both scratch directories are removed only if they exist.
	for scratchDir in (tmpDir1, tmpDir2) :
		if os.path.isdir(scratchDir) :
			shutil.rmtree(scratchDir)

	# The log handle is only touched when logging is enabled.
	if logging :
		log.close()

	sys.exit(ret)
1238
def main() :
	"""Run a complete wsdiff comparison and exit.

	Steps, in order:
	  - parse the command line via args() and open the results file when
	    logging was requested;
	  - locate the external tools with find_tool();
	  - raise the soft open-file limit to the hard limit;
	  - validate the two proto area paths and create the scratch
	    directories /tmp/wsdiff_tmp{1,2}_<pid>/;
	  - build the lists of changed, new and deleted files and print the
	    new and deleted ones;
	  - compare the changed files with a pool of workerThread instances;
	  - call cleanup(), which exits the process (status 1 if the run was
	    interrupted by SIGINT, 0 otherwise).

	Exits with status 1 on setup errors (log file, resource limit, missing
	proto area).
	"""

	# Log file handle
	global log

	# Globals relating to command line options
	global logging, vdiffs, reportAllSects

	# Named temporary files / directories
	global tmpDir1, tmpDir2

	# Command paths
	global lintdump_cmd, elfdump_cmd, dump_cmd, dis_cmd, od_cmd, diff_cmd, sqlite_cmd

	# Default search path
	global wsdiff_path

	# Essentially "uname -p"
	global arch

	# changed files for worker thread processing
	global changedFiles
	global baseRoot
	global ptchRoot

	# Sort the list of files from a temporary file
	global sorted
	global differentFiles

	# Debugging indicator
	global debugon

	# Some globals need to be initialized
	debugon = logging = vdiffs = reportAllSects = sorted = False


	# Process command line arguments
	# Return values are returned from args() in alpha order
	# Note that args() also sets the globals:
	#	logging to True if verbose logging (to a file) was enabled
	#	vdiffs to True if logged differences aren't to be truncated
	#	reportAllSects to True if all ELF section differences are to be reported
	#
	baseRoot, fileNamesFile, localTools, ptchRoot, results = args()

	#
	# Set up the results/log file
	#
	if logging :
		try:
			log = open(results, "w")
		except (IOError, OSError):
			# Turn logging off before reporting the failure, and name
			# the path that failed: the log handle was never bound.
			logging = False
			error("failed to open log file: " + str(results))
			sys.exit(1)

		dateTimeStr= "# %04d-%02d-%02d at %02d:%02d:%02d" % time.localtime()[:6]
		v_info("# This file was produced by wsdiff")
		v_info(dateTimeStr)

	# Changed files (used only for the sorted case)
	if sorted :
		differentFiles = []

	#
	# Build paths to the required tools
	#
	# Try to look for tools in $SRC/tools if the "-t" option
	# was specified
	#
	rc, arch = getoutput("uname -p")
	arch = arch.rstrip()
	if localTools :
		try:
			src = os.environ['SRC']
		except KeyError:
			error("-t specified, but $SRC not set. Cannot find $SRC/tools")
			src = ""
		if len(src) > 0 :
			wsdiff_path.insert(0, src + "/tools/proto/opt/onbld/bin")

	lintdump_cmd = find_tool("lintdump")
	elfdump_cmd = find_tool("elfdump")
	dump_cmd = find_tool("dump")
	od_cmd = find_tool("od")
	dis_cmd = find_tool("dis")
	diff_cmd = find_tool("diff")
	sqlite_cmd = find_tool("sqlite")

	#
	# Set resource limit for number of open files as high as possible.
	# This might get handy with big number of threads.
	#
	(nofile_soft, nofile_hard) = resource.getrlimit(resource.RLIMIT_NOFILE)
	try:
		resource.setrlimit(resource.RLIMIT_NOFILE,
		    (nofile_hard, nofile_hard))
	except (ValueError, resource.error):
		error("cannot set resource limits for number of open files")
		sys.exit(1)

	#
	# validate the base and patch paths
	#
	if baseRoot[-1] != '/' :
		baseRoot += '/'

	if ptchRoot[-1] != '/' :
		ptchRoot += '/'

	if not os.path.exists(baseRoot) :
		error("old proto area: " + baseRoot + " does not exist")
		sys.exit(1)

	if not os.path.exists(ptchRoot) :
		error("new proto area: " + ptchRoot + " does not exist")
		sys.exit(1)

	#
	# log some information identifying the run
	#
	v_info("Old proto area: " + baseRoot)
	v_info("New proto area: " + ptchRoot)
	v_info("Results file: " + results + "\n")

	#
	# Set up the temporary directories / files
	# Could use python's tmpdir routines, but these should
	# be easier to identify / keep around for debugging
	pid = os.getpid()
	tmpDir1 = "/tmp/wsdiff_tmp1_" + str(pid) + "/"
	tmpDir2 = "/tmp/wsdiff_tmp2_" + str(pid) + "/"
	try:
		os.makedirs(tmpDir1)
	except OSError as e:
		error("main: makedir failed %s" % e)
	try:
		os.makedirs(tmpDir2)
	except OSError as e:
		error("main: makedir failed %s" % e)

	# Derive a catalog of new, deleted, and to-be-compared objects
	# either from the specified base and patch proto areas, or
	# from an input file list
	newOrDeleted = False

	if fileNamesFile != "" :
		changedFiles, newFiles, deletedFiles = \
			flistCatalog(baseRoot, ptchRoot, fileNamesFile)
	else :
		changedFiles, newFiles, deletedFiles = \
			protoCatalog(baseRoot, ptchRoot)

	if len(newFiles) > 0 :
		newOrDeleted = True
		info("\nNew objects found: ")

		if sorted :
			newFiles.sort()
		for fn in newFiles :
			info(fnFormat(fn))

	if len(deletedFiles) > 0 :
		newOrDeleted = True
		info("\nObjects removed: ")

		if sorted :
			deletedFiles.sort()
		for fn in deletedFiles :
			info(fnFormat(fn))

	if newOrDeleted :
		info("\nChanged objects: ")
	if sorted :
		debug("The list will appear after the processing is done")

	# Here's where all the heavy lifting happens
	# Perform a comparison on each object appearing in
	# both proto areas. compareOneFile will examine the
	# file types of each object, and will vector off to
	# the appropriate comparison routine, where the compare
	# will happen, and any differences will be reported / logged

	# determine maximum number of worker threads by using
	# DMAKE_MAX_JOBS environment variable set by nightly(1)
	# or get number of CPUs in the system
	try:
		max_threads = int(os.environ['DMAKE_MAX_JOBS'])
	except (KeyError, ValueError):
		max_threads = os.sysconf("SC_NPROCESSORS_ONLN")
		# If we cannot get number of online CPUs in the system
		# run unparallelized otherwise bump the number up 20%
		# to achieve best results.
		if max_threads == -1 :
			max_threads = 1
		else :
			max_threads += max_threads // 5

	# A zero or negative job count would start no workers at all and
	# silently report no differences; always run at least one.
	if max_threads < 1 :
		max_threads = 1

	# Set signal handler to attempt graceful exit
	debug("Setting signal handler")
	signal.signal( signal.SIGINT, discontinue_processing )

	# Create and unleash the threads
	# Only at most max_threads must be running at any moment
	mythreads = []
	debug("Spawning " + str(max_threads) + " threads")
	for i in range(max_threads) :
		thread = workerThread()
		mythreads.append(thread)
		thread.start()

	# Wait for the threads to finish and do cleanup if interrupted.
	# The workers are polled once a second rather than joined.
	# Thread.is_alive() is used because isAlive() no longer exists in
	# Python 3.9 and later.
	debug("Waiting for the threads to finish")
	while any(thread.is_alive() for thread in mythreads) :
		# Some threads are still going
		time.sleep(1)

	# Interrupted by SIGINT
	if not keep_processing :
		cleanup(1)

	# If the list of differences was sorted it is stored in an array
	if sorted :
		differentFiles.sort()
		for f in differentFiles :
			info(fnFormat(f))

	# We're done, cleanup.
	cleanup(0)
1472
if __name__ == '__main__' :
	# Script entry point. A Ctrl-C that surfaces as KeyboardInterrupt
	# still goes through cleanup(), which exits with status 1.
	try:
		main()
	except KeyboardInterrupt :
		cleanup(1)
1478
1479