xref: /illumos-gate/usr/src/tools/scripts/wsdiff.py (revision cadd68ea0014761eda6a293664086dfa80686d85)
1#!@TOOLS_PYTHON@
2#
3# CDDL HEADER START
4#
5# The contents of this file are subject to the terms of the
6# Common Development and Distribution License (the "License").
7# You may not use this file except in compliance with the License.
8#
9# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10# or http://www.opensolaris.org/os/licensing.
11# See the License for the specific language governing permissions
12# and limitations under the License.
13#
14# When distributing Covered Code, include this CDDL HEADER in each
15# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16# If applicable, add the following below this CDDL HEADER, with the
17# fields enclosed by brackets "[]" replaced with your own identifying
18# information: Portions Copyright [yyyy] [name of copyright owner]
19#
20# CDDL HEADER END
21#
22# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23# Copyright 2018 OmniOS Community Edition (OmniOSce) Association.
24#
25
26#
27# wsdiff(1) is a tool that can be used to determine which compiled objects
28# have changed as a result of a given source change. Developers backporting
29# new features, RFEs and bug fixes need to be able to identify the set of
30# patch deliverables necessary for feature/fix realization on a patched system.
31#
32# The tool works by comparing objects in two trees/proto areas (one built with,
33# and one built without, the source changes).
34#
35# Using wsdiff(1) is fairly simple:
36#	- Bringover to a fresh workspace
37#	- Perform a full non-debug build (clobber if workspace isn't fresh)
38#	- Move the proto area aside, call it proto.old, or something.
39#	- Integrate your changes to the workspace
40#	- Perform another full non-debug clobber build.
41#	- Use wsdiff(1) to see what changed:
42#		$ wsdiff proto.old proto
43#
44# By default, wsdiff will print the list of changed objects / deliverables to
45# stdout. If a results file is specified via -r, the list of differing objects,
46# and details about why wsdiff(1) thinks they are different will be logged to
47# the results file.
48#
49# By invoking nightly(1) with the -w option to NIGHTLY_FLAGS, nightly(1) will use
50# wsdiff(1) to report on what objects changed since the last build.
51#
52# For patch deliverable purposes, it's advised to have nightly do a clobber,
53# non-debug build.
54#
55# Think about the results. Was something flagged that you don't expect? Go look
56# at the results file to see details about the differences.
57#
58# Use the -i option in conjunction with -v and -V to dive deeper and have wsdiff(1)
59# report with more verbosity.
60#
61# Usage: wsdiff [-vVt] [-r results ] [-i filelist ] old new
62#
63# Where "old" is the path to the proto area build without the changes, and
64# "new" is the path to the proto area built with the changes. The following
65# options are supported:
66#
67#        -v      Do not truncate observed diffs in results
68#        -V      Log *all* ELF sect diffs vs. logging the first diff found
69#        -t      Use onbld tools in $SRC/tools
70#        -r      Log results and observed differences
71#        -i      Tell wsdiff which objects to compare via an input file list
72
73from __future__ import print_function
74import datetime, fnmatch, getopt, os, profile, subprocess
75import re, resource, select, shutil, signal, string, struct, sys, tempfile
76import time, threading
77from stat import *
78
# Diffs written to the results file are cut off once they exceed this many
# characters. Specifying -v on the command line disables the truncation.
diffs_sz_thresh = 4096

# Lock name	 Provides exclusive access to
# --------------+------------------------------------------------
# output_lock	 standard output or temporary file (difference())
# log_lock	 the results file (log_difference())
# wset_lock	 changedFiles list (workerThread())
output_lock, log_lock, wset_lock = (threading.Lock() for _ in range(3))

# Variable for thread control
keep_processing = True

# Default search path for wsdiff
wsdiff_path = [
	"/usr/bin",
	"/usr/ccs/bin",
	"/lib/svc/bin",
	"/opt/onbld/bin",
]

# These are objects that wsdiff will notice look different, but will not
# report. Having an exceptions list at all, and adding to it, is *dangerous*;
# the *only* reason for an object to be listed here is that it does not
# build deterministically and we *cannot* fix that.
#
# These perl libraries use __DATE__ and therefore always look different.
# Ideally we would purge the use of __DATE__ from the source, but because
# this is source we wish to distribute with Solaris "unchanged", we cannot
# modify it.
#
wsdiff_exceptions = [
	"usr/perl5/5.8.4/lib/sun4-solaris-64int/CORE/libperl.so.1",
	"usr/perl5/5.6.1/lib/sun4-solaris-64int/CORE/libperl.so.1",
	"usr/perl5/5.8.4/lib/i86pc-solaris-64int/CORE/libperl.so.1",
	"usr/perl5/5.6.1/lib/i86pc-solaris-64int/CORE/libperl.so.1",
]
116
117#####
118# Logging routines
119#
120
# Debug message to be printed to the screen, and the log file.
# Nothing is emitted unless the "debugon" global is set; the results file
# receives a copy when the "logging" global is set.
def debug(msg):

	if not debugon:
		return

	# Add prefix to highlight debugging message
	msg = "## " + msg

	# Hold each lock through "with" so it is released even if the write
	# raises (closed pipe, full disk). A lock left held would block every
	# other thread that later tries to report something.
	with output_lock:
		print(msg)
		sys.stdout.flush()

	if logging:
		with log_lock:
			print(msg, file=log)
			log.flush()
136
# Informational message to be printed to the screen, and the log file
# (the latter only when the "logging" global is set).
def info(msg):

	# "with" releases the lock even when print/flush raises, so a failed
	# write cannot leave the lock held and stall the other threads.
	with output_lock:
		print(msg)
		sys.stdout.flush()

	if logging:
		with log_lock:
			print(msg, file=log)
			log.flush()
149
# Error message to be printed to the screen (on stderr), and the log file
# (the latter only when the "logging" global is set). The text is prefixed
# with "ERROR: " in both places.
def error(msg):

	text = "ERROR: " + msg

	# "with" releases the lock even when print/flush raises, so a failed
	# write cannot leave the lock held and stall the other threads.
	with output_lock:
		print(text, file=sys.stderr)
		sys.stderr.flush()

	if logging:
		with log_lock:
			print(text, file=log)
			log.flush()
162
# Informational message to be printed only to the log, if there is one.
def v_info(msg):

	if not logging:
		return

	# "with" releases the lock even when print/flush raises, so a failed
	# write cannot leave the lock held and stall the other threads.
	with log_lock:
		print(msg, file=log)
		log.flush()
171
#
# Flag a detected file difference
#
# Objects listed in wsdiff_exceptions are ignored. Otherwise the file name
# is either queued on differentFiles (when the -s flag set the "sorted"
# global) or printed to stdout straight away, and the difference is then
# handed to log_difference().
#
#	f	name of the differing object
#	dtype	short description of the kind of difference
#	diffs	human readable diff text (may be empty)
#
def difference(f, dtype, diffs):

	if f in wsdiff_exceptions:
		return

	# NB: "sorted" is the module-level -s flag, not the builtin.
	# "with" guarantees the lock is dropped even if the print fails.
	with output_lock:
		if sorted:
			differentFiles.append(f)
		else:
			print(f)
			sys.stdout.flush()

	log_difference(f, dtype, diffs)
190
#
# Do the actual logging of the difference to the results file
#
# Does nothing unless the "logging" global is set. The diff text is cut
# off at diffs_sz_thresh characters unless the "vdiffs" global (-v) is set.
#
#	f	name of the differing object
#	dtype	short description of the kind of difference
#	diffs	human readable diff text (may be empty)
#
def log_difference(f, dtype, diffs):

	if not logging:
		return

	# "with" guarantees log_lock is released even if a write to the
	# results file raises part way through the record.
	with log_lock:
		print(f, file=log)
		print("NOTE: " + dtype + " difference detected.", file=log)

		difflen = len(diffs)
		if difflen > 0:
			print('', file=log)

			if not vdiffs and difflen > diffs_sz_thresh:
				print(diffs[:diffs_sz_thresh], file=log)
				print("... truncated due to length: " +
					"use -v to override ...", file=log)
			else:
				print(diffs, file=log)
			print('\n', file=log)
		log.flush()
214
215
216#####
217# diff generating routines
218#
219
#
# Return human readable diffs from two temporary files
#
# If either file contains a NUL byte (see isBinary()), both are first run
# through od(1) and the od output is what gets diffed.
#
#	tmpf1, tmpf2	paths of the two files to compare
#
# Returns the text written by diff(1) on stdout.
# Raises OSError if diff(1) cannot be started, or
# subprocess.CalledProcessError if it exits with a status above 1.
#
def diffFileData(tmpf1, tmpf2):

	od_files = []

	try:
		# Filter the data through od(1) if the data is detected
		# as being binary
		if isBinary(tmpf1) or isBinary(tmpf2):
			for src in (tmpf1, tmpf2):
				dst = src + ".od"
				od_files.append(dst)
				with open(dst, "w") as out:
					subprocess.call(
						od_cmd.split() + ["-c", "-t", "x4", src],
						stdout=out)
			tmpf1, tmpf2 = od_files

		# Pass an argument vector: without a shell, a single command
		# string would be taken as the name of the program to run.
		cmd = diff_cmd.split() + [tmpf1, tmpf2]
		try:
			proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
			data = proc.communicate()[0]
		except OSError:
			error("failed to get output of command: " + diff_cmd +
				" " + tmpf1 + " " + tmpf2)
			# Send exception for the failed command up
			raise

		# diff(1) exits 0 when the inputs match and 1 when they
		# differ; only a larger status signals real trouble.
		if proc.returncode > 1:
			error("failed to get output of command: " + diff_cmd +
				" " + tmpf1 + " " + tmpf2)
			raise subprocess.CalledProcessError(proc.returncode, cmd)
	finally:
		# Remove the od(1) temp files as we no longer need them.
		for od_file in od_files:
			try:
				os.unlink(od_file)
			except OSError as e:
				error("diffFileData: unlink failed %s" % e)

	# Python 3 hands back bytes; callers slice and print text.
	if not isinstance(data, str):
		data = data.decode("utf-8", "replace")

	return data
264
#
# Return human readable diffs between two datasets
#
# The datasets are written to per-thread temporary files under tmpDir1 and
# tmpDir2, compared with diffFileData(), and the temporary files are removed
# again afterwards.
#
#	base, ptch	object paths; only their basenames are used, to
#			name the temporary files
#	d1, d2		the data to compare
#
def diffData(base, ptch, d1, d2):

	suffix = threading.current_thread().name
	tmpFile1 = tmpDir1 + os.path.basename(base) + suffix
	tmpFile2 = tmpDir2 + os.path.basename(ptch) + suffix

	try:
		for path, payload in ((tmpFile1, d1), (tmpFile2, d2)):
			# Python 3 bytes need a binary-mode file; on Python 2
			# bytes is str and plain "w" is used as before.
			is_bytes = (isinstance(payload, bytes) and
				not isinstance(payload, str))
			try:
				fd = open(path, "wb" if is_bytes else "w")
			except (IOError, OSError):
				error("failed to open: " + path)
				cleanup(1)
				# Only reached if cleanup() returns
				raise

			with fd:
				fd.write(payload)

		return diffFileData(tmpFile1, tmpFile2)
	finally:
		# The temporary copies are of no use once they are diffed.
		for path in (tmpFile1, tmpFile2):
			if os.path.exists(path):
				try:
					os.unlink(path)
				except OSError as e:
					error("diffData: unlink failed %s" % e)
292
293#####
294# Misc utility functions
295#
296
# Drop the first len(prefix) characters of s.
# Note that s is not checked to actually begin with prefix.
def str_prefix_trunc(s, prefix):
	return s[len(prefix):]
301
#
# Strip the leading proto area components (if present) from a path, leaving
# the deliverable's eventual path relative to root
# e.g. proto.base/root_sparc/usr/src/cmd/prstat => usr/src/cmd/prstat
#
# Everything up to and including the first "/" that follows "root_<arch>"
# is removed; a path without that marker, or without a "/" after it, is
# returned unchanged.
#
def fnFormat(fn):
	try:
		slash = fn.index("/", fn.index("root_" + arch))
	except ValueError:
		# no root_<arch> component, or nothing below it
		return fn

	return fn[slash + 1:]
319
320#####
321# Usage / argument processing
322#
323
#
# Print the usage message on stderr and exit with status 1
#
def usage():
	# Push out anything pending on stdout before writing to stderr
	sys.stdout.flush()

	lines = (
		"Usage: wsdiff [-dvVst] [-r results ] [-i filelist ] old new",
		"        -d      Print debug messages about the progress",
		"        -v      Do not truncate observed diffs in results",
		"        -V      Log *all* ELF sect diffs vs. logging the first diff found",
		"        -t      Use onbld tools in $SRC/tools",
		"        -r      Log results and observed differences",
		"        -s      Produce sorted list of differences",
		"        -i      Tell wsdiff which objects to compare via an input file list",
	)
	print("\n".join(lines), file=sys.stderr)
	sys.exit(1)
339
#
# Process command line options
#
# Sets the debugon, logging, vdiffs, reportAllSects and sorted globals for
# the flags that were given, and returns (in alphabetical order) the tuple
#	baseRoot, fileNamesFile, localTools, ptchRoot, results
# Any usage error ends the program through usage() or sys.exit(1).
#
def args():

	global debugon, logging, vdiffs, reportAllSects, sorted

	fileNamesFile = ""
	results = ""
	localTools = False

	# getopt.getopt() hands back the recognised (option, value) pairs
	# and the list of remaining non-option arguments.
	try:
		optlist, operands = getopt.getopt(sys.argv[1:], 'di:r:vVst?')
	except getopt.error:
		usage()

	# A correct wsdiff invocation has exactly two non-option arguments:
	# the paths to the base (old) and ptch (new) proto areas.
	if len(operands) != 2:
		usage()

	for opt, val in optlist:
		if opt == '-d':
			debugon = True
		elif opt == '-i':
			fileNamesFile = val
		elif opt == '-r':
			results = val
			logging = True
		elif opt == '-s':
			sorted = True
		elif opt == '-v':
			vdiffs = True
		elif opt == '-V':
			reportAllSects = True
		elif opt == '-t':
			localTools = True
		else:
			# includes -?
			usage()

	baseRoot, ptchRoot = operands

	if not baseRoot or not ptchRoot:
		usage()

	if logging and not results:
		usage()

	# -v and -V only affect what goes into the results file
	if vdiffs and not logging:
		error("The -v option requires a results file (-r)")
		sys.exit(1)

	if reportAllSects and not logging:
		error("The -V option requires a results file (-r)")
		sys.exit(1)

	# alphabetical order
	return baseRoot, fileNamesFile, localTools, ptchRoot, results
411
412#####
413# File identification
414#
415
#
# Identify the file type.
# If it's not ELF, use the file extension to identify
# certain file types that require special handling to
# compare. Otherwise just return a basic "ASCII" type.
#
# Returns one of 'ELF', 'ELF Object Archive', 'Java Archive', 'HTML',
# 'Lint Library', 'Sqlite Database' or 'ASCII' (also used for empty
# files), or 'Error' if the file cannot be stat'ed.
#
def getTheFileType(f):

	extensions = {
		'a':	'ELF Object Archive',
		'jar':	'Java Archive',
		'html':	'HTML',
		'ln':	'Lint Library',
		'db':	'Sqlite Database',
	}

	# Only a failed stat(2) means "Error"; anything else (for example
	# an interrupt) must not be silently swallowed here.
	try:
		if os.stat(f).st_size == 0:
			return 'ASCII'
	except OSError:
		error("failed to stat " + f)
		return 'Error'

	if isELF(f) == 1:
		return 'ELF'

	# Test the file extension: whatever follows the last '.', if any
	_, dot, extension = f.rpartition('.')
	if dot and extension in extensions:
		return extensions[extension]

	return 'ASCII'
447
#
# Return non-zero if "f" is an ELF file
#
# Returns 1 when the file starts with the ELF magic number, otherwise 0
# (0 is also returned, after reporting an error, if the file cannot be
# read).
#
elfmagic = b'\177ELF'
def isELF(f):
	# Read raw bytes: objects are binary data, and decoding them as text
	# fails (Python 3) or is simply unnecessary (Python 2).
	try:
		with open(f, 'rb') as fd:
			magic = fd.read(len(elfmagic))
	except (IOError, OSError):
		error("failed to open: " + f)
		return 0

	if magic == elfmagic:
		return 1
	return 0
464
#
# Return non-zero if "f" is binary.
# Consider the file to be binary if it contains any null characters
#
# Returns 1 for binary, 0 otherwise (0 is also returned, after reporting
# an error, if the file cannot be read).
#
def isBinary(f):
	# Read raw bytes in chunks: text-mode decoding can fail on exactly
	# the files this is meant to detect, and the scan can stop at the
	# first NUL instead of loading the whole file.
	try:
		with open(f, 'rb') as fd:
			for chunk in iter(lambda: fd.read(65536), b''):
				if b'\0' in chunk:
					return 1
	except (IOError, OSError):
		error("failed to open: " + f)
		return 0

	return 0
482
483#####
484# Directory traversal and file finding
485#
486
#
# Generate the path of every file found under the specified directory.
# Within each directory the names come out sorted; directories are visited
# in whatever order os.walk() provides.
#
def findFiles(d):
	for dirpath, _subdirs, names in os.walk(d):
		for name in sorted(names):
			yield os.path.join(dirpath, name)
495
#
# Examine all files in base, ptch
#
# Return a list of files appearing in both proto areas,
# a list of new files (files found only in ptch) and
# a list of deleted files (files found only in base)
#
# Symbolic links are ignored. The returned names have the leading base
# (or ptch) string removed, and each list keeps the order in which
# findFiles() produced the names.
#
def protoCatalog(base, ptch):

	debug("Getting the list of files in the base area")
	baseFilesList = list(findFiles(base))
	debug("Found " + str(len(baseFilesList)) + " files")

	debug("Getting the list of files in the patch area")
	ptchFilesList = list(findFiles(ptch))
	debug("Found " + str(len(ptchFilesList)) + " files")

	# Inventory files in the base proto area
	debug("Determining the list of regular files in the base area")
	baseList = [fn[len(base):] for fn in baseFilesList
		if not os.path.islink(fn)]
	debug("Found " + str(len(baseList)) + " files")

	# Inventory files in the patch proto area
	debug("Determining the list of regular files in the patch area")
	ptchList = [fn[len(ptch):] for fn in ptchFilesList
		if not os.path.islink(fn)]
	debug("Found " + str(len(ptchList)) + " files")

	# Proto areas hold a great many files; membership tests against
	# sets keep the comparisons below linear instead of quadratic.
	baseSet = set(baseList)
	ptchSet = set(ptchList)

	# Deleted files appear in the base area, but not the patch area
	debug("Searching for deleted files by comparing the lists")
	deletedFiles = [fn for fn in baseList if fn not in ptchSet]
	debug("Found " + str(len(deletedFiles)) + " deleted files")

	# Eliminate "deleted" files from the list of objects appearing
	# in both the base and patch proto areas
	debug("Eliminating deleted files from the list of objects")
	compFiles = [fn for fn in baseList if fn in ptchSet]
	debug("List for comparison reduced to " + str(len(compFiles))
		+ " files")

	# New files appear in the patch area, but not the base
	debug("Getting the list of newly added files")
	newFiles = [fn for fn in ptchList if fn not in baseSet]
	debug("Found " + str(len(newFiles)) + " new files")

	return compFiles, newFiles, deletedFiles
567
#
# Examine the files listed in the input file list
#
# Return a list of files appearing in both proto areas,
# a list of new files (files found only in ptch) and
# a list of deleted files (files found only in base)
#
#	base, ptch	proto area paths; names from the list are appended
#			to them directly
#	flist		path of a file naming one object per line
#
# Blank lines are skipped, symbolic links are ignored, and an entry found
# in neither tree is reported with error() and skipped.
#
def flistCatalog(base, ptch, flist):

	compFiles = []		# List of files in both proto areas
	newFiles = []		# New files detected
	deletedFiles = []	# Deleted files

	# The objects in the file list have paths relative to $ROOT or to
	# the base/ptch directory specified on the command line. If it's
	# relative to $ROOT, we need to add back the root_`uname -p` goo
	# stripped off in fnFormat(). Returns the tree-relative name under
	# which "name" exists in "tree", or None.
	def locate(tree, name):
		if os.path.exists(tree + name):
			return name
		rooted = "root_" + arch + "/" + name
		if os.path.exists(tree + rooted):
			return rooted
		return None

	try:
		fd = open(flist, "r")
	except (IOError, OSError):
		error("could not open: " + flist)
		cleanup(1)
		# Only reached if cleanup() returns
		raise

	with fd:
		files = fd.readlines()

	for line in files:
		# the fileNames have a trailing '\n'
		f = line.rstrip()
		if not f:
			continue

		fn = locate(base, f)
		base_present = fn is not None
		if base_present:
			ptch_present = os.path.exists(ptch + fn)
		else:
			fn = locate(ptch, f)
			ptch_present = fn is not None
			if not ptch_present:
				# In neither tree: fall back to the name as
				# listed so the link checks below look at
				# this entry, not at a previous one.
				fn = f

		base_is_link = os.path.islink(base + fn)
		ptch_is_link = os.path.islink(ptch + fn)
		if base_is_link:	# ignore links
			base_present = False
		if ptch_is_link:
			ptch_present = False

		if base_present and ptch_present:
			compFiles.append(fn)
		elif base_present:
			deletedFiles.append(fn)
		elif ptch_present:
			newFiles.append(fn)
		elif not (base_is_link and ptch_is_link):
			error(f + " in file list, but not in either tree. " +
				"Skipping...")

	return compFiles, newFiles, deletedFiles
642
643
#
# Build a fully qualified path to an external tool/utility.
#
# "tool" is used as given if it names an existing path; otherwise each
# directory in wsdiff_path is tried, first directly and then in its
# <arch> subdirectory.
#
# Returns the path followed by a single space, so that callers can
# append arguments directly. If the tool cannot be found, an error is
# reported and the program exits with status 1.
#
def find_tool(tool):

	# First, check what was passed. The trailing space matches the
	# other return paths; callers concatenate arguments straight onto
	# the result.
	if os.path.exists(tool):
		return tool + " "

	# Next try in wsdiff path
	for pdir in wsdiff_path:
		location = pdir + "/" + tool
		if os.path.exists(location):
			return location + " "

		location = pdir + "/" + arch + "/" + tool
		if os.path.exists(location):
			return location + " "

	error("Could not find path to: " + tool)
	sys.exit(1)
668
669
670#####
671# ELF file comparison helper routines
672#
673
#
# Return a dictionary of ELF section types keyed by section name
#
# The information is parsed from the output of "elfdump -c" on f.
# Raises RuntimeError if elfdump produced no output; failures to run
# elfdump itself propagate from subprocess.check_output().
#
def get_elfheader(f):

	header = {}

	# Pass an argument vector: without a shell, a single command string
	# would be taken as the name of the program to run.
	hstring = subprocess.check_output(elfdump_cmd.split() + ["-c", f])

	# Python 3 hands back bytes; the parsing below works on text.
	if not isinstance(hstring, str):
		hstring = hstring.decode("utf-8", "replace")

	if len(hstring) == 0:
		error("Failed to dump ELF header for " + f)
		raise RuntimeError("no elfdump output for " + f)

	# elfdump(1) dumps the section headers with the section name
	# following "sh_name:", and the section type following "sh_type:"
	for sect in hstring.split("Section Header"):
		datap = sect.find("sh_name:")
		if datap == -1:
			continue
		section = sect[datap:].split()[1]

		datap = sect.find("sh_type:")
		if datap == -1:
			error("Could not get type for sect: " + section +
				" in " + f)
			# Without a type there is nothing to record
			continue
		sh_type = sect[datap:].split()[2]
		header[section] = sh_type

	return header
704
#
# Extract data in the specified ELF section from the given file
#
# Returns the output of "dump -sn <section> <f>" with everything up to and
# including the first ":" removed. Raises RuntimeError if dump produced no
# output; failures to run dump itself propagate from
# subprocess.check_output().
#
def extract_elf_section(f, section):

	# Pass an argument vector: without a shell, a single command string
	# would be taken as the name of the program to run.
	data = subprocess.check_output(dump_cmd.split() + ["-sn", section, f])

	# Python 3 hands back bytes; callers compare and slice text.
	if not isinstance(data, str):
		data = data.decode("utf-8", "replace")

	if len(data) == 0:
		error(dump_cmd + "yielded no data on section " + section +
			" of " + f)
		raise RuntimeError("no dump output for section " + section +
			" of " + f)

	# dump(1) displays the file name to start...
	# get past it to the data itself
	dbegin = data.find(":") + 1

	return data[dbegin:]
724
#
# Produce a (hopefully meaningful) human readable set of diffs
# for the specified ELF section between f1 and f2
#
# Each object is dumped into a per-thread temporary file with the tool
# best suited to the section, the two dumps are handed to diffFileData(),
# and the temporary files are removed again.
#
text_sections = ['.text', '.init', '.fini']
def diff_elf_section(f1, f2, section, sh_type):

	worker = threading.current_thread().name
	tmpFile1 = tmpDir1 + os.path.basename(f1) + worker
	tmpFile2 = tmpDir2 + os.path.basename(f2) + worker

	# elfdump(1) option that pretty-prints each specially handled section
	elfdump_opts = {
		".group":	" -g ",
		".hash":	" -h ",
		".dynamic":	" -d ",
		".got":		" -G ",
		".SUNW_cap":	" -H ",
		".interp":	" -i ",
	}

	# Build the shell command that dumps "section" of obj into tmp
	def dump_command(obj, tmp):
		if sh_type == "SHT_RELA":
			return elfdump_cmd + " -r " + obj + " > " + tmp
		if section in elfdump_opts:
			return (elfdump_cmd + elfdump_opts[section] + obj +
				" > " + tmp)
		if section in (".symtab", ".dynsym"):
			return (elfdump_cmd + " -s -N " + section + " " +
				obj + " > " + tmp)
		if section in text_sections:
			# dis sometimes complains when it hits something it
			# doesn't know how to disassemble. Just ignore it,
			# as the output being generated here is human
			# readable, and we've already correctly flagged the
			# difference.
			return (dis_cmd + " -t " + section + " " + obj +
				" 2>/dev/null | grep -v disassembly > " + tmp)
		# Anything else: have elfdump write the section to tmp itself
		return (elfdump_cmd + " -w " + tmp + " -N " +
			section + " " + obj)

	cmd1 = dump_command(f1, tmpFile1)
	cmd2 = dump_command(f2, tmpFile2)

	os.system(cmd1)
	os.system(cmd2)

	data = diffFileData(tmpFile1, tmpFile2)

	# remove temp files as we no longer need them
	for tmp in (tmpFile1, tmpFile2):
		try:
			os.unlink(tmp)
		except OSError as e:
			error("diff_elf_section: unlink failed %s" % e)

	return data
796
#
# compare the relevant sections of two ELF binaries
# and report any differences
#
# Returns: 1 if any differences found
#          0 if no differences found
#          None if an ELF header or section could not be read
#
# NOTE(review): this comment used to promise -1 on error, but the code has
# always returned None there. Callers that test the result for truth treat
# None as "no difference"; confirm before changing it to -1.
#

# Sections deliberately not considered when comparing two ELF
# binaries. Differences observed in these sections are not considered
# significant where patch deliverable identification is concerned.
sections_to_skip = [ ".SUNW_signature",
		     ".comment",
		     ".SUNW_ctf",
		     ".debug",
		     ".plt",
		     ".rela.bss",
		     ".rela.plt",
		     ".line",
		     ".note",
		     ".compcom",
		     ]

# Sections that are moved to the front of the comparison order
sections_preferred = [ ".rodata.str1.8",
		       ".rodata.str1.1",
		       ".rodata",
		       ".data1",
		       ".data",
		       ".text",
		       ]

def compareElfs(base, ptch, quiet):

	try:
		base_header = get_elfheader(base)
		ptch_header = get_elfheader(ptch)
	except Exception:
		return

	# Real lists: they are reordered below, which a Python 3
	# dictionary view does not allow.
	sections = list(base_header.keys())

	fileName = fnFormat(base)

	# Derive the list of ELF sections found only in
	# either e1 or e2.
	e1_only_sections = [s for s in sections if s not in ptch_header]
	e2_only_sections = [s for s in ptch_header if s not in base_header]

	for only, here, there in ((e1_only_sections, base, ptch),
	    (e2_only_sections, ptch, base)):
		if len(only) == 0:
			continue

		if quiet:
			return 1

		data = ""
		if logging:
			slist = "".join(sect + "\t" for sect in only)
			data = ("ELF sections found in " +
				here + " but not in " + there +
				"\n\n" + slist)

		difference(fileName, "ELF", data)
		return 1

	# Look for preferred sections, and put those at the
	# top of the list of sections to compare
	for psect in sections_preferred:
		if psect in sections:
			sections.remove(psect)
			sections.insert(0, psect)

	# Compare ELF sections
	first_section = True
	for sect in sections:

		if sect in sections_to_skip:
			continue

		try:
			s1 = extract_elf_section(base, sect)
			s2 = extract_elf_section(ptch, sect)
		except Exception:
			return

		if s1 == s2:
			continue

		if not quiet:
			sh_type = base_header[sect]
			data = diff_elf_section(base, ptch, sect, sh_type)

			# If all ELF sections are being reported, then
			# invoke difference() to flag the file name to
			# stdout only once. Any other section differences
			# should be logged to the results file directly
			if not first_section:
				log_difference(fileName, "ELF " + sect, data)
			else:
				difference(fileName, "ELF " + sect, data)

		# A quiet caller only wants the verdict, and without -V
		# the first differing section is enough.
		if quiet or not reportAllSects:
			return 1
		first_section = False

	# With -V every section is examined; the file still differs if
	# any one of them did.
	if not first_section:
		return 1
	return 0
935
#####
# recursively remove 2 directories
#
# Used for removal of temporary directory structures (ignores any errors).
# A path that is not an existing directory is left alone.
#
def clearTmpDirs(dir1, dir2):

	for tmpdir in (dir1, dir2):
		if os.path.isdir(tmpdir):
			# second argument: ignore_errors
			shutil.rmtree(tmpdir, True)
948
949
#####
# Archive object comparison
#
# Both archives are copied into per-thread temporary directories, unpacked
# there (jar for "Java Archive", ar for "ELF Object Archive"), and their
# members are compared with compareOneFile(). The first difference found is
# reported through difference(). The temporary directories are removed
# before returning.
#
# Returns 1 if difference detected
#         0 if no difference detected
#        -1 on error
#
def compareArchives(base, ptch, fileType):

	fileName = fnFormat(base)
	suffix = threading.current_thread().name
	ArchTmpDir1 = tmpDir1 + os.path.basename(base) + suffix
	ArchTmpDir2 = tmpDir2 + os.path.basename(base) + suffix

	#
	# Be optimistic and first try a straight file compare
	# as it will allow us to finish up quickly.
	#
	if compareBasic(base, ptch, True, fileType) == 0:
		return 0

	# (unpack command, extra files to drop after unpacking)
	unpackers = {
		"Java Archive":		("jar xf ", " META-INF/MANIFEST.MF"),
		"ELF Object Archive":	("/usr/ccs/bin/ar x ", ""),
	}
	if fileType not in unpackers:
		error("unexpected file type: " + fileType)
		return -1
	unpack, extra = unpackers[fileType]

	try:
		for tmpdir in (ArchTmpDir1, ArchTmpDir2):
			try:
				os.makedirs(tmpdir)
			except OSError as e:
				error("compareArchives: makedir failed %s" % e)
				return -1

		# copy over the objects to the temp areas, and
		# unpack them
		for src, dst in ((base, ArchTmpDir1), (ptch, ArchTmpDir2)):
			try:
				shutil.copy2(src, dst)
			except (IOError, OSError, shutil.Error) as e:
				error("cp -fp " + src + " " + dst +
					" failed: " + str(e))
				return -1

		bname = fileName.split('/')[-1]
		for tmpdir in (ArchTmpDir1, ArchTmpDir2):
			os.system("cd " + tmpdir + "; " + unpack + bname +
				"; rm -f " + bname + extra)

		# Trim leading path off base/ptch file lists
		baseFlist = [str_prefix_trunc(fn, ArchTmpDir1)
			for fn in findFiles(ArchTmpDir1)]
		ptchFlist = [str_prefix_trunc(fn, ArchTmpDir2)
			for fn in findFiles(ArchTmpDir2)]
		baseMembers = set(baseFlist)
		ptchMembers = set(ptchFlist)

		for fn in ptchFlist:
			if fn not in baseMembers:
				difference(fileName, fileType,
					fn + " added to " + fileName)
				return 1

		for fn in baseFlist:
			if fn not in ptchMembers:
				difference(fileName, fileType,
					fn + " removed from " + fileName)
				return 1

			differs = compareOneFile((ArchTmpDir1 + fn),
				(ArchTmpDir2 + fn), True)
			if differs:
				difference(fileName, fileType,
					fn + " in " + fileName + " differs")
				return 1

		return 0
	finally:
		# Whatever the outcome, leave no unpacked copies behind
		clearTmpDirs(ArchTmpDir1, ArchTmpDir2)
1057
#####
# (Basic) file comparison
#
# There's some special case code here for Javadoc HTML files: the
# "<!-- Generated by javadoc ... -->" comment is removed from both files
# before they are compared.
#
#	base, ptch	paths of the two files to compare
#	quiet		if true, only the verdict is returned; otherwise a
#			difference is also reported through difference()
#	fileType	file type string; "HTML" enables the javadoc filter
#
# Returns 1 if difference detected
#         0 if no difference detected
#        -1 on error
#
def compareBasic(base, ptch, quiet, fileType):

	isHtml = (fileType == "HTML")

	# Return data without its javadoc generation comment, if it has a
	# complete one. Each file is searched on its own, since the comment
	# need not have the same position or length in both.
	def snipJavadocStamp(data):
		toSnipBeginStr = "<!-- Generated by javadoc"
		toSnipEndStr = "-->\n"

		begin = data.find(toSnipBeginStr)
		if begin == -1:
			return data
		end = data.find(toSnipEndStr, begin)
		if end == -1:
			return data
		return data[:begin] + data[end + len(toSnipEndStr):]

	# Differing sizes settle the matter cheaply, but only when no
	# content is going to be filtered out before the comparison.
	if quiet and not isHtml:
		try:
			if os.stat(base).st_size != os.stat(ptch).st_size:
				return 1
		except OSError as e:
			error("could not stat: %s" % e)
			return -1

	# NOTE(review): the files are read in text mode, as before. Binary
	# input under Python 3 would need byte mode here and in diffData().
	try:
		with open(base) as baseFile:
			baseData = baseFile.read()
	except (IOError, OSError):
		error("could not open " + base)
		return -1
	try:
		with open(ptch) as ptchFile:
			ptchData = ptchFile.read()
	except (IOError, OSError):
		error("could not open " + ptch)
		return -1

	if isHtml:
		baseData = snipJavadocStamp(baseData)
		ptchData = snipJavadocStamp(ptchData)

	if baseData == ptchData:
		return 0

	if not quiet:
		diffs = diffData(base, ptch, baseData, ptchData)
		difference(fnFormat(base), fileType, diffs)
	return 1
1117
1118
1119#####
1120# Compare two objects by producing a data dump from
1121# each object, and then comparing the dump data
1122#
1123# Returns: 1 if a difference is detected
1124#          0 if no difference detected
1125#         -1 upon error
1126#
def compareByDumping(base, ptch, quiet, fileType) :
	"""Compare two objects through a textual dump of each of them.

	base, ptch -- paths of the old and the new object
	quiet      -- when true nothing is reported through difference()
	fileType   -- "Lint Library" or "Sqlite Database"; selects the
	              command that produces the dump

	Returns 1 if a difference is detected, 0 if not, -1 upon error
	(unsupported file type or unreadable dump file).
	"""

	fileName = fnFormat(base)

	# The thread name keeps the dump files of concurrently running
	# worker threads apart.
	threadName = threading.current_thread().name
	tmpFile1 = tmpDir1 + os.path.basename(base) + threadName
	tmpFile2 = tmpDir2 + os.path.basename(ptch) + threadName

	# NOTE(review): the paths are pasted into shell command lines
	# unquoted, so names containing blanks or shell metacharacters
	# will not be handled correctly.
	if fileType == "Lint Library" :
		baseCmd = (lintdump_cmd + " -ir " + base +
			  " | egrep -v '(LINTOBJ|LINTMOD):'" +
			  " | grep -v PASS[1-3]:" +
			  " > " + tmpFile1)
		ptchCmd = (lintdump_cmd + " -ir " + ptch +
			  " | egrep -v '(LINTOBJ|LINTMOD):'" +
			  " | grep -v PASS[1-3]:" +
			  " > " + tmpFile2)
	elif fileType == "Sqlite Database" :
		baseCmd = ("echo .dump | " + sqlite_cmd + base + " > " +
			  tmpFile1)
		ptchCmd = ("echo .dump | " + sqlite_cmd + ptch + " > " +
			  tmpFile2)
	else :
		error("compareByDumping: unsupported file type: " + fileType)
		return -1

	os.system(baseCmd)
	os.system(ptchCmd)

	try:
		with open(tmpFile1) as baseFile :
			baseData = baseFile.read()
	except EnvironmentError:
		error("could not open: " + tmpFile1)
		return -1
	try:
		with open(tmpFile2) as ptchFile :
			ptchData = ptchFile.read()
	except EnvironmentError:
		error("could not open: " + tmpFile2)
		return -1

	differs = baseData != ptchData

	# The textual diff reads the dump files, so it has to be produced
	# before they are removed.
	if differs and not quiet :
		data = diffFileData(tmpFile1, tmpFile2)

	# Remove the temporary files now, whatever the outcome.
	for tmpFile in (tmpFile1, tmpFile2) :
		try:
			os.unlink(tmpFile)
		except OSError as e:
			error("compareByDumping: unlink failed %s" % e)

	if not differs :
		return 0

	if not quiet :
		difference(fileName, fileType, data)
	return 1
1194
1195#####
1196#
1197# SIGINT signal handler. Changes thread control variable to tell the threads
1198# to finish their current job and exit.
1199#
def discontinue_processing(signl, frme):
	"""Signal handler: announce the interrupt on stderr and clear the
	keep_processing flag that the worker threads poll.

	signl, frme -- signal number and stack frame passed in by the signal
	               module; neither is used

	Always returns 0.
	"""
	global keep_processing

	notice = "Caught Ctrl-C, stopping the threads"
	sys.stderr.write(notice + "\n")

	# Workers test this flag before picking up their next file
	keep_processing = False
	return 0
1207
1208#####
1209#
1210# worker thread for changedFiles processing
1211#
class workerThread(threading.Thread) :
	"""Thread that takes file names off changedFiles and compares them."""

	def run(self):
		"""Process file names until changedFiles is empty or
		keep_processing has been cleared."""
		global wset_lock, changedFiles, keep_processing
		global baseRoot, ptchRoot

		while keep_processing :
			# changedFiles is shared between all the workers, so
			# only touch it while holding the lock
			with wset_lock :
				try :
					relPath = changedFiles.pop()
				except IndexError :
					# there is nothing more to do
					return

			# The comparison itself runs without the lock held
			compareOneFile(baseRoot + relPath, ptchRoot + relPath,
			    False)
1236
1237
1238#####
1239# Compare two objects. Detect type changes.
1240# Vector off to the appropriate type specific
1241# compare routine based on the type.
1242#
def compareOneFile(base, ptch, quiet) :
	"""Compare one object from the old tree with its counterpart in the
	new tree, dispatching on the detected file type.

	base, ptch -- paths of the old and the new object
	quiet      -- passed on to the type specific routines; when true a
	              file type change is not reported either

	Returns -1 if the type of either file could not be determined,
	1 if the file types differ, otherwise whatever the type specific
	compare routine returns.
	"""

	oldType = getTheFileType(base)
	newType = getTheFileType(ptch)

	if 'Error' in (oldType, newType) :
		return -1

	fileName = fnFormat(base)

	# A change of file type is a difference in its own right
	if oldType != newType :
		if not quiet :
			difference(fileName, "file type", oldType + " to " + newType)
		return 1

	fileType = oldType

	if fileType == 'ELF' :
		return compareElfs(base, ptch, quiet)

	if fileType in ('Java Archive', 'ELF Object Archive') :
		return compareArchives(base, ptch, fileType)

	if fileType in ('Lint Library', 'Sqlite Database') :
		return compareByDumping(base, ptch, quiet, fileType)

	# HTML, or some variety of text file
	return compareBasic(base, ptch, quiet, fileType)
1280
1281# Cleanup and self-terminate
def cleanup(ret) :
	"""Remove the temporary directories, close the log file if logging
	is enabled, and exit the process with status ret."""

	debug("Performing cleanup (" + str(ret) + ")")

	for scratchDir in (tmpDir1, tmpDir2) :
		if os.path.isdir(scratchDir) :
			shutil.rmtree(scratchDir)

	if logging :
		log.close()

	sys.exit(ret)
1295
def main() :
	"""Compare two proto areas and report the objects that changed.

	Processes the command line through args(), sets up the log file,
	the tool paths and the temporary directories, lists new and removed
	objects, and then lets worker threads compare every object present
	in both trees.

	Does not return: the run ends in cleanup(), with exit status 1 when
	interrupted by SIGINT and 0 otherwise, or in sys.exit(1) when the
	setup fails.
	"""

	# Log file handle
	global log

	# Globals relating to command line options
	global logging, vdiffs, reportAllSects

	# Named temporary files / directories
	global tmpDir1, tmpDir2

	# Command paths
	global lintdump_cmd, elfdump_cmd, dump_cmd, dis_cmd, od_cmd, diff_cmd, sqlite_cmd

	# Default search path
	global wsdiff_path

	# Essentially "uname -p"
	global arch

	# changed files for worker thread processing
	global changedFiles
	global baseRoot
	global ptchRoot

	# Sort the list of files from a temporary file
	global sorted
	global differentFiles

	# Debugging indicator
	global debugon

	# Some globals need to be initialized
	debugon = logging = vdiffs = reportAllSects = sorted = False

	# Process command line arguments
	# Return values are returned from args() in alpha order
	# Note that args() also sets the globals:
	#	logging to True if verbose logging (to a file) was enabled
	#	vdiffs to True if logged differences aren't to be truncated
	#	reportAllSects to True if all ELF section differences are to be reported
	#
	baseRoot, fileNamesFile, localTools, ptchRoot, results = args()

	#
	# Set up the results/log file
	#
	if logging :
		try:
			log = open(results, "w")
		except EnvironmentError:
			logging = False
			# "log" is not bound when open() fails; name the
			# file that could not be opened instead
			error("failed to open log file: " + results)
			sys.exit(1)

		dateTimeStr= "# %04d-%02d-%02d at %02d:%02d:%02d" % time.localtime()[:6]
		v_info("# This file was produced by wsdiff")
		v_info(dateTimeStr)

	# Changed files (used only for the sorted case)
	if sorted :
		differentFiles = []

	#
	# Build paths to the required tools
	#
	# Try to look for tools in $SRC/tools if the "-t" option
	# was specified
	#
	# The command has to be passed as a list: without a shell a plain
	# string is taken as the name of the program to execute. The output
	# is converted to a native string without the trailing newline.
	arch = subprocess.check_output(["uname", "-p"])
	if not isinstance(arch, str) :
		arch = arch.decode()
	arch = arch.strip()

	if localTools :
		try:
			src = os.environ['SRC']
		except KeyError:
			error("-t specified, but $SRC not set. Cannot find $SRC/tools")
			src = ""
		if len(src) > 0 :
			wsdiff_path.insert(0, src + "/tools/proto/opt/onbld/bin")

	lintdump_cmd = find_tool("lintdump")
	elfdump_cmd = find_tool("elfdump")
	dump_cmd = find_tool("dump")
	od_cmd = find_tool("od")
	dis_cmd = find_tool("dis")
	diff_cmd = find_tool("diff")
	sqlite_cmd = find_tool("sqlite")

	#
	# Set resource limit for number of open files as high as possible.
	# This might get handy with big number of threads.
	#
	(nofile_soft, nofile_hard) = resource.getrlimit(resource.RLIMIT_NOFILE)
	try:
		resource.setrlimit(resource.RLIMIT_NOFILE,
		    (nofile_hard, nofile_hard))
	except (ValueError, resource.error):
		error("cannot set resource limits for number of open files")
		sys.exit(1)

	#
	# validate the base and patch paths
	#
	if baseRoot[-1] != '/' :
		baseRoot += '/'

	if ptchRoot[-1] != '/' :
		ptchRoot += '/'

	if not os.path.exists(baseRoot) :
		error("old proto area: " + baseRoot + " does not exist")
		sys.exit(1)

	if not os.path.exists(ptchRoot) :
		error("new proto area: " + ptchRoot + " does not exist")
		sys.exit(1)

	#
	# log some information identifying the run
	#
	v_info("Old proto area: " + baseRoot)
	v_info("New proto area: " + ptchRoot)
	v_info("Results file: " + results + "\n")

	#
	# Set up the temporary directories / files
	# Could use python's tmpdir routines, but these should
	# be easier to identify / keep around for debugging
	pid = os.getpid()
	tmpDir1 = "/tmp/wsdiff_tmp1_" + str(pid) + "/"
	tmpDir2 = "/tmp/wsdiff_tmp2_" + str(pid) + "/"
	for tmpDir in (tmpDir1, tmpDir2) :
		try:
			os.makedirs(tmpDir)
		except OSError as e:
			error("main: makedir failed %s" % e)

	# Derive a catalog of new, deleted, and to-be-compared objects
	# either from the specified base and patch proto areas, or
	# from an input file list
	newOrDeleted = False

	if fileNamesFile != "" :
		changedFiles, newFiles, deletedFiles = \
			      flistCatalog(baseRoot, ptchRoot, fileNamesFile)
	else :
		changedFiles, newFiles, deletedFiles = \
				protoCatalog(baseRoot, ptchRoot)

	if len(newFiles) > 0 :
		newOrDeleted = True
		info("\nNew objects found: ")

		if sorted :
			newFiles.sort()
		for fn in newFiles :
			info(fnFormat(fn))

	if len(deletedFiles) > 0 :
		newOrDeleted = True
		info("\nObjects removed: ")

		if sorted :
			deletedFiles.sort()
		for fn in deletedFiles :
			info(fnFormat(fn))

	if newOrDeleted :
		info("\nChanged objects: ")
	if sorted :
		debug("The list will appear after the processing is done")

	# Here's where all the heavy lifting happens
	# Perform a comparison on each object appearing in
	# both proto areas. compareOneFile will examine the
	# file types of each object, and will vector off to
	# the appropriate comparison routine, where the compare
	# will happen, and any differences will be reported / logged

	# determine maximum number of worker threads by using
	# DMAKE_MAX_JOBS environment variable set by nightly(1)
	# or get number of CPUs in the system
	try:
		max_threads = int(os.environ['DMAKE_MAX_JOBS'])
	except (KeyError, ValueError):
		max_threads = os.sysconf("SC_NPROCESSORS_ONLN")
		# If we cannot get number of online CPUs in the system
		# run unparallelized otherwise bump the number up 20%
		# to achieve best results.
		if max_threads == -1 :
			max_threads = 1
		else :
			# Integer division: range() below needs an int
			max_threads += max_threads // 5

	# Set signal handler to attempt graceful exit
	debug("Setting signal handler")
	signal.signal( signal.SIGINT, discontinue_processing )

	# Create and unleash the threads
	# Only at most max_threads must be running at any moment
	mythreads = []
	debug("Spawning " + str(max_threads) + " threads")
	for i in range(max_threads) :
		thread = workerThread()
		mythreads.append(thread)
		thread.start()

	# Wait for the threads to finish and do cleanup if interrupted.
	# Poll rather than join() so that the main thread keeps getting a
	# chance to run the SIGINT handler.
	debug("Waiting for the threads to finish")
	while any(thread.is_alive() for thread in mythreads) :
		# Some threads are still going
		time.sleep(1)

	# Interrupted by SIGINT
	if not keep_processing :
		cleanup(1)

	# If the list of differences was sorted it is stored in an array
	if sorted :
		differentFiles.sort()
		for f in differentFiles :
			info(fnFormat(f))

	# We're done, cleanup.
	cleanup(0)
1528
# Script entry point. An interrupt that reaches this level still goes
# through cleanup(), with exit status 1.
if __name__ == "__main__" :
	try:
		main()
	except KeyboardInterrupt :
		cleanup(1)
1534
1535