xref: /illumos-gate/usr/src/tools/scripts/wsdiff.py (revision 058561cbaa119a6f2659bc27ef343e1b47266bb2)
1#!/usr/sfw/bin/python
2#
3# CDDL HEADER START
4#
5# The contents of this file are subject to the terms of the
6# Common Development and Distribution License (the "License").
7# You may not use this file except in compliance with the License.
8#
9# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10# or http://www.opensolaris.org/os/licensing.
11# See the License for the specific language governing permissions
12# and limitations under the License.
13#
14# When distributing Covered Code, include this CDDL HEADER in each
15# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16# If applicable, add the following below this CDDL HEADER, with the
17# fields enclosed by brackets "[]" replaced with your own identifying
18# information: Portions Copyright [yyyy] [name of copyright owner]
19#
20# CDDL HEADER END
21#
22# Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23# Use is subject to license terms.
24#
25#ident	"%Z%%M%	%I%	%E% SMI"
26
27#
28# wsdiff(1) is a tool that can be used to determine which compiled objects
29# have changed as a result of a given source change. Developers backporting
30# new features, RFEs and bug fixes need to be able to identify the set of
31# patch deliverables necessary for feature/fix realization on a patched system.
32#
# The tool works by comparing objects in two trees/proto areas (one built with,
# and one built without, the source changes).
35#
36# Using wsdiff(1) is fairly simple:
37#	- Bringover to a fresh workspace
38#	- Perform a full non-debug build (clobber if workspace isn't fresh)
39#	- Move the proto area aside, call it proto.old, or something.
40#	- Integrate your changes to the workspace
41#	- Perform another full non-debug clobber build.
42#	- Use wsdiff(1) to see what changed:
43#		$ wsdiff proto.old proto
44#
45# By default, wsdiff will print the list of changed objects / deliverables to
46# stdout. If a results file is specified via -r, the list of differing objects,
47# and details about why wsdiff(1) thinks they are different will be logged to
48# the results file.
49#
50# By invoking nightly(1) with the -w option to NIGHTLY_FLAGS, nightly(1) will use
51# wsdiff(1) to report on what objects changed since the last build.
52#
53# For patch deliverable purposes, it's advised to have nightly do a clobber,
54# non-debug build.
55#
56# Think about the results. Was something flagged that you don't expect? Go look
57# at the results file to see details about the differences.
58#
59# Use the -i option in conjunction with -v and -V to dive deeper and have wsdiff(1)
60# report with more verbosity.
61#
62# Usage: wsdiff [-vVt] [-r results ] [-i filelist ] old new
63#
64# Where "old" is the path to the proto area build without the changes, and
65# "new" is the path to the proto area built with the changes. The following
66# options are supported:
67#
68#        -v      Do not truncate observed diffs in results
69#        -V      Log *all* ELF sect diffs vs. logging the first diff found
70#        -t      Use onbld tools in $SRC/tools
71#        -r      Log results and observed differences
72#        -i      Tell wsdiff which objects to compare via an input file list
73
74import datetime, fnmatch, getopt, profile, os, popen2, commands
75import re, select, string, struct, sys, tempfile, time
76from stat import *
77
78# Human readable diffs truncated by default if longer than this
79# Specifying -v on the command line will override
diffs_sz_thresh = 4096

# Default search path for wsdiff
wsdiff_path = [
	"/usr/bin",
	"/usr/ccs/bin",
	"/lib/svc/bin",
	"/opt/onbld/bin",
]

# These are objects that wsdiff will notice look different, but will not report.
# Having an exceptions list at all, and adding things to it, is *dangerous*;
# the *only* reason for anything to be listed here is that the object does
# not build deterministically, and we *cannot* fix that.
#
# These perl libraries use __DATE__ and therefore always look different.
# Ideally, we would purge the use of __DATE__ from the source, but because
# this is source we wish to distribute with Solaris "unchanged", we cannot
# modify it.
#
wsdiff_exceptions = [
	"usr/perl5/5.8.4/lib/sun4-solaris-64int/CORE/libperl.so.1",
	"usr/perl5/5.6.1/lib/sun4-solaris-64int/CORE/libperl.so.1",
	"usr/perl5/5.8.4/lib/i86pc-solaris-64int/CORE/libperl.so.1",
	"usr/perl5/5.6.1/lib/i86pc-solaris-64int/CORE/libperl.so.1",
]
102
103#####
104# Logging routines
105#
106
107# Informational message to be printed to the screen, and the log file
def info(msg) :
	"""Write msg, followed by a newline, to stdout and to the log.

	The log is only written when the global "logging" flag is set.
	Both streams are flushed so that stdout and the results file
	stay in step, matching what error() and v_info() already do.
	"""
	line = str(msg) + "\n"

	sys.stdout.write(line)
	sys.stdout.flush()

	if logging :
		log.write(line)
		log.flush()
114
115# Error message to be printed to the screen, and the log file
def error(msg) :
	"""Write "ERROR: <msg>" to stderr and, if logging is on, to the log.

	Each stream is flushed right after it has been written.
	"""
	line = "ERROR: " + str(msg) + "\n"

	sys.stderr.write(line)
	sys.stderr.flush()

	if not logging :
		return

	log.write(line)
	log.flush()
123
124# Informational message to be printed only to the log, if there is one.
def v_info(msg) :
	"""Write msg to the log only; do nothing when logging is off."""
	if not logging :
		return

	log.write(str(msg) + "\n")
	log.flush()
130
131#
132# Flag a detected file difference
133# Display the fileName to stdout, and log the difference
134#
def difference(f, dtype, diffs) :
	"""Report that object f differs.

	Objects named in wsdiff_exceptions are silently ignored. For
	anything else the name is written to stdout and the details
	(dtype, diffs) are handed to log_difference().
	"""
	if f not in wsdiff_exceptions :
		sys.stdout.write(str(f) + "\n")
		sys.stdout.flush()

		log_difference(f, dtype, diffs)
144
145#
146# Do the actual logging of the difference to the results file
147#
def log_difference(f, dtype, diffs) :
	"""Record a difference in the results file (no-op without logging).

	Writes the object name, a NOTE line naming the kind of difference
	and, when diffs is non-empty, the diffs themselves. Diffs longer
	than diffs_sz_thresh are truncated unless vdiffs (-v) is set.
	"""
	if not logging :
		return

	emit = log.write

	emit(str(f) + "\n")
	emit("NOTE: " + str(dtype) + " difference detected.\n")

	difflen = len(diffs)
	if difflen > 0 :
		emit("\n")

		if vdiffs or difflen <= diffs_sz_thresh :
			emit(str(diffs) + "\n")
		else :
			emit(str(diffs[:diffs_sz_thresh]) + "\n")
			emit("... truncated due to length: "
			     "use -v to override ...\n")

		# a blank line pair separates this entry from the next
		emit("\n\n")

	log.flush()
166
167
168#####
169# diff generating routines
170#
171
172#
173# Return human readable diffs from two temporary files
174#
def diffFileData(tmpf1, tmpf2) :
	"""Return the output of diff(1) run on the two named files.

	If either file looks binary (see isBinary()), both are first
	dumped with od(1) into "<name>.od" and the dumps are diffed
	instead.
	"""
	left, right = tmpf1, tmpf2

	if isBinary(tmpf1) or isBinary(tmpf2) :
		dumps = []
		for raw in (tmpf1, tmpf2) :
			dumped = raw + ".od"
			os.system(od_cmd + " -c -t x4" + " " + raw + " > " + dumped)
			dumps.append(dumped)
		left, right = dumps

	return commands.getoutput(diff_cmd + " " + left + " " + right)
194
195#
# Return human readable diffs between two datasets
197#
def diffData(d1, d2) :
	"""Return human readable diffs between the strings d1 and d2.

	d1 is written to tmpFile1 and d2 to tmpFile2, and the two files
	are then compared by diffFileData(). If either temporary file
	cannot be opened the error is reported and cleanup(1) is called.
	"""
	handles = []
	for path in (tmpFile1, tmpFile2) :
		try:
			handles.append(open(path, "w"))
		except:
			error("failed to open: " + path)
			cleanup(1)

	out1, out2 = handles

	out1.write(d1)
	out2.write(d2)
	out1.close()
	out2.close()

	return diffFileData(tmpFile1, tmpFile2)
220
221#####
222# Misc utility functions
223#
224
225# Prune off the leading prefix from string s
def str_prefix_trunc(s, prefix) :
	"""Return s with its first len(prefix) characters removed.

	No check is made that s actually begins with prefix.
	"""
	return s[len(prefix):]
229
230#
231# Prune off leading proto path goo (if there is one) to yield
232# the deliverable's eventual path relative to root
233# e.g. proto.base/root_sparc/usr/src/cmd/prstat => usr/src/cmd/prstat
234#
def fnFormat(fn) :
	"""Strip everything up to and including the "root_<arch>/" component.

	fn is returned unchanged if it does not contain "root_" + arch,
	or if no "/" follows that marker.
	"""
	marker = "root_" + arch

	start = fn.find(marker)
	if start != -1 :
		slash = fn.find("/", start)
		if slash != -1 :
			return fn[slash + 1:]

	return fn
247
248#####
249# Usage / argument processing
250#
251
252#
253# Display usage message
254#
def usage() :
	"""Print the usage message to stderr and exit with status 1."""
	text = """Usage: wsdiff [-vVt] [-r results ] [-i filelist ] old new
        -v      Do not truncate observed diffs in results
        -V      Log *all* ELF sect diffs vs. logging the first diff found
        -t      Use onbld tools in $SRC/tools
        -r      Log results and observed differences
        -i      Tell wsdiff which objects to compare via an input file list"""

	# push out anything already queued for stdout before the message
	sys.stdout.flush()
	sys.stderr.write(text + "\n")
	sys.exit(1)
264
265#
266# Process command line options
267#
def args() :
	"""Parse sys.argv and return the settings for this run.

	Returns the tuple
	    (baseRoot, fileNamesFile, localTools, ptchRoot, results)
	(alphabetical order), where baseRoot and ptchRoot are the two
	non-option arguments. As a side effect the globals logging, vdiffs
	and reportAllSects are set to True for -r, -v and -V respectively.

	Any invalid invocation ends the program: via usage() for malformed
	command lines, or via sys.exit(1) after an error message when -v
	or -V is given without -r.
	"""

	global logging
	global vdiffs
	global reportAllSects

	validOpts = 'i:r:vVt?'

	fileNamesFile = ""
	results = ""
	localTools = False

	# getopt.getopt() returns:
	#	a list of option/value tuples
	#	a list of remaining non-option arguments
	#
	# A correct wsdiff invocation will have exactly two non option
	# arguments, the paths to the base (old), ptch (new) proto areas
	try:
		optlist, operands = getopt.getopt(sys.argv[1:], validOpts)
	except getopt.error :
		# Tell the user what getopt objected to before showing the
		# usage text; sys.exc_info() keeps this valid on old and new
		# interpreters alike.
		error(str(sys.exc_info()[1]))
		usage()

	if len(operands) != 2 :
		usage()

	for opt, val in optlist :
		if opt == '-i' :
			fileNamesFile = val
		elif opt == '-r' :
			results = val
			logging = True
		elif opt == '-v' :
			vdiffs = True
		elif opt == '-V' :
			reportAllSects = True
		elif opt == '-t' :
			localTools = True
		else :
			# '-?' is accepted by getopt but only means "help"
			usage()

	baseRoot = operands[0]
	ptchRoot = operands[1]

	if len(baseRoot) == 0 or len(ptchRoot) == 0 :
		usage()

	# -r with an empty file name
	if logging and len(results) == 0 :
		usage()

	if vdiffs and not logging :
		error("The -v option requires a results file (-r)")
		sys.exit(1)

	if reportAllSects and not logging :
		error("The -V option requires a results file (-r)")
		sys.exit(1)

	# alphabetical order
	return	baseRoot, fileNamesFile, localTools, ptchRoot, results
330
331#####
332# File identification
333#
334
335#
336# Identify the file type.
337# If it's not ELF, use the file extension to identify
338# certain file types that require special handling to
339# compare. Otherwise just return a basic "ASCII" type.
340#
def getTheFileType(f) :
	"""Return a string naming the type of file f.

	Empty files are 'ASCII'. Files for which isELF() returns 1 are
	'ELF'. Otherwise the text after the last '.' in f selects one of
	the specially handled types; anything else is 'ASCII'.
	"""

	special_types = { 'a'		:	'ELF Object Archive',
			  'jar'		:	'Java Archive',
			  'html'	:	'HTML',
			  'ln'		:	'Lint Library',
			  'db'		:	'Sqlite Database' }

	if os.path.getsize(f) == 0 :
		return 'ASCII'

	if isELF(f) == 1 :
		return 'ELF'

	pieces = f.split('.')
	if len(pieces) > 1 :	# there is an extension to test
		return special_types.get(pieces[-1], 'ASCII')

	return 'ASCII'
362
363#
364# Return non-zero if "f" is an ELF file
365#
elfmagic = '\177ELF'
def isELF(f) :
	"""Return 1 if file f starts with the ELF magic number, else 0.

	A file that cannot be opened is reported via error() and
	treated as not being ELF.
	"""
	try:
		fd = open(f)
	except:
		error("failed to open: " + f)
		return 0

	leading = fd.read(len(elfmagic))
	fd.close()

	return int(leading == elfmagic)
379
380#
# Return non-zero if "f" is binary.
382# Consider the file to be binary if it contains any null characters
383#
def isBinary(f) :
	"""Return 1 if file f contains a null character, else 0.

	A file that cannot be opened is reported via error() and
	treated as not being binary.
	"""
	try:
		fd = open(f)
	except:
		error("failed to open: " + f)
		return 0

	contents = fd.read()
	fd.close()

	if '\0' in contents :
		return 1
	return 0
397
398#####
399# Directory traversal and file finding
400#
401
402#
# Generate the path of every file found under the specified directory;
# the file names within each directory are produced in sorted order
404#
def findFiles(d) :
	"""Generate the path of every file found under directory d.

	The walk is top-down: a directory's own files come first (in
	sorted order), followed by each of its subdirectories (also in
	sorted order). Sorting the subdirectory list in place makes
	os.walk() descend in that order, so the sequence produced does
	not depend on the order in which the filesystem happens to list
	directory entries.
	"""
	for path, subdirs, files in os.walk(d) :
		subdirs.sort()
		files.sort()
		for name in files :
			yield os.path.join(path, name)
410
411#
412# Examine all files in base, ptch
413#
414# Return a list of files appearing in both proto areas,
415# a list of new files (files found only in ptch) and
416# a list of deleted files (files found only in base)
417#
def protoCatalog(base, ptch) :
	"""Catalog the files found under the base and ptch proto areas.

	Returns three lists of paths, each relative to its proto root
	(the leading len(base) or len(ptch) characters are removed):
	    compFiles     files present in both areas
	    newFiles      files present only in ptch
	    deletedFiles  files present only in base
	Symbolic links are ignored. Each list keeps the order in which
	findFiles() produced the names.
	"""

	# Relative names of the non-link files below root
	def inventory(root) :
		skip = len(root)
		return [ fn[skip:] for fn in findFiles(root)
			 if not os.path.islink(fn) ]

	baseList = inventory(base)
	ptchList = inventory(ptch)

	# Proto areas hold a great many files; sets keep each membership
	# test constant time instead of a scan of the whole list.
	baseNames = set(baseList)
	ptchNames = set(ptchList)

	compFiles = [ fn for fn in baseList if fn in ptchNames ]
	deletedFiles = [ fn for fn in baseList if fn not in ptchNames ]
	newFiles = [ fn for fn in ptchList if fn not in baseNames ]

	return compFiles, newFiles, deletedFiles
466
467#
468# Examine the files listed in the input file list
469#
470# Return a list of files appearing in both proto areas,
471# a list of new files (files found only in ptch) and
472# a list of deleted files (files found only in base)
473#
def flistCatalog(base, ptch, flist) :
	"""Catalog the objects named, one per line, in the file flist.

	Returns three lists of paths relative to base/ptch:
	    compFiles     objects present in both areas
	    newFiles      objects present only in ptch
	    deletedFiles  objects present only in base
	Blank lines are skipped and symbolic links are ignored. An entry
	found in neither area is reported via error() and skipped. If
	flist cannot be opened the error is reported and cleanup(1) is
	called.
	"""
	compFiles = []		# List of files in both proto areas
	newFiles = []		# New files detected
	deletedFiles = []	# Deleted files

	try:
		fd = open(flist, "r")
	except (IOError, OSError) :
		error("could not open: " + flist)
		cleanup(1)

	try:
		files = fd.readlines()
	finally:
		fd.close()

	for f in files :
		# the fileNames have a trailing '\n'
		f = f.rstrip()

		# Skip blank lines. Without this a whitespace-only line
		# becomes "", which "exists" (it names the proto root itself).
		if len(f) == 0 :
			continue

		ptch_present = True
		base_present = True

		# Until a better match is found the entry stands for itself.
		# This keeps fn defined (and not left over from the previous
		# entry) when the object is in neither tree.
		fn = f

		# The objects in the file list have paths relative
		# to $ROOT or to the base/ptch directory specified on
		# the command line.
		# If it's relative to $ROOT, we'll need to add back the
		# root_`uname -p` goo we stripped off in fnFormat()
		if os.path.exists(base + f) :
			fn = f
		elif os.path.exists(base + "root_" + arch + "/" + f) :
			fn = "root_" + arch + "/" + f
		else :
			base_present = False

		if base_present :
			if not os.path.exists(ptch + fn) :
				ptch_present = False
		else :
			if os.path.exists(ptch + f) :
				fn = f
			elif os.path.exists(ptch + "root_" + arch + "/" + f) :
				fn = "root_" + arch + "/" + f
			else :
				ptch_present = False

		if os.path.islink(base + fn) :	# ignore links
			base_present = False
		if os.path.islink(ptch + fn) :
			ptch_present = False

		if base_present and ptch_present :
			compFiles.append(fn)
		elif base_present :
			deletedFiles.append(fn)
		elif ptch_present :
			newFiles.append(fn)
		else :
			# links on both sides are ignored quietly
			if os.path.islink(base + fn) and os.path.islink(ptch + fn) :
				continue
			error(f + " in file list, but not in either tree. Skipping...")

	return compFiles, newFiles, deletedFiles
538
539
540#
541# Build a fully qualified path to an external tool/utility.
542# Consider the default system locations. For onbld tools, if
543# the -t option was specified, we'll try to use built tools in $SRC tools,
544# and otherwise, we'll fall back on /opt/onbld/
545#
def find_tool(tool) :
	"""Return the command string used to invoke the external tool.

	tool is tried as given first, then as <dir>/<tool> and
	<dir>/<arch>/<tool> for each directory in wsdiff_path. The
	string returned always ends in a single space so that callers
	may append arguments directly (compareByDumping() does so with
	sqlite_cmd). If the tool cannot be found an error is reported
	and the program exits with status 1.
	"""

	# First, check what was passed
	if os.path.exists(tool) :
		return tool + " "

	# Next try in wsdiff path
	for pdir in wsdiff_path :
		candidates = ( pdir + "/" + tool,
			       pdir + "/" + arch + "/" + tool )
		for location in candidates :
			if os.path.exists(location) :
				return location + " "

	error("Could not find path to: " + tool)
	sys.exit(1)
564
565
566#####
567# ELF file comparison helper routines
568#
569
570#
571# Return a dictionary of ELF section types keyed by section name
572#
def get_elfheader(f) :
	"""Return a dictionary of ELF section types keyed by section name.

	The information is parsed from the output of "elfdump -c" on f.
	Returns None (after reporting an error) if elfdump produced no
	output at all. A section whose type cannot be found is reported
	and left out of the dictionary.
	"""

	header = {}

	hstring = commands.getoutput(elfdump_cmd + " -c " + f)

	if len(hstring) == 0 :
		error("Failed to dump ELF header for " + f)
		return

	# elfdump(1) dumps the section headers with the section name
	# following "sh_name:", and the section type following "sh_type:"
	for sect in hstring.split("Section Header") :
		datap = sect.find("sh_name:")
		if datap == -1 :
			continue
		section = sect[datap:].split()[1]

		datap = sect.find("sh_type:")
		if datap == -1 :
			error("Could not get type for sect: " + section + \
			      " in " + f)
			# There is nothing to parse; carrying on would
			# index into unrelated text from the end of sect.
			continue
		sh_type = sect[datap:].split()[2]
		header[section] = sh_type

	return header
599
600#
601# Extract data in the specified ELF section from the given file
602#
def extract_elf_section(f, section) :
	"""Return the dump(1) output for the named section of ELF file f.

	Everything up to and including the first ":" (dump prints the
	file name first) is removed. If dump produced no output at all,
	an error naming the command is reported and an empty string is
	returned, so callers can still take the length of, and compare,
	the result.
	"""

	cmd = dump_cmd + " -sn " + section + " " + f
	data = commands.getoutput(cmd)

	if len(data) == 0 :
		error(cmd + " yielded no data")
		return ""

	# dump(1) displays the file name to start...
	# get past it to the data itself
	dbegin = data.find(":") + 1

	return data[dbegin:]
617
618#
619# Return a (hopefully meaningful) human readable set of diffs
620# for the specified ELF section between f1 and f2
621#
622# Depending on the section, various means for dumping and diffing
623# the data may be employed.
624#
text_sections = [ '.text', '.init', '.fini' ]
def diff_elf_section(f1, f2, section, sh_type) :
	"""Return human readable diffs for one ELF section of f1 and f2.

	Each file's section is dumped into tmpFile1 / tmpFile2 with the
	tool best suited to it (an elfdump option chosen by section type
	or name, dis for the text sections, or a raw "elfdump -w" for
	anything else), and the two dumps are compared by diffFileData().
	"""

	# Sections that have a dedicated elfdump(1) option
	elfdump_option = { ".group"	: " -g ",
			   ".hash"	: " -h ",
			   ".dynamic"	: " -d ",
			   ".got"	: " -G ",
			   ".SUNW_cap"	: " -H ",
			   ".interp"	: " -i " }

	cmds = []
	for obj, out in ((f1, tmpFile1), (f2, tmpFile2)) :
		if sh_type == "SHT_RELA" :
			cmd = elfdump_cmd + " -r " + obj + " > " + out
		elif section in elfdump_option :
			cmd = elfdump_cmd + elfdump_option[section] + obj + \
			      " > " + out
		elif section in (".symtab", ".dynsym") :
			cmd = elfdump_cmd + " -s -N " + section + " " + obj + \
			      " > " + out
		elif section in text_sections :
			# dis sometimes complains when it hits something it
			# doesn't know how to disassemble. Just ignore it, as
			# the output being generated here is human readable,
			# and we've already correctly flagged the difference.
			cmd = dis_cmd + " -t " + section + " " + obj + \
			      " 2>/dev/null | grep -v disassembly > " + out
		else :
			# elfdump writes the raw section to the file itself
			cmd = elfdump_cmd + " -w " + out + " -N " + \
			      section + " " + obj
		cmds.append(cmd)

	for cmd in cmds :
		os.system(cmd)

	return diffFileData(tmpFile1, tmpFile2)
673
674#
675# compare the relevant sections of two ELF binaries
676# and report any differences
677#
# Returns: 1 if any differences found
679#          0 if no differences found
680#	  -1 on error
681#
682
683# Sections deliberately not considered when comparing two ELF
684# binaries. Differences observed in these sections are not considered
685# significant where patch deliverable identification is concerned.
sections_to_skip = [ ".SUNW_signature",
		     ".comment",
		     ".SUNW_ctf",
		     ".debug",
		     ".plt",
		     ".rela.bss",
		     ".rela.plt",
		     ".line",
		     ".note",
		     ".compcom",
		     ]

# Sections compared before any others, when present.
sections_preferred = [ ".rodata.str1.8",
		       ".rodata.str1.1",
		       ".rodata",
		       ".data1",
		       ".data",
		       ".text",
		       ]

def compareElfs(base, ptch, quiet) :
	"""Compare the relevant sections of the ELF files base and ptch.

	Returns 1 if a difference was found, 0 if none was found, and
	-1 if the section headers of either file could not be read.

	Unless quiet is set, differences are reported via difference()
	/ log_difference(). Comparison stops at the first differing
	section unless reportAllSects (-V) is set, in which case every
	differing section is examined and 1 is still returned.
	"""

	base_header = get_elfheader(base)
	ptch_header = get_elfheader(ptch)

	# get_elfheader() has already reported the problem
	if base_header is None or ptch_header is None :
		return -1

	sections = list(base_header.keys())
	e2_only_sections = list(ptch_header.keys())
	e1_only_sections = []

	fileName = fnFormat(base)

	# Derive the list of ELF sections found only in
	# either e1 or e2.
	for sect in sections :
		if not sect in e2_only_sections :
			e1_only_sections.append(sect)
		else :
			e2_only_sections.remove(sect)

	# A section present on one side only is a difference in itself.
	# Sections missing from ptch are considered first.
	for only, here, there in ((e1_only_sections, base, ptch),
				  (e2_only_sections, ptch, base)) :
		if len(only) == 0 :
			continue
		if quiet :
			return 1

		info(fileName)
		if not logging :
			return 1

		slist = "".join([ sect + "\t" for sect in only ])
		v_info("\nELF sections found in " + \
		      here + " but not in " + there)
		v_info("\n" + slist)
		return 1

	# Look for preferred sections, and put those at the
	# top of the list of sections to compare
	for psect in sections_preferred :
		if psect in sections :
			sections.remove(psect)
			sections.insert(0, psect)

	# Compare ELF sections
	differs = 0
	first_section = True
	for sect in sections :

		if sect in sections_to_skip :
			continue

		s1 = extract_elf_section(base, sect)
		s2 = extract_elf_section(ptch, sect)

		if s1 == s2 :
			continue

		differs = 1

		if not quiet :
			sh_type = base_header[sect]
			data = diff_elf_section(base, ptch, sect, \
						sh_type)

			# If all ELF sections are being reported, then
			# invoke difference() to flag the file name to
			# stdout only once. Any other section differences
			# should be logged to the results file directly
			if not first_section :
				log_difference(fileName, "ELF " + sect, data)
			else :
				difference(fileName, "ELF " + sect, data)

		if not reportAllSects :
			return 1
		first_section = False

	# With -V the loop runs to completion, so the result has to be
	# remembered rather than assumed to be "no difference".
	return differs
795
796#####
797# Archive object comparison
798#
799# Returns 1 if difference detected
800#         0 if no difference detected
801#        -1 on error
802#
def compareArchives(base, ptch, fileType) :
	"""Compare two archives (jar or ar) member by member.

	Returns 1 if a difference is detected, 0 if not, and -1 on
	error. The archives are first compared as plain files; only if
	that shows a difference are they copied into tmpDir1 / tmpDir2,
	unpacked there, and their members compared with compareOneFile().
	The first difference found is reported via difference().
	"""

	fileName = fnFormat(base)
	sides = ((base, tmpDir1), (ptch, tmpDir2))

	# clear the temp directories
	for obj, workDir in sides :
		cmd = "rm -rf " + workDir + "*"
		status, output = commands.getstatusoutput(cmd)
		if status != 0 :
			error(cmd + " failed: " + output)
			return -1

	#
	# Be optimistic and first try a straight file compare
	# as it will allow us to finish up quickly.
	if compareBasic(base, ptch, True, fileType) == 0 :
		return 0

	# copy over the objects to the temp areas, and
	# unpack them
	for obj, workDir in sides :
		cmd = "cp -fp " + obj + " " + workDir
		status, output = commands.getstatusoutput(cmd)
		if status != 0 :
			error(cmd + " failed: " + output)
			return -1

	bname = fileName.split('/')[-1]
	if fileType == "Java Archive" :
		unpack = "jar xf " + bname + \
			 "; rm -f " + bname + " META-INF/MANIFEST.MF"
	elif fileType == "ELF Object Archive" :
		unpack = "/usr/ccs/bin/ar x " + \
			 bname + "; rm -f " + bname
	else :
		error("unexpected file type: " + fileType)
		return -1

	for obj, workDir in sides :
		os.system("cd " + workDir + "; " + unpack)

	# Member names, with the leading temp directory trimmed off
	baseFlist = [ str_prefix_trunc(member, tmpDir1)
		      for member in findFiles(tmpDir1) ]
	ptchFlist = [ str_prefix_trunc(member, tmpDir2)
		      for member in findFiles(tmpDir2) ]

	for member in ptchFlist :
		if member not in baseFlist :
			difference(fileName, fileType, \
				   member + " added to " + fileName)
			return 1

	for member in baseFlist :
		if member not in ptchFlist :
			difference(fileName, fileType, \
				   member + " removed from " + fileName)
			return 1

		if compareOneFile((tmpDir1 + member), (tmpDir2 + member), True) :
			difference(fileName, fileType, \
				   member + " in " + fileName + " differs")
			return 1

	return 0
890
891#####
892# (Basic) file comparison
893#
894# There's some special case code here for Javadoc HTML files
895#
896# Returns 1 if difference detected
897#         0 if no difference detected
898#        -1 on error
899#
def compareBasic(base, ptch, quiet, fileType) :
	"""Compare the contents of the files base and ptch.

	Returns 1 if a difference is detected, 0 if not, and -1 if
	either file cannot be examined or opened.

	For fileType "HTML" the "Generated by javadoc" comment is removed
	from each file's data before comparing. In quiet mode, files
	of different size are declared different without being read, and
	nothing is reported; otherwise a difference is reported through
	difference() together with the diffs produced by diffData().
	"""

	if quiet :
		try:
			sameSize = os.path.getsize(base) == os.path.getsize(ptch)
		except OSError :
			error("could not stat " + base + " or " + ptch)
			return -1
		if not sameSize :
			return 1

	try:
		baseFile = open(base)
	except (IOError, OSError) :
		error("could not open " + base)
		return -1
	try:
		ptchFile = open(ptch)
	except (IOError, OSError) :
		baseFile.close()
		error("could not open " + ptch)
		return -1

	try:
		baseData = baseFile.read()
		ptchData = ptchFile.read()
	finally:
		baseFile.close()
		ptchFile.close()

	if fileType == "HTML" :
		# Each file is trimmed on its own terms; the comment need
		# not sit at the same offset, or have the same length, in
		# both.
		baseData = _snipJavadocStamp(baseData)
		ptchData = _snipJavadocStamp(ptchData)

	if baseData == ptchData :
		return 0

	if not quiet :
		diffs = diffData(baseData, ptchData)
		difference(fnFormat(base), fileType, diffs)
	return 1

# Return data without its first "<!-- Generated by javadoc ... -->\n"
# comment; data is returned untouched unless both ends are found.
def _snipJavadocStamp(data) :
	toSnipBeginStr = "<!-- Generated by javadoc"
	toSnipEndStr = "-->\n"

	begin = data.find(toSnipBeginStr)
	if begin == -1 :
		return data

	end = data.find(toSnipEndStr, begin)
	if end == -1 :
		return data

	return data[:begin] + data[end + len(toSnipEndStr):]
950
951
952#####
953# Compare two objects by producing a data dump from
954# each object, and then comparing the dump data
955#
956# Returns: 1 if a difference is detected
957#          0 if no difference detected
958#         -1 upon error
959#
def compareByDumping(base, ptch, quiet, fileType) :
	"""Compare two objects by dumping each to text and comparing that.

	Returns 1 if a difference is detected, 0 if not, and -1 on
	error (an unsupported fileType, or a dump file that cannot be
	opened).

	"Lint Library" objects are dumped with lintdump and "Sqlite
	Database" objects with sqlite's .dump command, into tmpFile1
	and tmpFile2. Unless quiet is set, a difference is reported via
	difference() with the diffs from diffFileData().
	"""

	if fileType == "Lint Library" :
		baseCmd = lintdump_cmd + " -ir " + base + \
			  " | grep -v LINTLIB:" + " > " + tmpFile1
		ptchCmd = lintdump_cmd + " -ir " + ptch + \
			  " | grep -v LINTLIB:" + " > " + tmpFile2
	elif fileType == "Sqlite Database" :
		# The explicit " " keeps the command well formed whether or
		# not sqlite_cmd itself ends in a space.
		baseCmd = "echo .dump | " + sqlite_cmd + " " + base + \
			  " > " + tmpFile1
		ptchCmd = "echo .dump | " + sqlite_cmd + " " + ptch + \
			  " > " + tmpFile2
	else :
		error("unexpected file type: " + fileType)
		return -1

	os.system(baseCmd)
	os.system(ptchCmd)

	try:
		baseFile = open(tmpFile1)
	except (IOError, OSError) :
		error("could not open: " + tmpFile1)
		return -1
	try:
		ptchFile = open(tmpFile2)
	except (IOError, OSError) :
		baseFile.close()
		error("could not open: " + tmpFile2)
		return -1

	try:
		baseData = baseFile.read()
		ptchData = ptchFile.read()
	finally:
		baseFile.close()
		ptchFile.close()

	if baseData == ptchData :
		return 0

	if not quiet :
		data = diffFileData(tmpFile1, tmpFile2)
		difference(fnFormat(base), fileType, data)
	return 1
999
1000#####
1001# Compare two objects. Detect type changes.
1002# Vector off to the appropriate type specific
1003# compare routine based on the type.
1004#
def compareOneFile(base, ptch, quiet) :
	"""Compare base against ptch using the routine suited to their type.

	If the two files are of different types that alone is reported
	as a difference and 1 is returned. Otherwise the result of the
	type specific compare routine is returned.
	"""

	# Verify the file types.
	btype = getTheFileType(base)
	ptype = getTheFileType(ptch)

	fileName = fnFormat(base)

	# A change of type is a difference; indicate this and move on
	if btype != ptype :
		difference(fileName, "file type", btype + " to " + ptype)
		return 1

	if btype == 'ELF' :
		return compareElfs(base, ptch, quiet)

	if btype in ('Java Archive', 'ELF Object Archive') :
		return compareArchives(base, ptch, btype)

	if btype in ('Lint Library', 'Sqlite Database') :
		return compareByDumping(base, ptch, quiet, btype)

	# HTML, or else it has to be some variety of text file
	return compareBasic(base, ptch, quiet, btype)
1037
1038# Cleanup and self-terminate
def cleanup(ret) :
	"""Remove the temporary directories, close the log, and exit.

	The process exits with status ret. This may be called before
	main() has finished setting up (for instance on an early
	interrupt), so globals that do not exist yet are treated as
	"nothing to clean up" rather than being assumed present.
	"""
	state = globals()

	dir1 = state.get("tmpDir1", "")
	dir2 = state.get("tmpDir2", "")

	if len(dir1) > 0 and len(dir2) > 0 :
		os.system("rm -rf " + dir1)
		os.system("rm -rf " + dir2)

	if state.get("logging", False) and "log" in state :
		state["log"].close()

	sys.exit(ret)
1053
def main() :
	"""Run wsdiff: parse arguments, set up, compare, and clean up.

	Sets up the results file (if requested), locates the external
	tools, creates the temporary work areas, catalogs the objects to
	examine, reports new and removed objects, and compares every
	object present in both proto areas. Always finishes by exiting
	the process (through cleanup() or sys.exit()).
	"""

	# Log file handle
	global log

	# Globals relating to command line options
	global logging, vdiffs, reportAllSects

	# Named temporary files / directories
	global tmpDir1, tmpDir2, tmpFile1, tmpFile2

	# Command paths
	global lintdump_cmd, elfdump_cmd, dump_cmd, dis_cmd, od_cmd, diff_cmd, sqlite_cmd

	# Default search path
	global wsdiff_path

	# Essentially "uname -p"
	global arch

	# Some globals need to be initialized
	logging = vdiffs = reportAllSects = False

	# Process command line arguments
	# Return values are returned from args() in alpha order
	# Note that args() also sets the globals:
	#	logging to True if verbose logging (to a file) was enabled
	#	vdiffs to True if logged differences aren't to be truncated
	#	reportAllSects to True if all ELF section differences are to be reported
	#
	baseRoot, fileNamesFile, localTools, ptchRoot, results = args()

	#
	# Set up the results/log file
	#
	if logging :
		try:
			log = open(results, "w")
		except (IOError, OSError) :
			# No log exists to write to, so switch logging off
			# before reporting, and name the file we tried to open.
			logging = False
			error("failed to open log file: " + results)
			sys.exit(1)

		dateTimeStr = "# %d/%d/%d at %d:%d:%d" % time.localtime()[:6]
		v_info("# This file was produced by wsdiff")
		v_info(dateTimeStr)

	#
	# Build paths to the required tools
	#
	# Try to look for tools in $SRC/tools if the "-t" option
	# was specified
	#
	arch = commands.getoutput("uname -p")
	if localTools :
		try:
			src = os.environ['SRC']
		except KeyError :
			error("-t specified, but $SRC not set. Cannot find $SRC/tools")
			src = ""
		if len(src) > 0 :
			wsdiff_path.insert(0, src + "/tools/proto/opt/onbld/bin")

	lintdump_cmd = find_tool("lintdump")
	elfdump_cmd = find_tool("elfdump")
	dump_cmd = find_tool("dump")
	od_cmd = find_tool("od")
	dis_cmd = find_tool("dis")
	diff_cmd = find_tool("diff")
	sqlite_cmd = find_tool("sqlite")

	#
	# validate the base and patch paths
	#
	if baseRoot[-1] != '/' :
		baseRoot += '/'

	if ptchRoot[-1] != '/' :
		ptchRoot += '/'

	if not os.path.exists(baseRoot) :
		error("old proto area: " + baseRoot + " does not exist")
		sys.exit(1)

	if not os.path.exists(ptchRoot) :
		error("new proto area: " + ptchRoot + \
		      " does not exist")
		sys.exit(1)

	#
	# log some information identifying the run
	#
	v_info("Old proto area: " + baseRoot)
	v_info("New proto area: " + ptchRoot)
	v_info("Results file: " + results + "\n")

	#
	# Set up the temporary directories / files
	# Could use python's tmpdir routines, but these should
	# be easier to identify / keep around for debugging
	pid = os.getpid()
	tmpDir1 = "/tmp/wsdiff_tmp1_" + str(pid) + "/"
	tmpDir2 = "/tmp/wsdiff_tmp2_" + str(pid) + "/"
	if not os.path.exists(tmpDir1) :
		os.makedirs(tmpDir1)
	if not os.path.exists(tmpDir2) :
		os.makedirs(tmpDir2)

	tmpFile1 = tmpDir1 + "f1"
	tmpFile2 = tmpDir2 + "f2"

	# Derive a catalog of new, deleted, and to-be-compared objects
	# either from the specified base and patch proto areas, or
	# from an input file list
	newOrDeleted = False

	if fileNamesFile != "" :
		changedFiles, newFiles, deletedFiles = \
			      flistCatalog(baseRoot, ptchRoot, fileNamesFile)
	else :
		changedFiles, newFiles, deletedFiles = protoCatalog(baseRoot, ptchRoot)

	if len(newFiles) > 0 :
		newOrDeleted = True
		info("\nNew objects found: ")

		for fn in newFiles :
			info(fnFormat(fn))

	if len(deletedFiles) > 0 :
		newOrDeleted = True
		info("\nObjects removed: ")

		for fn in deletedFiles :
			info(fnFormat(fn))

	if newOrDeleted :
		info("\nChanged objects: ")

	# Here's where all the heavy lifting happens
	# Perform a comparison on each object appearing in
	# both proto areas. compareOneFile will examine the
	# file types of each object, and will vector off to
	# the appropriate comparison routine, where the compare
	# will happen, and any differences will be reported / logged
	for fn in changedFiles :
		base = baseRoot + fn
		ptch = ptchRoot + fn

		compareOneFile(base, ptch, False)

	# We're done, cleanup.
	cleanup(0)

if __name__ == '__main__' :
	try:
		main()
	except KeyboardInterrupt :
		cleanup(1)
1216
1217
1218