xref: /titanic_41/usr/src/tools/scripts/wsdiff.py (revision f63f7506be0210195779706f51c58646e568cc40)
1#!/usr/sfw/bin/python
2#
3# CDDL HEADER START
4#
5# The contents of this file are subject to the terms of the
6# Common Development and Distribution License (the "License").
7# You may not use this file except in compliance with the License.
8#
9# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10# or http://www.opensolaris.org/os/licensing.
11# See the License for the specific language governing permissions
12# and limitations under the License.
13#
14# When distributing Covered Code, include this CDDL HEADER in each
15# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16# If applicable, add the following below this CDDL HEADER, with the
17# fields enclosed by brackets "[]" replaced with your own identifying
18# information: Portions Copyright [yyyy] [name of copyright owner]
19#
20# CDDL HEADER END
21#
22# Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23# Use is subject to license terms.
24#
25#ident	"%Z%%M%	%I%	%E% SMI"
26
27#
28# wsdiff(1) is a tool that can be used to determine which compiled objects
29# have changed as a result of a given source change. Developers backporting
30# new features, RFEs and bug fixes need to be able to identify the set of
31# patch deliverables necessary for feature/fix realization on a patched system.
32#
# The tool works by comparing objects in two trees/proto areas (one built with,
# and one without, the source changes).
35#
36# Using wsdiff(1) is fairly simple:
37#	- Bringover to a fresh workspace
38#	- Perform a full non-debug build (clobber if workspace isn't fresh)
39#	- Move the proto area aside, call it proto.old, or something.
40#	- Integrate your changes to the workspace
41#	- Perform another full non-debug clobber build.
42#	- Use wsdiff(1) to see what changed:
43#		$ wsdiff proto.old proto
44#
45# By default, wsdiff will print the list of changed objects / deliverables to
46# stdout. If a results file is specified via -r, the list of differing objects,
47# and details about why wsdiff(1) thinks they are different will be logged to
48# the results file.
49#
50# By invoking nightly(1) with the -w option to NIGHTLY_FLAGS, nightly(1) will use
51# wsdiff(1) to report on what objects changed since the last build.
52#
53# For patch deliverable purposes, it's advised to have nightly do a clobber,
54# non-debug build.
55#
56# Think about the results. Was something flagged that you don't expect? Go look
57# at the results file to see details about the differences.
58#
59# Use the -i option in conjunction with -v and -V to dive deeper and have wsdiff(1)
60# report with more verbosity.
61#
62# Usage: wsdiff [-vVt] [-r results ] [-i filelist ] old new
63#
64# Where "old" is the path to the proto area build without the changes, and
65# "new" is the path to the proto area built with the changes. The following
66# options are supported:
67#
68#        -v      Do not truncate observed diffs in results
69#        -V      Log *all* ELF sect diffs vs. logging the first diff found
70#        -t      Use onbld tools in $SRC/tools
71#        -r      Log results and observed differences
72#        -i      Tell wsdiff which objects to compare via an input file list
73
74import datetime, fnmatch, getopt, profile, os, popen2, commands
75import re, select, string, struct, sys, tempfile, time
76from stat import *
77
# Length threshold for human readable diffs written to the results file:
# anything longer is truncated unless -v is given on the command line.
diffs_sz_thresh = 4 * 1024
81
# Directories searched, in order, when locating the external tools wsdiff
# runs. This must stay a list: main() inserts the $SRC tools directory at
# the front when -t is specified.
wsdiff_path = [
	"/usr/bin",
	"/usr/ccs/bin",
	"/lib/svc/bin",
	"/opt/onbld/bin",
]
87
# Objects that wsdiff will notice look different, but will not report.
# Having an exceptions list at all, and adding anything to it, is
# *dangerous*: the *only* acceptable reason for listing an object here is
# that it does not build deterministically and we *cannot* fix that.
#
# These perl libraries use __DATE__ and therefore always look different.
# Ideally we would purge the use of __DATE__ from the source, but because
# this is source we wish to distribute with Solaris "unchanged", we cannot
# modify it.
#
wsdiff_exceptions = [
	"usr/perl5/5.8.4/lib/sun4-solaris-64int/CORE/libperl.so.1",
	"usr/perl5/5.6.1/lib/sun4-solaris-64int/CORE/libperl.so.1",
	"usr/perl5/5.8.4/lib/i86pc-solaris-64int/CORE/libperl.so.1",
	"usr/perl5/5.6.1/lib/i86pc-solaris-64int/CORE/libperl.so.1",
]
102
103#####
104# Logging routines
105#
106
107# Informational message to be printed to the screen, and the log file
def info(msg) :
	"""Print msg to stdout and, when a results file is in use, to the log.

	Only stdout is flushed here; the log is not flushed by this routine.
	"""
	streams = [ sys.stdout ]
	if logging :
		streams.append(log)

	for stream in streams :
		print >> stream, msg

	sys.stdout.flush()
114
115# Error message to be printed to the screen, and the log file
def error(msg) :
	"""Print msg, prefixed with "ERROR:", to stderr and to the log (if any).

	Both streams are flushed right after being written.
	"""
	print >> sys.stderr, "ERROR:", msg
	sys.stderr.flush()

	if not logging :
		return

	print >> log, "ERROR:", msg
	log.flush()
123
124# Informational message to be printed only to the log, if there is one.
def v_info(msg) :
	"""Write msg to the results file only; a no-op when not logging."""
	if not logging :
		return

	print >> log, msg
	log.flush()
130
131#
132# Flag a detected file difference
133# Display the fileName to stdout, and log the difference
134#
def difference(f, dtype, diffs) :
	"""Report that object f differs.

	f is printed to stdout and the details (dtype, diffs) are handed to
	log_difference(). Objects listed in wsdiff_exceptions are silently
	ignored.
	"""
	if f not in wsdiff_exceptions :
		print >> sys.stdout, f
		sys.stdout.flush()

		log_difference(f, dtype, diffs)
144
145#
146# Do the actual logging of the difference to the results file
147#
def log_difference(f, dtype, diffs) :
	"""Record a difference for object f in the results file.

	f     - name of the differing object
	dtype - short description of the kind of difference
	diffs - human readable diff text (may be empty)

	Nothing happens unless logging is enabled. Diff text longer than
	diffs_sz_thresh is truncated unless -v (vdiffs) was given.
	"""
	if not logging :
		return

	print >> log, f
	print >> log, "NOTE:", dtype, "difference detected."

	if len(diffs) > 0 :
		print >> log

		too_long = len(diffs) > diffs_sz_thresh
		if too_long and not vdiffs :
			print >> log, diffs[:diffs_sz_thresh]
			print >> log, "... truncated due to length: use -v to override ..."
		else :
			print >> log, diffs
		print >> log, "\n"

	log.flush()
166
167
168#####
169# diff generating routines
170#
171
172#
173# Return human readable diffs from two temporary files
174#
def diffFileData(tmpf1, tmpf2) :
	"""Return the output of diff_cmd run on the two given files.

	If either file looks binary (see isBinary()), both are first run
	through od_cmd into "<file>.od" and those dumps are diffed instead.
	"""
	paths = [ tmpf1, tmpf2 ]

	if isBinary(tmpf1) or isBinary(tmpf2) :
		dumps = []
		for path in paths :
			od_out = path + ".od"
			os.system(od_cmd + " -c -t x4" + " " + path + " > " + od_out)
			dumps.append(od_out)
		paths = dumps

	return commands.getoutput(diff_cmd + " " + paths[0] + " " + paths[1])
194
195#
# Return human readable diffs between two datasets
197#
def diffData(d1, d2) :
	"""Return human readable diffs between the strings d1 and d2.

	The data is written to the scratch files tmpFile1 and tmpFile2, which
	are then compared with diffFileData(). If a scratch file cannot be
	opened, an error is reported and cleanup(1) terminates the run.
	"""
	handles = []
	for path in (tmpFile1, tmpFile2) :
		try:
			handles.append(open(path, "w"))
		except:
			error("failed to open: " + path)
			cleanup(1)

	# Both files are opened before either is written, then both closed
	out1, out2 = handles
	out1.write(d1)
	out2.write(d2)
	out1.close()
	out2.close()

	return diffFileData(tmpFile1, tmpFile2)
220
221#####
222# Misc utility functions
223#
224
225# Prune off the leading prefix from string s
def str_prefix_trunc(s, prefix) :
	"""Return s with its first len(prefix) characters removed.

	Only the length of prefix is used; s is not checked to actually
	start with prefix.
	"""
	return s[len(prefix):]
229
230#
231# Prune off leading proto path goo (if there is one) to yield
232# the deliverable's eventual path relative to root
233# e.g. proto.base/root_sparc/usr/src/cmd/prstat => usr/src/cmd/prstat
234#
def fnFormat(fn) :
	"""Strip any leading proto area path from fn.

	Everything up to and including the first "/" that follows the first
	occurrence of "root_<arch>" is removed. fn is returned unchanged if
	it has no "root_<arch>" component or no "/" after it.
	"""
	marker = fn.find("root_" + arch)
	if marker != -1 :
		slash = fn.find("/", marker)
		if slash != -1 :
			return fn[slash + 1:]

	return fn
247
248#####
249# Usage / argument processing
250#
251
252#
253# Display usage message
254#
def usage() :
	"""Print the usage summary to stderr and exit with status 1."""
	text = """Usage: wsdiff [-vVt] [-r results ] [-i filelist ] old new
        -v      Do not truncate observed diffs in results
        -V      Log *all* ELF sect diffs vs. logging the first diff found
        -t      Use onbld tools in $SRC/tools
        -r      Log results and observed differences
        -i      Tell wsdiff which objects to compare via an input file list"""

	# Push out anything already printed so the usage text comes last
	sys.stdout.flush()
	print >> sys.stderr, text
	sys.exit(1)
264
265#
266# Process command line options
267#
def args() :
	"""Parse the command line.

	Sets the globals logging (-r), vdiffs (-v) and reportAllSects (-V),
	and returns, in alphabetical order, the tuple:

		(baseRoot, fileNamesFile, localTools, ptchRoot, results)

	Any malformed invocation ends in usage(), which exits. -v and -V
	without -r are reported as errors and exit with status 1.
	"""
	global logging
	global vdiffs
	global reportAllSects

	fileNamesFile = ""
	results = ""
	localTools = False

	# getopt.getopt() hands back the (option, value) pairs and the list
	# of remaining non-option arguments. A correct invocation has
	# exactly two of the latter: the base (old) and ptch (new) proto
	# areas.
	try:
		optlist, operands = getopt.getopt(sys.argv[1:], 'i:r:vVt?')
	except getopt.error:
		usage()

	if len(operands) != 2 :
		usage()

	for opt, val in optlist :
		if opt == '-i' :
			fileNamesFile = val
		elif opt == '-r' :
			results = val
			logging = True
		elif opt == '-v' :
			vdiffs = True
		elif opt == '-V' :
			reportAllSects = True
		elif opt == '-t' :
			localTools = True
		else :
			usage()

	baseRoot, ptchRoot = operands

	if len(baseRoot) == 0 or len(ptchRoot) == 0 :
		usage()

	if logging and len(results) == 0 :
		usage()

	# Both verbosity options only affect what goes in the results file
	for enabled, flag in ((vdiffs, "-v"), (reportAllSects, "-V")) :
		if enabled and not logging :
			error("The " + flag + " option requires a results file (-r)")
			sys.exit(1)

	# alphabetical order
	return baseRoot, fileNamesFile, localTools, ptchRoot, results
330
331#####
332# File identification
333#
334
335#
336# Identify the file type.
337# If it's not ELF, use the file extension to identify
338# certain file types that require special handling to
339# compare. Otherwise just return a basic "ASCII" type.
340#
def getTheFileType(f) :
	"""Classify file f for comparison purposes.

	Returns 'ASCII' for empty files, 'ELF' for files isELF() recognizes,
	the type associated with the text after the last '.' in f if it is
	one of the known extensions, and 'ASCII' for everything else.
	"""
	known_types = {
		'a'	:	'ELF Object Archive',
		'jar'	:	'Java Archive',
		'html'	:	'HTML',
		'ln'	:	'Lint Library',
		'db'	:	'Sqlite Database',
	}

	if os.stat(f)[ST_SIZE] == 0 :
		return 'ASCII'

	if isELF(f) == 1 :
		return 'ELF'

	# Whatever follows the last '.' is treated as the extension
	dot = f.rfind('.')
	if dot != -1 :
		return known_types.get(f[dot + 1:], 'ASCII')

	return 'ASCII'
362
363#
364# Return non-zero if "f" is an ELF file
365#
elfmagic = '\177ELF'
def isELF(f) :
	"""Return 1 if file f starts with the ELF magic number, else 0.

	A file that cannot be opened is reported via error() and treated as
	not being ELF (0 is returned).
	"""
	try:
		fd = open(f)
	except (IOError, OSError):
		# Only I/O failures are expected here; anything else (e.g. an
		# interrupt) should propagate rather than be reported as an
		# open failure.
		error("failed to open: " + f)
		return 0

	# Make sure the descriptor is released even if the read fails
	try:
		magic = fd.read(len(elfmagic))
	finally:
		fd.close()

	if magic == elfmagic :
		return 1
	return 0
379
380#
# Return non-zero if "f" is binary.
382# Consider the file to be binary if it contains any null characters
383#
def isBinary(f) :
	"""Return 1 if file f contains a NUL character, else 0.

	The file is scanned a chunk at a time and the scan stops at the
	first NUL, so large objects are never held in memory whole. A file
	that cannot be opened is reported via error() and treated as not
	binary (0 is returned).
	"""
	try:
		fd = open(f)
	except (IOError, OSError):
		error("failed to open: " + f)
		return 0

	# Make sure the descriptor is released however the scan ends
	try:
		while True :
			chunk = fd.read(65536)
			if not chunk :
				return 0
			if chunk.find('\0') != -1 :
				return 1
	finally:
		fd.close()
397
398#####
399# Directory traversal and file finding
400#
401
402#
403# Return a sorted list of files found under the specified directory
404#
def findFiles(d) :
	"""Generate the paths of all files found under directory d.

	Paths are produced one directory at a time (top down) with the files
	of each directory in sorted order. Subdirectories are also visited
	in sorted order, so the overall sequence is reproducible from run to
	run rather than depending on the order the file system lists them.
	"""
	for path, subdirs, files in os.walk(d) :
		# Sorting subdirs in place steers the order of os.walk()'s
		# top-down descent.
		subdirs.sort()
		files.sort()
		for name in files :
			yield os.path.join(path, name)
410
411#
412# Examine all files in base, ptch
413#
414# Return a list of files appearing in both proto areas,
415# a list of new files (files found only in ptch) and
416# a list of deleted files (files found only in base)
417#
def protoCatalog(base, ptch) :
	"""Catalog the objects found in the base and ptch proto areas.

	Symbolic links are ignored. Returns three lists of paths, each
	relative to its proto area root (the leading len(base) / len(ptch)
	characters are stripped):

		compFiles    - files present in both proto areas
		newFiles     - files present only in ptch
		deletedFiles - files present only in base

	Each list keeps the order in which findFiles() produced the names.
	"""
	baseLen = len(base)
	ptchLen = len(ptch)

	# Inventory the (non-link) files in each proto area
	baseList = [ fn[baseLen:] for fn in findFiles(base)
		     if not os.path.islink(fn) ]
	ptchList = [ fn[ptchLen:] for fn in findFiles(ptch)
		     if not os.path.islink(fn) ]

	# Dictionaries give constant time membership tests; testing against
	# the lists themselves is quadratic in the size of the proto area.
	# (dict.fromkeys() is used rather than set() so this keeps working
	# on older python interpreters.)
	inBase = dict.fromkeys(baseList)
	inPtch = dict.fromkeys(ptchList)

	compFiles = [ fn for fn in baseList if fn in inPtch ]
	deletedFiles = [ fn for fn in baseList if fn not in inPtch ]
	newFiles = [ fn for fn in ptchList if fn not in inBase ]

	return compFiles, newFiles, deletedFiles
466
467#
468# Examine the files listed in the input file list
469#
470# Return a list of files appearing in both proto areas,
471# a list of new files (files found only in ptch) and
472# a list of deleted files (files found only in base)
473#
def flistCatalog(base, ptch, flist) :
	"""Catalog the objects named in the input file list flist.

	Each non-blank line of flist names an object, either relative to
	the base/ptch directory or relative to $ROOT (in which case the
	"root_<arch>/" component stripped by fnFormat() is added back).
	Symbolic links are ignored.

	Returns three lists of paths relative to the proto area roots:

		compFiles    - files present in both proto areas
		newFiles     - files present only in ptch
		deletedFiles - files present only in base

	Names found in neither tree are reported via error() and skipped.
	If flist cannot be opened, cleanup(1) terminates the run.
	"""
	compFiles = []		# List of files in both proto areas
	newFiles = []		# New files detected
	deletedFiles = []	# Deleted files

	try:
		fd = open(flist, "r")
	except (IOError, OSError):
		error("could not open: " + flist)
		cleanup(1)

	try:
		files = fd.readlines()
	finally:
		fd.close()

	rootPrefix = "root_" + arch + "/"

	for f in files :
		# Drop the trailing newline; skip blank (or all-whitespace)
		# lines, which would otherwise name the proto root itself.
		f = f.rstrip()
		if not f :
			continue

		ptch_present = True
		base_present = True

		# Start from the name as listed, so that fn is always bound
		# to this entry even when it is found in neither tree.
		fn = f

		if os.path.exists(base + f) :
			fn = f
		elif os.path.exists(base + rootPrefix + f) :
			fn = rootPrefix + f
		else :
			base_present = False

		if base_present :
			if not os.path.exists(ptch + fn) :
				ptch_present = False
		elif os.path.exists(ptch + f) :
			fn = f
		elif os.path.exists(ptch + rootPrefix + f) :
			fn = rootPrefix + f
		else :
			ptch_present = False

		# ignore links
		base_is_link = os.path.islink(base + fn)
		ptch_is_link = os.path.islink(ptch + fn)
		if base_is_link :
			base_present = False
		if ptch_is_link :
			ptch_present = False

		if base_present and ptch_present :
			compFiles.append(fn)
		elif base_present :
			deletedFiles.append(fn)
		elif ptch_present :
			newFiles.append(fn)
		else :
			# A link in both trees is skipped quietly
			if base_is_link and ptch_is_link :
				continue
			error(f + " in file list, but not in either tree. Skipping...")

	return compFiles, newFiles, deletedFiles
538
539
540#
541# Build a fully qualified path to an external tool/utility.
542# Consider the default system locations. For onbld tools, if
543# the -t option was specified, we'll try to use built tools in $SRC tools,
544# and otherwise, we'll fall back on /opt/onbld/
545#
def find_tool(tool) :
	"""Return the command string to use for running tool.

	tool is used as given if that path exists; otherwise each directory
	in wsdiff_path is tried, first as <dir>/<tool> and then as
	<dir>/<arch>/<tool>. If the tool cannot be found an error is
	reported and wsdiff exits with status 1.

	The returned string always ends in a single space, because some
	callers append their arguments directly to it.
	"""
	# First, check what was passed
	if os.path.exists(tool) :
		return tool + " "

	# Next try in wsdiff path
	for pdir in wsdiff_path :
		candidates = ( pdir + "/" + tool,
			       pdir + "/" + arch + "/" + tool )
		for location in candidates :
			if os.path.exists(location) :
				return location + " "

	error("Could not find path to: " + tool)
	sys.exit(1)
564
565
566#####
567# ELF file comparison helper routines
568#
569
570#
571# Return a dictionary of ELF section types keyed by section name
572#
def get_elfheader(f) :
	"""Return a dictionary of ELF section types keyed by section name.

	The information is parsed from the output of "elfdump -c" on file f.
	Returns None (after reporting an error) if elfdump produced no
	output at all. A section whose type cannot be found is reported and
	left out of the dictionary.
	"""
	header = {}

	hstring = commands.getoutput(elfdump_cmd + " -c " + f)

	if len(hstring) == 0 :
		error("Failed to dump ELF header for " + f)
		return

	# elfdump(1) dumps the section headers with the section name
	# following "sh_name:", and the section type following "sh_type:"
	for sect in hstring.split("Section Header") :
		datap = sect.find("sh_name:")
		if datap == -1 :
			continue
		section = sect[datap:].split()[1]

		datap = sect.find("sh_type:")
		if datap == -1 :
			error("Could not get type for sect: " + section + \
			      " in " + f)
			# Nothing to parse; indexing from -1 would only pick
			# up garbage or fail outright.
			continue

		# The type is the third token from "sh_type:" onwards
		sh_type = sect[datap:].split()[2]
		header[section] = sh_type

	return header
599
600#
601# Extract data in the specified ELF section from the given file
602#
def extract_elf_section(f, section) :
	"""Return the dump(1) rendering of the named section of ELF file f.

	Returns None (after reporting an error) if dump produced no output.
	"""
	cmd = dump_cmd + " -sn " + section + " " + f
	data = commands.getoutput(cmd)

	if len(data) == 0 :
		error(cmd + " yielded no data")
		return

	# dump(1) displays the file name to start...
	# get past it to the data itself
	dbegin = data.find(":") + 1
	return data[dbegin:]
617
618#
619# Return a (hopefully meaningful) human readable set of diffs
620# for the specified ELF section between f1 and f2
621#
622# Depending on the section, various means for dumping and diffing
623# the data may be employed.
624#
text_sections = [ '.text', '.init', '.fini' ]
def diff_elf_section(f1, f2, section, sh_type) :
	"""Return human readable diffs for one ELF section of f1 versus f2.

	Each object is dumped into its scratch file (tmpFile1 / tmpFile2)
	with a command chosen by section type and name, and the two scratch
	files are then compared with diffFileData().
	"""
	# Sections dumped with a dedicated elfdump(1) option
	elfdump_opts = {
		".group"	: " -g ",
		".hash"		: " -h ",
		".dynamic"	: " -d ",
		".got"		: " -G ",
		".SUNW_cap"	: " -H ",
		".interp"	: " -i ",
	}

	cmds = []
	for obj, tmpf in ((f1, tmpFile1), (f2, tmpFile2)) :
		if sh_type == "SHT_RELA" :
			# The section type takes precedence over its name
			cmd = elfdump_cmd + " -r " + obj + " > " + tmpf
		elif section in elfdump_opts :
			cmd = elfdump_cmd + elfdump_opts[section] + obj + \
			      " > " + tmpf
		elif section in (".symtab", ".dynsym") :
			cmd = elfdump_cmd + " -s -N " + section + " " + obj + \
			      " > " + tmpf
		elif section in text_sections :
			# dis sometimes complains when it hits something it
			# doesn't know how to disassemble. That is ignored:
			# this output is only for human consumption, and the
			# difference has already been flagged correctly.
			cmd = dis_cmd + " -t " + section + " " + obj + \
			      " 2>/dev/null | grep -v disassembly > " + tmpf
		else :
			# Here the scratch file is named via elfdump's -w
			# option rather than by redirecting stdout
			cmd = elfdump_cmd + " -w " + tmpf + " -N " + \
			      section + " " + obj
		cmds.append(cmd)

	for cmd in cmds :
		os.system(cmd)

	return diffFileData(tmpFile1, tmpFile2)
673
674#
675# compare the relevant sections of two ELF binaries
676# and report any differences
677#
# Returns: 1 if any differences found
679#          0 if no differences found
680#	  -1 on error
681#
682
# ELF sections that are deliberately left out of the comparison of two
# binaries. Differences in these sections are not considered significant
# where patch deliverable identification is concerned.
sections_to_skip = [
	".SUNW_signature",
	".comment",
	".SUNW_ctf",
	".debug",
	".plt",
	".rela.bss",
	".rela.plt",
	".line",
	".note",
]
696
# ELF sections that compareElfs() moves to the front of the list of
# sections to compare. Each one found is inserted at the head in turn, so
# the last name listed here ends up being compared first.
sections_preferred = [
	".rodata.str1.8",
	".rodata.str1.1",
	".rodata",
	".data1",
	".data",
	".text",
]
704
def compareElfs(base, ptch, quiet) :
	"""Compare the relevant sections of the ELF objects base and ptch.

	base, ptch - paths of the two objects
	quiet      - if true, only the return value reports the result;
	             nothing is printed or logged by this routine itself

	Returns  1 if any difference was found
	         0 if no difference was found
	        -1 if a section header table or section could not be read

	Unless -V (reportAllSects) is in effect the comparison stops at the
	first differing section.
	"""
	base_header = get_elfheader(base)
	if base_header is None :
		return -1
	sections = list(base_header.keys())

	ptch_header = get_elfheader(ptch)
	if ptch_header is None :
		return -1
	e2_only_sections = list(ptch_header.keys())

	e1_only_sections = []

	fileName = fnFormat(base)

	# Derive the list of ELF sections found only in
	# either e1 or e2.
	for sect in sections :
		if sect in e2_only_sections :
			e2_only_sections.remove(sect)
		else :
			e1_only_sections.append(sect)

	# A section present in just one of the objects is a difference in
	# itself; base-only sections are reported in preference to
	# ptch-only ones.
	for only, here, there in ((e1_only_sections, base, ptch),
				  (e2_only_sections, ptch, base)) :
		if len(only) == 0 :
			continue
		if quiet :
			return 1

		info(fileName)
		if not logging :
			return 1

		slist = "".join([ sect + "\t" for sect in only ])
		v_info("\nELF sections found in " + \
		      here + " but not in " + there)
		v_info("\n" + slist)
		return 1

	# Look for preferred sections, and put those at the
	# top of the list of sections to compare
	for psect in sections_preferred :
		if psect in sections :
			sections.remove(psect)
			sections.insert(0, psect)

	# Compare ELF sections
	differs = False
	for sect in sections :

		if sect in sections_to_skip :
			continue

		s1 = extract_elf_section(base, sect)
		s2 = extract_elf_section(ptch, sect)

		# extract_elf_section() has already reported the problem
		if s1 is None or s2 is None :
			return -1

		if s1 == s2 :
			continue

		if not quiet :
			sh_type = base_header[sect]
			data = diff_elf_section(base, ptch, sect, sh_type)

			# If all ELF sections are being reported, then
			# invoke difference() to flag the file name to
			# stdout only once. Any other section differences
			# should be logged to the results file directly
			if differs :
				log_difference(fileName, "ELF " + sect, data)
			else :
				difference(fileName, "ELF " + sect, data)

		# Going on only makes sense when every differing section is
		# to be logged; a quiet caller just needs the verdict.
		if quiet or not reportAllSects :
			return 1
		differs = True

	if differs :
		return 1
	return 0
794
795#####
796# Archive object comparison
797#
798# Returns 1 if difference detected
799#         0 if no difference detected
800#        -1 on error
801#
def compareArchives(base, ptch, fileType) :
	"""Compare two archives (jar or ar) member by member.

	base, ptch - paths of the two archives
	fileType   - "Java Archive" or "ELF Object Archive"

	The archives are unpacked into the scratch directories tmpDir1 and
	tmpDir2, and their members are compared with compareOneFile().
	Only the first difference found is reported.

	Returns  1 if a difference was detected
	         0 if no difference was detected
	        -1 on error
	"""
	fileName = fnFormat(base)

	# clear the temp directories
	for tmpDir in (tmpDir1, tmpDir2) :
		rmCmd = "rm -rf " + tmpDir + "*"
		status, output = commands.getstatusoutput(rmCmd)
		if status != 0 :
			error(rmCmd + " failed: " + output)
			return -1

	#
	# Be optimistic and first try a straight file compare
	# as it will allow us to finish up quickly.
	if compareBasic(base, ptch, True, fileType) == 0 :
		return 0

	# copy over the objects to the temp areas, and
	# unpack them
	for archive, tmpDir in ((base, tmpDir1), (ptch, tmpDir2)) :
		cpCmd = "cp -fp " + archive + " " + tmpDir
		status, output = commands.getstatusoutput(cpCmd)
		if status != 0 :
			error(cpCmd + " failed: " + output)
			return -1

	# Unpack the copy, then remove it (and, for jars, the manifest) so
	# that only the members to be compared remain
	bname = fileName.split('/')[-1]
	if fileType == "Java Archive" :
		unpack = "jar xf " + bname + \
			 "; rm -f " + bname + " META-INF/MANIFEST.MF"
	elif fileType == "ELF Object Archive" :
		unpack = "/usr/ccs/bin/ar x " + \
			 bname + "; rm -f " + bname
	else :
		error("unexpected file type: " + fileType)
		return -1

	os.system("cd " + tmpDir1 + "; " + unpack)
	os.system("cd " + tmpDir2 + "; " + unpack)

	# Member names, with the leading scratch directory trimmed off
	baseFlist = [ str_prefix_trunc(fn, tmpDir1)
		      for fn in findFiles(tmpDir1) ]
	ptchFlist = [ str_prefix_trunc(fn, tmpDir2)
		      for fn in findFiles(tmpDir2) ]

	for member in ptchFlist :
		if member not in baseFlist :
			difference(fileName, fileType, \
				   member + " added to " + fileName)
			return 1

	for member in baseFlist :
		if member not in ptchFlist :
			difference(fileName, fileType, \
				   member + " removed from " + fileName)
			return 1

		if compareOneFile(tmpDir1 + member, tmpDir2 + member, True) :
			difference(fileName, fileType, \
				   member + " in " + fileName + " differs")
			return 1

	return 0
889
890#####
891# (Basic) file comparison
892#
893# There's some special case code here for Javadoc HTML files
894#
895# Returns 1 if difference detected
896#         0 if no difference detected
897#        -1 on error
898#
def _snipJavadocStamp(data) :
	"""Return data without its first "<!-- Generated by javadoc" comment.

	data is returned unchanged if it holds no such comment, or if the
	comment is not terminated by "-->" followed by a newline.
	"""
	endStr = "-->\n"

	begin = data.find("<!-- Generated by javadoc")
	if begin == -1 :
		return data

	end = data.find(endStr, begin)
	if end == -1 :
		return data

	return data[:begin] + data[end + len(endStr):]

def compareBasic(base, ptch, quiet, fileType) :
	"""Compare the contents of the files base and ptch.

	base, ptch - paths of the two files
	quiet      - if true, only the return value reports the result
	fileType   - file type as returned by getTheFileType(); for "HTML"
	             the "Generated by javadoc" comment is removed from
	             each file before the two are compared

	Returns  1 if a difference was detected
	         0 if no difference was detected
	        -1 on error
	"""
	fileName = fnFormat(base)

	# When nothing is to be reported, differing sizes settle it
	if quiet and os.stat(base)[ST_SIZE] != os.stat(ptch)[ST_SIZE] :
		return 1

	contents = []
	for path in (base, ptch) :
		try:
			fd = open(path)
		except (IOError, OSError):
			error("could not open " + path)
			return -1
		# Each file is closed before the next is opened, so no
		# descriptor is left behind on the error paths.
		try:
			contents.append(fd.read())
		finally:
			fd.close()

	baseData, ptchData = contents

	if fileType == "HTML" :
		# The comment is located in each file separately, since it
		# need not be at the same offset or of the same length in
		# both.
		baseData = _snipJavadocStamp(baseData)
		ptchData = _snipJavadocStamp(ptchData)

	if baseData == ptchData :
		return 0

	if not quiet :
		diffs = diffData(baseData, ptchData)
		difference(fileName, fileType, diffs)
	return 1
949
950
951#####
952# Compare two objects by producing a data dump from
953# each object, and then comparing the dump data
954#
955# Returns: 1 if a difference is detected
956#          0 if no difference detected
957#         -1 upon error
958#
def compareByDumping(base, ptch, quiet, fileType) :
	"""Compare two objects by comparing textual dumps of them.

	base, ptch - paths of the two objects
	quiet      - if true, only the return value reports the result
	fileType   - "Lint Library" or "Sqlite Database"

	The dumps are written to the scratch files tmpFile1 and tmpFile2.

	Returns  1 if a difference is detected
	         0 if no difference detected
	        -1 upon error (unexpected file type, unreadable dump)
	"""
	fileName = fnFormat(base)

	if fileType == "Lint Library" :
		baseCmd = lintdump_cmd + " -ir " + base + \
			  " | grep -v LINTLIB:" + " > " + tmpFile1
		ptchCmd = lintdump_cmd + " -ir " + ptch + \
			  " | grep -v LINTLIB:" + " > " + tmpFile2
	elif fileType == "Sqlite Database" :
		baseCmd = "echo .dump | " + sqlite_cmd + base + " > " + \
			  tmpFile1
		ptchCmd = "echo .dump | " + sqlite_cmd + ptch + " > " + \
			  tmpFile2
	else :
		# There is no dump command for any other type
		error("unexpected file type: " + fileType)
		return -1

	os.system(baseCmd)
	os.system(ptchCmd)

	dumps = []
	for tmpf in (tmpFile1, tmpFile2) :
		try:
			fd = open(tmpf)
		except (IOError, OSError):
			error("could not open: " + tmpf)
			return -1
		try:
			dumps.append(fd.read())
		finally:
			fd.close()

	if dumps[0] == dumps[1] :
		return 0

	if not quiet :
		data = diffFileData(tmpFile1, tmpFile2)
		difference(fileName, fileType, data)
	return 1
998
999#####
1000# Compare two objects. Detect type changes.
1001# Vector off to the appropriate type specific
1002# compare routine based on the type.
1003#
def compareOneFile(base, ptch, quiet) :
	"""Compare the objects base and ptch using a type specific routine.

	A change of file type between the two is itself reported as a
	difference (1 is returned). Otherwise the result of the type
	specific comparison routine is returned.
	"""
	btype = getTheFileType(base)
	ptype = getTheFileType(ptch)

	fileName = fnFormat(base)

	# Differing types: flag that and move on
	if btype != ptype :
		difference(fileName, "file type", btype + " to " + ptype)
		return 1

	if btype == 'ELF' :
		return compareElfs(base, ptch, quiet)

	if btype in ('Java Archive', 'ELF Object Archive') :
		return compareArchives(base, ptch, btype)

	if btype in ('Lint Library', 'Sqlite Database') :
		return compareByDumping(base, ptch, quiet, btype)

	# HTML, or else it has to be some variety of text file
	return compareBasic(base, ptch, quiet, btype)
1036
1037# Cleanup and self-terminate
def cleanup(ret) :
	"""Remove the scratch directories, close the log and exit with ret.

	This can be reached (e.g. on an interrupt) before main() has set up
	the scratch directories or the log, so the globals are looked up
	defensively rather than assumed to exist.
	"""
	state = globals()

	dir1 = state.get("tmpDir1", "")
	dir2 = state.get("tmpDir2", "")
	if len(dir1) > 0 and len(dir2) > 0 :
		os.system("rm -rf " + dir1)
		os.system("rm -rf " + dir2)

	logfile = state.get("log")
	if state.get("logging", False) and logfile is not None :
		logfile.close()

	sys.exit(ret)
1052
def main() :
	"""Run wsdiff: parse options, locate tools, catalog and compare.

	Sets up the module globals used by the other routines (the log,
	option flags, tool command strings, scratch files and arch), lists
	new and removed objects, compares every object present in both
	proto areas, and finally exits through cleanup(0).
	"""

	# Log file handle
	global log

	# Globals relating to command line options
	global logging, vdiffs, reportAllSects

	# Named temporary files / directories
	global tmpDir1, tmpDir2, tmpFile1, tmpFile2

	# Command paths
	global lintdump_cmd, elfdump_cmd, dump_cmd, dis_cmd, od_cmd, diff_cmd, sqlite_cmd

	# Default search path
	global wsdiff_path

	# Essentially "uname -p"
	global arch

	# Some globals need to be initialized
	logging = vdiffs = reportAllSects = False

	# Process command line arguments
	# Return values are returned from args() in alpha order
	# Note that args() also sets the globals:
	#	logging to True if verbose logging (to a file) was enabled
	#	vdiffs to True if logged differences aren't to be truncated
	#	reportAllSects to True if all ELF section differences are to be reported
	#
	baseRoot, fileNamesFile, localTools, ptchRoot, results = args()

	#
	# Set up the results/log file
	#
	if logging :
		try:
			log = open(results, "w")
		except (IOError, OSError):
			# Turn logging off first so that error() does not try
			# to write to the log that just failed to open.
			logging = False
			error("failed to open log file: " + results)
			sys.exit(1)

		dateTimeStr = "# %d/%d/%d at %d:%d:%d" % time.localtime()[:6]
		v_info("# This file was produced by wsdiff")
		v_info(dateTimeStr)

	#
	# Build paths to the required tools
	#
	# Try to look for tools in $SRC/tools if the "-t" option
	# was specified
	#
	arch = commands.getoutput("uname -p")
	if localTools :
		try:
			src = os.environ['SRC']
		except KeyError:
			error("-t specified, but $SRC not set. Cannot find $SRC/tools")
			src = ""
		if len(src) > 0 :
			wsdiff_path.insert(0, src + "/tools/proto/opt/onbld/bin")

	lintdump_cmd = find_tool("lintdump")
	elfdump_cmd = find_tool("elfdump")
	dump_cmd = find_tool("dump")
	od_cmd = find_tool("od")
	dis_cmd = find_tool("dis")
	diff_cmd = find_tool("diff")
	sqlite_cmd = find_tool("sqlite")

	#
	# validate the base and patch paths
	#
	# (object paths are later formed by plain concatenation, hence the
	# trailing slashes)
	if baseRoot[-1] != '/' :
		baseRoot += '/'

	if ptchRoot[-1] != '/' :
		ptchRoot += '/'

	if not os.path.exists(baseRoot) :
		error("old proto area: " + baseRoot + " does not exist")
		sys.exit(1)

	if not os.path.exists(ptchRoot) :
		error("new proto area: " + ptchRoot + \
		      " does not exist")
		sys.exit(1)

	#
	# log some information identifying the run
	#
	v_info("Old proto area: " + baseRoot)
	v_info("New proto area: " + ptchRoot)
	v_info("Results file: " + results + "\n")

	#
	# Set up the temporary directories / files
	# Could use python's tmpdir routines, but these should
	# be easier to identify / keep around for debugging
	pid = os.getpid()
	tmpDir1 = "/tmp/wsdiff_tmp1_" + str(pid) + "/"
	tmpDir2 = "/tmp/wsdiff_tmp2_" + str(pid) + "/"
	if not os.path.exists(tmpDir1) :
		os.makedirs(tmpDir1)
	if not os.path.exists(tmpDir2) :
		os.makedirs(tmpDir2)

	tmpFile1 = tmpDir1 + "f1"
	tmpFile2 = tmpDir2 + "f2"

	# Derive a catalog of new, deleted, and to-be-compared objects
	# either from the specified base and patch proto areas, or
	# from an input file list
	newOrDeleted = False

	if fileNamesFile != "" :
		changedFiles, newFiles, deletedFiles = \
			      flistCatalog(baseRoot, ptchRoot, fileNamesFile)
	else :
		changedFiles, newFiles, deletedFiles = protoCatalog(baseRoot, ptchRoot)

	if len(newFiles) > 0 :
		newOrDeleted = True
		info("\nNew objects found: ")

		for fn in newFiles :
			info(fnFormat(fn))

	if len(deletedFiles) > 0 :
		newOrDeleted = True
		info("\nObjects removed: ")

		for fn in deletedFiles :
			info(fnFormat(fn))

	if newOrDeleted :
		info("\nChanged objects: ")

	# Here's where all the heavy lifting happens
	# Perform a comparison on each object appearing in
	# both proto areas. compareOneFile will examine the
	# file types of each object, and will vector off to
	# the appropriate comparison routine, where the compare
	# will happen, and any differences will be reported / logged
	for fn in changedFiles :
		base = baseRoot + fn
		ptch = ptchRoot + fn

		compareOneFile(base, ptch, False)

	# We're done, cleanup.
	cleanup(0)

if __name__ == '__main__' :
	try:
		main()
	except KeyboardInterrupt :
		cleanup(1)
1215
1216
1217