xref: /titanic_52/usr/src/cmd/sgs/lari/lari.pl (revision bdfc6d18da790deeec2e0eb09c625902defe2498)
1#!/usr/perl5/bin/perl -w
2#
3# CDDL HEADER START
4#
5# The contents of this file are subject to the terms of the
6# Common Development and Distribution License, Version 1.0 only
7# (the "License").  You may not use this file except in compliance
8# with the License.
9#
10# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
11# or http://www.opensolaris.org/os/licensing.
12# See the License for the specific language governing permissions
13# and limitations under the License.
14#
15# When distributing Covered Code, include this CDDL HEADER in each
16# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
17# If applicable, add the following below this CDDL HEADER, with the
18# fields enclosed by brackets "[]" replaced with your own identifying
19# information: Portions Copyright [yyyy] [name of copyright owner]
20#
21# CDDL HEADER END
22#
23#
24# Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
25# Use is subject to license terms.
26#
27# ident	"%Z%%M%	%I%	%E% SMI"
28#
29# Link Analysis of Runtime Interfaces.
30#
31
32# Define all global variables (required for strict)
33use vars  qw($Prog $DestDir $ObjRef $ObjFlag $ObjSize $TmpDir $LddArgs);
34use vars  qw($Glob $Intp $Cpyr $Prot $Extn $Self $Filt $Dirc $Plta $User $Func);
35use vars  qw($Objt $UndefSym $IgnSyms $Rtld $MultSyms $CrtSyms $GlobWeak);
36use vars  qw($DbgSeed %opt %Symbols %Objects %Versioned %DemSyms);
37use vars  qw($Platform $Nodi $Osft $Oaft $Ssft $Saft $Msft);
38
39use strict;
40
41use Getopt::Std;
42use File::Basename;
43
44# Pattern match to skip objects.
45$Rtld = qr{ ^(?:
46	/lib/ld\.so\.1 |
47	/usr/lib/ld\.so\.1 |
48	/lib/sparcv9/ld\.so\.1 |
49	/usr/lib/sparcv9/ld\.so\.1
50	)$
51}x;
52
53# Pattern matching required to determine a global symbol.
54$GlobWeak = qr{ ^(?:
55	GLOB |
56	WEAK
57	)$
58}x;
59
60# Pattern matching to determine link-editor specific symbols and those common
61# to the compilation environment (ie. provided by all crt's).
62$MultSyms = qr{ ^(?:
63	 _DYNAMIC |
64	 _GLOBAL_OFFSET_TABLE_ |
65	 _PROCEDURE_LINKAGE_TABLE_ |
66	 _etext |
67	 _edata |
68	 _end |
69	 _init |
70	 _fini |
71	 _lib_version |			# Defined in values
72	 __xpg4 |			# Defined in values
73	 __xpg6				# Defined in values
74	)$
75}x;
76
77$CrtSyms = qr{ ^(?:
78	 ___Argv |			# Defined in crt
79	 __environ_lock |		# Defined in crt
80	 _environ |			# Defined in crt
81	 environ			# Defined in crt
82	 )$
83}x;
84
85# Pattern match to remove undefined, NOTY and versioning symbols.
86$UndefSym = qr{ ^(?:
87	UNDEF
88	)$
89}x;
90
91$IgnSyms = qr{ ^(?:
92	NOTY |
93	ABS
94	)$
95}x;
96
97# Symbol flags.
98$Glob = 0x00001;	# symbol is global
99$Intp = 0x00010;	# symbol originates for explicit interposer
100$Dirc = 0x00020;	# symbol bound to directly
101$Cpyr = 0x00040;	# symbol bound to copy-relocation reference
102$Prot = 0x00080;	# symbol is protected (symbolic)
103$Extn = 0x00100;	# symbol has been bound to from an external reference
104$Self = 0x00200;	# symbol has been bound to from the same object
105$Filt = 0x00400;	# symbol bound to a filtee
106$Plta = 0x00800;	# symbol bound to executables plt address
107$User = 0x01000;	# symbol binding originates from user (dlsym) request
108$Func = 0x02000;	# symbol is of type function
109$Objt = 0x04000;	# symbol is of type object
110$Nodi = 0x08000;	# symbol prohibits direct binding
111
112$Osft = 0x10000;	# symbol is an standard object filter
113$Oaft = 0x20000;	# symbol is an auxiliary object filter
114$Ssft = 0x40000;	# symbol is a per-symbol standard filter
115$Saft = 0x80000;	# symbol is a per-symbol auxilary filter
116$Msft = 0xf0000;	# filter mask
117
118# Offsets into $Symbols{$SymName}{$Obj} array.
119$ObjRef =	0;
120$ObjFlag =	1;
121$ObjSize =	2;
122
123
124# Establish locale
125use POSIX qw(locale_h);
126use Sun::Solaris::Utils qw(textdomain gettext);
127
128setlocale(LC_ALL, "");
129textdomain("SUNW_OST_SGS");
130
131# Establish a program name for any error diagnostics.
132$Prog = basename($0);
133
134sub inappropriate {
135	my ($Opt1, $Opt2, $Flag) = @_;
136
137	if ($Flag) {
138	    printf STDERR
139		gettext("%s: inappropriate use of %s with %s: %s ignored\n"),
140		$Prog, $Opt1, $Opt2, $Opt1;
141	} else {
142	    printf STDERR
143		gettext("%s: inappropriate use of %s without %s: %s ignored\n"),
144		$Prog, $Opt1, $Opt2, $Opt1;
145	}
146}
147
148# Cleanup any temporary files on interruption
149sub Cleanup {
150	my ($Sig) = @_;
151
152	$SIG{$Sig} = 'IGNORE';
153
154	if ($DbgSeed ne "") {
155		foreach my $File (<\Q${DbgSeed}\E.*>) {
156			if ($File =~ /^\Q$DbgSeed\E\.\d+$/) {
157				unlink($File);
158			}
159		}
160	}
161	exit 1;
162}
163
164# Check that we have arguments.
165if ((getopts('abCDd:imosv', \%opt) == 0) || ($#ARGV < 0)) {
166	printf STDERR gettext("usage:\n");
167	printf STDERR
168	    gettext("    %s [-bCDsv] [-a | -i | -o ] file | dir ...\n"), $Prog;
169	printf STDERR
170	    gettext("    %s [-CDosv] [-m [-d mapdir]] file\n"), $Prog;
171	print STDERR
172	    gettext("\t[-a]     print diagnostics for all symbols\n");
173	print STDERR
174	    gettext("\t[-b]     print diagnostics for multiple-bound " .
175		"symbols\n");
176	print STDERR
177	    gettext("\t[-C]     print demangled symbol names also\n");
178	print STDERR
179	    gettext("\t[-D]     read debugging information from \"file\"\n");
180	print STDERR
181	    gettext("\t[-d dir] create mapfiles in \"mapdir\"\n");
182	print STDERR
183	    gettext("\t[-i]     print interesting information (default)\n");
184	print STDERR
185	    gettext("\t[-m]     create mapfiles for interface requirements\n");
186	print STDERR
187	    gettext("\t[-o]     print overhead information\n");
188	print STDERR
189	    gettext("\t[-s]     save bindings information created by ldd(1)\n");
190	print STDERR
191	    gettext("\t[-v]     ignore versioned objects\n");
192	exit 1;
193} else {
194	my ($Mult, $Error);
195
196	# Catch any incompatible argument usage.
197	if ($opt{m}) {
198		if ($opt{a}) {
199			inappropriate("-a", "-m", 1);
200			$opt{a} = 0;
201		}
202		if ($opt{i}) {
203			inappropriate("-i", "-m", 1);
204			$opt{i} = 0;
205		}
206	} else {
207		if ($opt{d}) {
208			inappropriate("-d", "-m", 0);
209			$opt{d} = 0;
210		}
211	}
212	if ($opt{a}) {
213		if ($opt{o}) {
214			inappropriate("-a", "-o", 1);
215			$opt{o} = 0;
216		}
217		if ($opt{i}) {
218			inappropriate("-a", "-i", 1);
219			$opt{i} = 0;
220		}
221	}
222	if ($opt{o} && $opt{i}) {
223		inappropriate("-o", "-i", 1);
224			$opt{i} = 0;
225	}
226
227	# If -m is used, only one input file is applicable.
228	if ($opt{m} && ($#ARGV != 0)) {
229		printf STDERR gettext("%s: only one input file is allowed " .
230		    "with the -m option\n"), $Prog;
231		exit 1;
232	}
233
234	# Insure any specified directory exists, or apply a default.
235	if ($opt{d}) {
236		# User specified directory - make sure it exists.
237		if (! -d $opt{d}) {
238			printf STDERR gettext("%s: %s is not a directory\n"),
239			    $Prog, $opt{d};
240			exit 1;
241		}
242		$DestDir = $opt{d};
243	} else {
244		$DestDir = ".";
245	}
246
247	# Establish a temporary directory if necessary.
248	if (!$opt{D}) {
249		if (!($TmpDir = $ENV{TMPDIR}) || (! -d $TmpDir)) {
250			$TmpDir = "/tmp";
251		}
252	}
253
254	# Establish any initial ldd(1) argument requirements.
255	if ($LddArgs = $ENV{LARI_LDD_ARGS}) {
256		$LddArgs = $LddArgs . ' -r -e LD_DEBUG=bindings,files,detail';
257	} else {
258		$LddArgs = '-r -e LD_DEBUG=bindings,files,detail';
259	}
260
261	# If we've been asked to demangle symbols, make sure we can find the
262	# demangler.
263	if ($opt{C}) {
264		my ($DemName) = `dem XXXX 2> /dev/null`;
265		if (!$DemName) {
266			printf STDERR gettext("%s: can not locate demangler: " .
267			    "-C ignored\n"), $Prog;
268			$opt{C} = 0;
269		}
270	}
271
272	# If -a or -o hasn't been specified, default to -i.
273	if (!$opt{a} && !$opt{o}) {
274		$opt{i} = 1;
275	}
276
277	# Determine whether we have a multiple input files.
278	if ($#ARGV == 0) {
279		$Mult = 0;
280	} else {
281		$Mult = 1;
282	}
283
284	# Determine what platform we're running on - some inappropriate
285	# platform specific dependencies are better skipped.
286	chomp($Platform = `uname -i`);
287
288	# Establish signal handlers
289	$SIG{INT} = \&Cleanup;
290	$SIG{QUIT} = \&Cleanup;
291
292	$DbgSeed = "";
293
294	# For each argument determine if we're dealing with a file or directory.
295	$Error = 0;
296	foreach my $Arg (@ARGV) {
297		if (!stat($Arg)) {
298			printf STDERR gettext("%s: %s: unable to stat file\n"),
299			    $Prog, $Arg;
300			$Error = 1;
301			next;
302		}
303
304		# Process simple files.
305		if (-f _) {
306			if (!-r _) {
307				printf STDERR gettext("%s: %s: unable to " .
308				   "read file\n"), $Prog, $Arg;
309				$Error = 1;
310				next;
311			}
312			if (!$opt{D}) {
313				if (ProcFile($Arg, $Mult, 1) == 0) {
314					$Error = 1;
315				}
316			} else {
317				# If the -D option is specified, read the
318				# bindings debugging information from the
319				# specified file.
320				if ($Mult) {
321					print STDOUT "$Arg:\n";
322				}
323				ProcBindings($Arg, $Mult, $Arg);
324			}
325			next;
326		}
327
328		# Process directories.
329		if (-d _) {
330			ProcDir($Arg);
331			next;
332		}
333
334		printf STDERR gettext("%s: %s: is not a file or directory\n"),
335		    $Prog, $Arg;
336		$Error = 1;
337	}
338	exit $Error;
339}
340
341sub ProcDir {
342	my ($Dir) = @_;
343	my ($File);
344
345	# Open the directory and read each entry, omit "." and "..".  Sorting
346	# the directory listing makes analyzing different source hierarchies
347	# easier.
348	if (opendir(DIR, $Dir)) {
349		foreach my $Entry (sort(readdir(DIR))) {
350			if (($Entry eq '.') || ($Entry eq '..')) {
351				next;
352			}
353
354			# If we're decending into a platform directory, ignore
355			# any inappropriate platform specific files.  These
356			# files can have dependencies that in turn bring in the
357			# appropriate platform specific file, resulting in more
358			# than one dependency offering the same interfaces.  In
359			# practice, the non-appropriate platform specific file
360			# wouldn't be loaded with a process.
361			if (($Dir =~ /\/platform$/) &&
362			    ($Entry !~ /^$Platform$/)) {
363				next;
364			}
365
366			$File = "$Dir/$Entry";
367			if (!lstat($File)) {
368				next;
369			}
370			# Ignore symlinks.
371			if (-l _) {
372				next;
373			}
374
375			# Descend into, and process any directories.
376			if (-d _) {
377				ProcDir($File);
378				next;
379			}
380
381			# Process any standard files.
382			if (-f _ && -r _) {
383				ProcFile($File, 1, 0);
384				next;
385
386			}
387		}
388		closedir(DIR);
389	}
390}
391
392# Process a file.  If the file was explicitly defined on the command-line, and
393# an error occurs, tell the user.  Otherwise, this file probably came about from
394# scanning a directory, in which case just skip it and move on.
395sub ProcFile {
396	my ($File, $Mult, $CmdLine) = @_;
397	my (@Ldd, $NoFound, $DbgFile, @DbgGlob, $Type);
398
399	$Type = `LC_ALL=C file '$File' 2>&1`;
400	if (($Type !~ /dynamically linked/) || ($Type =~ /Sun demand paged/)) {
401		if ($CmdLine) {
402			printf STDERR gettext("%s: %s: is an invalid file " .
403			    "type\n"), $Prog, $File;
404		}
405		return 0;
406	}
407
408	# Create a temporary filename for capturing binding information.
409	$DbgSeed = basename($File);
410	$DbgSeed = "$TmpDir/lari.dbg.$$.$DbgSeed";
411
412	# Exercise the file under ldd(1), capturing all the bindings.
413	@Ldd = split(/\n/,
414	    `LC_ALL=C ldd $LddArgs -e LD_DEBUG_OUTPUT='$DbgSeed' '$File' 2>&1`);
415
416	# If ldd isn't -e capable we'll get a usage message.  The -e option was
417	# introduced in Solaris 9 and related patches.  Also, make sure the user
418	# sees any ldd errors.
419	$NoFound = 0;
420	for my $Line (@Ldd) {
421		if ($Line =~ /^usage: ldd/) {
422			printf STDERR gettext("%s: ldd: does not support -e, " .
423			    "unable to capture bindings output\n"), $Prog;
424			exit 1;
425		}
426		if ($Line =~ /not found/) {
427			$NoFound = 1;
428			last;
429		}
430	}
431
432	# The runtime linker will have appended a process id to the debug file.
433	# As we have to intuit the name, make sure there is only one debug
434	# file match, otherwise there must be some clutter in the output
435	# directory that is going to mess up our analysis.
436	foreach my $Match (<\Q${DbgSeed}\E.*>) {
437		if ($Match =~ /^\Q$DbgSeed\E\.\d+$/) {
438			push(@DbgGlob, $Match);
439		}
440	}
441	if (@DbgGlob == 0) {
442		# If there is no debug file, bail.  This can occur if the file
443		# being processed is secure.
444		if ($CmdLine) {
445			printf STDERR gettext("%s: %s: unable to capture " .
446			    "bindings output - possible secure application?\n"),
447			    $Prog, $File;
448		}
449		return 0;
450	} elsif (@DbgGlob > 1) {
451		# Too many debug files found.
452		if ($CmdLine) {
453			printf STDERR gettext("%s: %s: multiple bindings " .
454			    "output files exist: %s: clean up temporary " .
455			    "directory\n"), $Prog, $File, $DbgSeed;
456		}
457		return 0;
458	} else {
459		$DbgFile = $DbgGlob[0];
460	}
461
462	# Ok, we're ready to process the bindings information.  Print a header
463	# if necessary, and if there were any ldd(1) errors push some of them
464	# out before any bindings information.  Limit the output, as it can
465	# sometimes be excessive.  If there are errors, the bindings information
466	# is likely to be incomplete.
467	if ($Mult) {
468		print STDOUT "$File:\n";
469	}
470	if ($NoFound) {
471		my ($Cnt) = 4;
472
473		for my $Line (@Ldd) {
474			if ($Line =~ /not found/) {
475				print STDOUT "$Line\n";
476				$Cnt--;
477			}
478			if ($Cnt == 0) {
479				print STDOUT gettext("\tcontinued ...\n");
480				last;
481			}
482		}
483	}
484
485	# If the user wants the original debugging file left behind, rename it
486	# so that it doesn't get re-read by another instance of lari processing
487	# this file.
488	if ($opt{s}) {
489		rename($DbgFile, $DbgSeed);
490		$DbgFile = $DbgSeed;
491		printf STDOUT gettext("%s: %s: bindings information " .
492		    "saved as: %s\n"), $Prog, $File, $DbgFile;
493	}
494
495	ProcBindings($File, $Mult, $DbgFile);
496
497	# Now that we've finished with the debugging file, nuke it if necessary.
498	if (!$opt{s}) {
499		unlink($DbgFile);
500	}
501	$DbgSeed = "";
502	return 1;
503}
504
505sub ProcBindings {
506	my ($File, $Mult, $DbgFile) = @_;
507	my (%Filtees, $FileHandle);
508
509	# Reinitialize our arrays when we're dealing with multiple files.
510	if ($Mult) {
511		%Symbols = ();
512		%Objects = ();
513		%Versioned = ();
514	}
515
516	# As debugging output can be significant, read a line at a time.
517	open($FileHandle, "<$DbgFile");
518	while (defined(my $Line = <$FileHandle>)) {
519		chomp($Line);
520
521		# If we find a relationship between a filter and filtee, save
522		# it, we'll come back to this once we've gathered everybodies
523		# symbols.
524		if ($Line =~ /;  filtered by /) {
525			my ($Filtee) = $Line;
526			my ($Filter) = $Line;
527
528			# Separate the filter and filtee names, ignore the
529			# runtime linker.
530			$Filtee =~ s/^.*: file=(.*);  filtered by .*/$1/;
531			if ($Filtee =~ $Rtld) {
532				next;
533			}
534			$Filter =~ s/^.*;  filtered by //;
535			$Filtees{$Filtee}{$Filter} = 1;
536			next;
537		}
538
539		# If we find a configuration alternative, determine whether it
540		# is for one of our filtees, and if so record it.
541		if ($Line =~ / configuration alternate found:/) {
542			my ($Orig) = $Line;
543			my ($Altr) = $Line;
544
545			# Separate the original and alternative names.
546			$Orig =~ s/^.*: file=(.*)  .*$/$1/;
547			$Altr =~ s/^.* configuration alternate found: (.*)$/$1/;
548
549			for my $Filtee (keys(%Filtees)) {
550				if ($Filtee ne $Orig) {
551					next;
552				}
553				for my $Filter (keys(%{$Filtees{$Filtee}})) {
554					$Filtees{$Altr}{$Filter} = 1;
555				}
556			}
557			next;
558		}
559
560		# Collect the symbols from any file analyzed.
561		if ($Line =~ /^.*: file=(.*);  analyzing .*/) {
562			GetAllSymbols($1);
563			next;
564		}
565
566		# Process any symbolic relocations that bind to a file.
567		if ($Line =~ /: binding file=.* to file=/) {
568			my ($RefFile, $DstFile, $SymName);
569			my (@Syms, $Found, @Fields);
570			my ($BndInfo) = 0;
571			my ($Offset) = 1;
572			my ($Dlsym) = 0;
573			my ($Detail) = 0;
574
575			# For greatest flexibility, split the line into fields
576			# and walk each field until we find what we need.
577			@Fields = split(' ', $Line);
578
579			# The referencing file, "... binding file=".*".
580			while ($Fields[$Offset]) {
581				if ($Fields[$Offset] =~ /^file=(.*)/) {
582					$RefFile = $1;
583					$Offset++;
584					last;
585				}
586				$Offset++;
587			}
588			# The referencing offset, typically this is the address
589			# of the reference, "(0x1234...)", but in the case of a
590			# user lookup it's the string "(dlsym)".  If we don't
591			# find this offset information we've been given a debug
592			# file that didn't user the "datail" token, in which case
593			# we're not getting all the information we need.
594			if ($Fields[$Offset] =~ /^\((.*)\)/) {
595				if ($1 eq 'dlsym') {
596					$Dlsym = 1;
597				}
598				$Detail = 1;
599				$Offset++;
600			}
601			# The destination file, "... to file=".*".
602			while ($Fields[$Offset]) {
603				if ($Fields[$Offset] =~ /^file=(.*)/) {
604					$DstFile = $1;
605					$Offset++;
606					last;
607				}
608				$Offset++;
609			}
610			# The symbol being bound, "... symbol `.*' ...".
611			while ($Fields[$Offset]) {
612				if ($Fields[$Offset] =~ /^\`(.*)\'$/) {
613					$SymName = $1;
614					$Offset++;
615					last;
616				}
617				$Offset++;
618			}
619			# Possible trailing binding info, "... (direct,.*)$".
620			while ($Fields[$Offset]) {
621				if ($Fields[$Offset] =~ /^\((.*)\)$/) {
622					$BndInfo = $1;
623					$Offset++;
624					last;
625				}
626				$Offset++;
627			}
628
629			if ($Detail == 0) {
630				printf STDERR gettext("%s: %s: debug file " .
631				    "does not contain `detail' information\n"),
632				    $Prog, $DbgFile;
633				return;
634			}
635
636			# Collect the symbols from each object.
637			GetAllSymbols($RefFile);
638			GetAllSymbols($DstFile);
639
640			# Identify that this definition has been bound to.
641			$Symbols{$SymName}{$DstFile}[$ObjRef]++;
642			if ($RefFile eq $DstFile) {
643				# If the reference binds to a definition within
644				# the same file this symbol may be a candidate
645				# for reducing to local.
646				$Symbols{$SymName}{$DstFile}[$ObjFlag] |= $Self;
647				$Objects{$DstFile}{$SymName} |= $Self;
648			} else {
649				# This symbol is required to satisfy an external
650				# reference.
651				$Symbols{$SymName}{$DstFile}[$ObjFlag] |= $Extn;
652				$Objects{$DstFile}{$SymName} |= $Extn;
653			}
654
655			# Assign any other state indicated by the binding info
656			# associated with the diagnostic output.
657			if (!$BndInfo) {
658				next;
659			}
660
661			if ($BndInfo =~ /direct/) {
662				$Symbols{$SymName}{$DstFile}[$ObjFlag] |= $Dirc;
663				$Objects{$DstFile}{$SymName} |= $Dirc;
664			}
665			if ($BndInfo =~ /copy-ref/) {
666				$Symbols{$SymName}{$DstFile}[$ObjFlag] |= $Cpyr;
667				$Objects{$DstFile}{$SymName} |= $Cpyr;
668			}
669			if ($BndInfo =~ /filtee/) {
670				$Symbols{$SymName}{$DstFile}[$ObjFlag] |= $Filt;
671				$Objects{$DstFile}{$SymName} |= $Filt;
672			}
673			if ($BndInfo =~ /interpose/) {
674				$Symbols{$SymName}{$DstFile}[$ObjFlag] |= $Intp;
675				$Objects{$DstFile}{$SymName} |= $Intp;
676			}
677			if ($BndInfo =~ /plt-addr/) {
678				$Symbols{$SymName}{$DstFile}[$ObjFlag] |= $Plta;
679				$Objects{$DstFile}{$SymName} |= $Plta;
680			}
681			if ($Dlsym) {
682				$Symbols{$SymName}{$DstFile}[$ObjFlag] |= $User;
683				$Objects{$DstFile}{$SymName} |= $User;
684			}
685		}
686	}
687	close($FileHandle);
688
689	# Now that we've processed all objects, complete any auxiliary filtee
690	# tagging.  For each filtee, determine which of the symbols it exports
691	# are also defined in its filters.  If a filtee is bound to, the
692	# runtime linkers diagnostics will indicate a filtee binding.  However,
693	# some of the filtee symbols may not be bound to, so here we mark them
694	# all so as to remove them from any interesting output.
695	for my $Filtee (keys(%Filtees)) {
696
697		# Standard filters aren't captured at all, as nothing can bind
698		# to them.
699		if (!exists($Objects{$Filtee})) {
700			next;
701		}
702
703		# Determine what symbols this filtee offers.
704		foreach my $SymName (keys(%{$Objects{$Filtee}})) {
705
706			# Ignore the usual reserved stuff.
707			if (!$opt{a} && (($SymName =~ $MultSyms) ||
708			    ($SymName =~ $CrtSyms))) {
709				next;
710			}
711
712			# Determine whether this symbol exists in our filter.
713			for my $Filter (keys(%{$Filtees{$Filtee}})) {
714				if (!$Symbols{$SymName}{$Filter}) {
715					next;
716				}
717				if (!($Symbols{$SymName}{$Filter}[$ObjFlag] &
718				    $Msft)) {
719					next;
720				}
721				$Symbols{$SymName}{$Filtee}[$ObjFlag] |= $Filt;
722			}
723		}
724	}
725
726	# Process objects and their symbols as required.
727	if ($opt{m}) {
728		# If we're creating a mapfile, traverse each object we've
729		# collected.
730		foreach my $Obj (keys(%Objects)) {
731			my ($File, $Path);
732
733			# Skip any objects that should be ignored
734			if ($Obj =~ $Rtld) {
735				next;
736			}
737
738			# Skip any versioned objects if required.
739			if ($opt{v} && $Versioned{$Obj}) {
740				next;
741			}
742
743			# Open the mapfile if required.
744			$File = basename($Obj);
745			$Path = "$DestDir/mapfile-$File";
746			if (!open(MAPOUT, "> $Path")) {
747				printf STDERR gettext("%s: %s: open failed:" .
748				    "%s\n"), $Prog, $Path, $!;
749				exit 1;
750			}
751
752			# Establish the mapfile preamble.
753			print MAPOUT "#\n# Interface Definition mapfile for:\n";
754			print MAPOUT "#\tDynamic Object: $Obj\n";
755			print MAPOUT "#\tProcess:        $File\n#\n\n";
756
757			# Process each global symbol.
758			print MAPOUT "$File {\n\tglobal:\n";
759
760			foreach my $SymName (sort(keys(%{$Objects{$Obj}}))) {
761				my ($Flag) = $Objects{$Obj}{$SymName};
762
763				# For the first pass we're only interested in
764				# symbols that have been bound to from an
765				# external object, or must be global to enable
766				# a binding to an interposing definition.
767				# Skip bindings to ourself as these are
768				# candidates for demoting to local.
769				if (!($Flag & ($Extn | $Intp))) {
770					next;
771				}
772				if (($Flag & ($Extn | $Self)) == $Self) {
773					next;
774				}
775
776				# Add the demangled name as a comment if
777				# required.
778				if ($opt{C}) {
779					my ($DemName) = Demangle($SymName);
780
781					if ($DemName ne "") {
782						print MAPOUT "\t\t#$DemName\n";
783					}
784				}
785				print MAPOUT "\t\t$SymName;\n";
786			}
787
788			# Process each local demotion.
789			print MAPOUT "\tlocal:\n";
790
791			if ($opt{o}) {
792				foreach my $SymName
793				    (sort(keys(%{$Objects{$Obj}}))) {
794					my ($Flag) = $Objects{$Obj}{$SymName};
795
796					# For this pass we're only interested
797					# in symbol definitions that haven't
798					# been bound to, or have only been
799					# bound to from the same object.
800					if ($Flag & $Extn) {
801						next;
802					}
803
804					# Add the demangled name as a comment if
805					# required.
806					if ($opt{C}) {
807						my ($DemName) =
808						    Demangle($SymName);
809
810						if ($DemName ne "") {
811							print MAPOUT
812							    "\t\t#$DemName\n";
813						}
814					}
815					print MAPOUT "\t\t$SymName;\n";
816				}
817			}
818
819			# Capture everything else as local.
820			print MAPOUT "\t\t\*;\n};\n";
821			close MAPOUT;
822		}
823
824	} else {
825		# If we're gathering information regarding the symbols used by
826		# the process, automatically sort any standard output using the
827		# symbol name.
828		if (!open(SORT, "| sort +1")) {
829			printf STDERR gettext("%s: fork failed: %s\n"),
830			    $Prog, $!;
831			exit 1;
832		}
833
834		foreach my $SymName (keys(%Symbols)) {
835			my ($Cnt);
836
837			# If we're looking for interesting symbols, inspect
838			# each definition of each symbol.  If one is found to
839			# be interesting, the whole family are printed.
840			if (($Cnt = Interesting($SymName)) == 0) {
841				next;
842			}
843
844			# We've found something interesting, or all symbols
845			# should be output.  List all objects that define this
846			# symbol.
847			foreach my $Obj (keys(%{$Symbols{$SymName}})) {
848				my ($DemName, $Type);
849				my ($Flag) = $Symbols{$SymName}{$Obj}[$ObjFlag];
850				my ($Str) = "$Cnt:";
851
852				# Do we just want overhead symbols.  Consider
853				# copy-relocations, and plt address binding,
854				# as overhead too.
855				if ($opt{o} && (($Flag &
856				    ($Extn | $Cpyr | $Plta)) == $Extn)) {
857					next;
858				}
859
860				# Do we just want all symbols that have been
861				# bound to.
862				if (($opt{a} || $opt{o}) && $opt{b} &&
863				    (($Flag & ($Extn | $Self | $Prot)) == 0)) {
864					next;
865				}
866
867				# If we haven't been asked for all symbols, only
868				# print those reserved symbols that have been
869				# bound to, as the number of reserved symbols
870				# can be quite excessive.
871				if (!$opt{a} && ((($SymName =~ $MultSyms) &&
872				    (($Flag & ($Extn | $Self)) == 0)) ||
873				    (($SymName =~ $CrtSyms) && (($Flag &
874				    ($Extn | $Self | $Prot)) == 0)))) {
875					next;
876				}
877
878				# Skip any versioned objects if required.
879				if ($opt{v} && $Versioned{$Obj}) {
880					next;
881				}
882
883				# Display this symbol.
884				if ($Symbols{$SymName}{$Obj}[$ObjRef]) {
885					$Str = $Str .
886					    $Symbols{$SymName}{$Obj}[$ObjRef];
887				} else {
888					$Str = $Str . '0';
889				}
890
891				# Has the symbol been bound to externally
892				if ($Flag & $Extn) {
893					$Str = $Str . 'E';
894				}
895				# Has the symbol been bound to from the same
896				# object.
897				if ($Flag & $Self) {
898					$Str = $Str . 'S';
899				}
900				# Has the symbol been bound to directly.
901				if ($Flag & $Dirc) {
902					$Str = $Str . 'D';
903				}
904				# Does this symbol originate for an explicit
905				# interposer.
906				if ($Flag & $Intp) {
907					$Str = $Str . 'I';
908				}
909				# Is this symbol the reference data of a copy
910				# relocation.
911				if ($Flag & $Cpyr) {
912					$Str = $Str . 'C';
913				}
914				# Is this symbol part of filtee.
915				if ($Flag & $Filt) {
916					$Str = $Str . 'F';
917				}
918				# Is this symbol protected (in which case there
919				# may be a symbolic binding within the same
920				# object to this symbol).
921				if ($Flag & $Prot) {
922					$Str = $Str . 'P';
923				}
924				# Is this symbol an executables .plt address.
925				if ($Flag & $Plta) {
926					$Str = $Str . 'A';
927				}
928				# Does this binding originate from a user
929				# (dlsym) request.
930				if ($Flag & $User) {
931					$Str = $Str . 'U';
932				}
933				# Does this definition redirect the binding.
934				if ($Flag & $Msft) {
935					$Str = $Str . 'R';
936				}
937				# Does this definition explicity define no
938				# direct binding.
939				if ($Flag & $Nodi) {
940					$Str = $Str . 'N';
941				}
942
943				# Determine whether this is a function or a data
944				# object.  For the latter, display the symbol
945				# size.  Otherwise, the symbol is a reserved
946				# label, and is left untyped.
947				if ($Flag & $Func) {
948					$Type = '()';
949				} elsif ($Flag & $Objt) {
950					$Type = '[' .
951					    $Symbols{$SymName}{$Obj}[$ObjSize] .
952					']';
953				} else {
954					$Type = "";
955				}
956
957				# Demangle the symbol name if desired.
958				$DemName = Demangle($SymName);
959
960				if ($Mult) {
961					print SORT "  [$Str]: " .
962					    "$SymName$Type$DemName: $Obj\n";
963				} else {
964					print SORT "[$Str]: " .
965					    "$SymName$Type$DemName: $Obj\n";
966				}
967			}
968		}
969		close SORT;
970	}
971}
972
973# Heuristics to determine whether a symbol binding is interesting.  In most
974# applications there can be a large amount of symbol binding information to
975# wade through.  The most typical binding, to a single definition, probably
976# isn't interesting or the cause of unexpected behavior.  Here, we try and
977# determine those bindings that may can cause unexpected behavior.
978#
979# Note, this routine is actually called for all symbols so that their count
980# can be calculated in one place.
981sub Interesting
982{
983	my ($SymName) = @_;
984	my ($ObjCnt, $GFlags, $BndCnt, $FltCnt, $NodiCnt, $RdirCnt, $ExRef);
985
986	# Scan all definitions of this symbol, thus determining the definition
987	# count, the number of filters, redirections, executable references
988	# (copy-relocations, or plt addresses), no-direct bindings, and the
989	# number of definitions that have been bound to.
990	$ObjCnt = $GFlags = $BndCnt = $FltCnt =
991		$NodiCnt = $RdirCnt = $ExRef = 0;
992	foreach my $Obj (keys(%{$Symbols{$SymName}})) {
993		my ($Flag) = $Symbols{$SymName}{$Obj}[$ObjFlag];
994
995		# Ignore standard filters when determining the symbol count, as
996		# a standard filter can never be bound to.
997		if (($Flag & ($Osft | $Ssft)) == 0) {
998			$ObjCnt++;
999		}
1000
1001		$GFlags |= $Flag;
1002		if ($Flag & $Filt) {
1003			$FltCnt++;
1004		}
1005		if ($Flag & $Nodi) {
1006			$NodiCnt++;
1007		}
1008		if ($Flag & ($Cpyr | $Plta)) {
1009			$ExRef++;
1010		}
1011		if ($Flag & $Msft) {
1012			$RdirCnt++;
1013		}
1014
1015		# Ignore bindings to undefined .plts, and copy-relocation
1016		# references.  These are implementation details, rather than
1017		# a truly interesting multiple-binding.  If a symbol is tagged
1018		# as protected, count it as having bound to itself, even though
1019		# we can't tell if it's really been used.
1020		if (($Flag & ($Self | $Extn | $Prot)) &&
1021		    (($Flag & ($Plta | $Cpyr)) == 0)) {
1022			$BndCnt++;
1023		}
1024	}
1025
1026	# If we want all symbols, return the count.
1027	if (!$opt{i}) {
1028		return $ObjCnt;
1029	}
1030
1031	# Single instance symbol definitions aren't very interesting.
1032	if ($ObjCnt == 1) {
1033		return 0;
1034	}
1035
1036	# Traverse each symbol definition looking for the following:
1037	#
1038	#   .	Multiple symbols are bound to externally.
1039	#   .	A symbol is bound to externally, and possibly symbolically.
1040	#
1041	# Two symbol bindings are acceptable in some cases, and thus aren't
1042	# interesting:
1043	#
1044	#   .	Copy relocations.  Here, the executable binds to a shared object
1045	#	to access the data definition, which is then copied to the
1046	#	executable.  All other references should then bind to the copied
1047	#	data.
1048	#   .	Non-plt relocations to functions that are referenced by the
1049	#	executable will bind to the .plt in the executable.  This
1050	#	provides for address comparison calculations (although plainly
1051	#	an overhead).
1052	#
1053	# Multiple symbol bindings are acceptable in some cases, and thus aren't
1054	# interesting:
1055	#
1056	#   .	Filtees.  Multiple filtees may exist for one filter.
1057	#
1058	if ((($ObjCnt == 2) && ($GFlags & ($Cpyr | $Plta))) ||
1059	    ($ObjCnt == ($FltCnt + 1))) {
1060		return 0;
1061	}
1062
1063	# If we're only interested in multiply-bound symbols.
1064	if (($opt{b} || ($SymName =~ $MultSyms) || ($SymName =~ $CrtSyms)) &&
1065	    ($BndCnt < 2)) {
1066		return (0);
1067	}
1068
1069	# Multiple instances of a definition, where all but one are filter
1070	# references and/or copy relocations, are also uninteresting.
1071	# Effectively, only one symbol is providing the final binding.
1072	if (($FltCnt && $RdirCnt) &&
1073	    (($FltCnt + $RdirCnt + $ExRef) == $ObjCnt)) {
1074		return (0);
1075	}
1076
1077	# Multiple instances of explicitly defined no-direct binding symbols
1078	# are known to occur, and their no-binding definition indicates they
1079	# are expected and accounted for.  Thus, these aren't interesting.
1080	if (($ExRef + $NodiCnt) == $ObjCnt) {
1081		return (0);
1082	}
1083
1084	# We have an interesting symbol, returns its count.
1085	return $ObjCnt;
1086}
1087
1088# Obtain the global symbol definitions of an object and determine whether the
1089# object has been versioned.
1090sub GetAllSymbols {
1091	my ($Obj) = @_;
1092	my (@Elfd, @Elfs, @Elfr, $Type, $Exec, $FileHandle);
1093	my (%AddrToName, %NameToAddr);
1094	my ($Vers) = 0;
1095	my ($Symb) = 0;
1096	my ($Copy) = 0;
1097	my ($Interpose) = 0;
1098	my ($Fltr) = 0;
1099
1100	# Determine whether we've already retrieved this object's symbols.
1101	# Also, ignore the runtime linker, it's on a separate link-map, and
1102	# except for the filtee symbols that might be bound via libdl, is
1103	# uninteresting.
1104	if (($Objects{$Obj}) || ($Obj =~ $Rtld)) {
1105		return;
1106	}
1107
1108	# Get the dynamic information.
1109	@Elfd = split(/\n/, `LC_ALL=C elfdump -d '$Obj' 2> /dev/null`);
1110
1111	# If there's no information, it's possible we've been given a debug
1112	# output file and are processing it from a location from which the
1113	# dependencies specified in the debug file aren't accessible.
1114	if (!@Elfd) {
1115		printf STDERR gettext("%s: %s: unable to process ELF file\n"),
1116		    $Prog, $Obj;
1117
1118		# Add the file to our list, so that we don't create the same
1119		# message again.  Processing should continue so that we can
1120		# flush out as many error messages as possible.
1121		$Objects{$Obj}{"DoesNotExist"} = 0;
1122		return;
1123	}
1124
1125	# If we're processing a filter there's no need to save any symbols, as
1126	# no bindings will occur to this object.
1127	#
1128	# Determine whether we've got a symbolicly bound object.  With newer
1129	# linkers all symbols will be marked as protected ("P"), but with older
1130	# linkers this state could only be intuited from the symbolic dynamic
1131	# tag.
1132	foreach my $Line (@Elfd) {
1133		my (@Fields);
1134		@Fields = split(' ', $Line);
1135
1136		# Determine if the FILTER tag is set.
1137		if ($#Fields == 3) {
1138			if ($Fields[1] eq "FILTER") {
1139				$Fltr |= $Osft;
1140				next;
1141			}
1142			if ($Fields[1] eq "AUXILIARY") {
1143				$Fltr |= $Oaft;
1144				next;
1145			}
1146			next;
1147		}
1148
1149		# We're only interested in the FLAGS entry.
1150		if (($#Fields < 4) || ($Fields[1] !~ "^FLAGS")) {
1151			next;
1152		}
1153		if (($Fields[1] eq "FLAGS") && ($Line =~ " SYMBOLIC ")) {
1154			$Symb = 1;
1155			next;
1156		}
1157		if (($Fields[1] eq "FLAGS_1") && ($Line =~ " INTERPOSE ")) {
1158			$Interpose = 1;
1159		}
1160	}
1161
1162	# If this file is a dynamic executable, determine if this object has
1163	# any copy relocations so that any associated bindings can be labeled
1164	# more meaningfully.
1165	$Type = `LC_ALL=C file '$Obj'`;
1166	if ($Type =~ /executable/) {
1167		$Exec = 1;
1168		# Obtain any copy relocations.
1169		@Elfr = split(/\n/, `LC_ALL=C elfdump -r '$Obj' 2>&1`);
1170
1171		foreach my $Rel (@Elfr) {
1172			my ($SymName, @Fields);
1173
1174			if ($Rel !~ /^\s+R_\S+_COPY /) {
1175				next;
1176			}
1177			@Fields = split(' ', $Rel);
1178			if ($Fields[0] eq 'R_SPARC_COPY') {
1179				$SymName = $Fields[4];
1180			} elsif ($Fields[0] eq 'R_386_COPY') {
1181				$SymName = $Fields[3];
1182			} else {
1183				next;
1184			}
1185
1186			$Symbols{$SymName}{$Obj}[$ObjFlag] |= $Cpyr;
1187			$Objects{$Obj}{$SymName} |= $Cpyr;
1188			$Copy = 1;
1189		}
1190	} else {
1191		$Exec = 0;
1192	}
1193
1194	# Obtain the dynamic symbol table for this object.  Symbol tables can
1195	# be quite large, so open the elfump command through a pipe.
1196	open($FileHandle, "LC_ALL=C elfdump -sN.dynsym '$Obj' 2> /dev/null |");
1197
1198	# Now process all symbols.
1199	while (defined(my $Line = <$FileHandle>)) {
1200		chomp($Line);
1201
1202		my (@Fields) = split(' ', $Line);
1203		my ($Flags);
1204
1205		# We're only interested in defined non-reserved symbol entries.
1206		# Note, ABS and NOTY symbols of non-zero size have been known to
1207		# occur, so capture them.
1208		if (($#Fields < 8) || ($Fields[4] !~ $GlobWeak) ||
1209		    ($Fields[7] =~ $UndefSym) || (!$opt{a} &&
1210		    ($Fields[7] =~ $IgnSyms) && (oct($Fields[2]) eq 0))) {
1211			next;
1212		}
1213
1214		# If we're found copy relocations, save the address and names
1215		# of any OBJT definitions, together with the copy symbol.
1216		if ($Copy && ($Fields[3] eq 'OBJT')) {
1217			push(@{$AddrToName{$Fields[1]}}, $Fields[8]);
1218		}
1219		if (($Symbols{$Fields[8]}{$Obj}) &&
1220		    ($Symbols{$Fields[8]}{$Obj}[$ObjFlag] & $Cpyr)) {
1221			$NameToAddr{$Fields[8]} = $Fields[1];
1222		}
1223
1224		# If the symbol visibility is protected, this is an internal
1225		# symbolic binding (NOTE, an INTERNAL visibility for a global
1226		# symbol is invalid, but for a while ld(1) was setting this
1227		# attribute mistakenly for protected).
1228		# If this is a dynamic executable, mark its symbols as protected
1229		# (they can't be interposed on any more than symbols defined
1230		# protected within shared objects).
1231		$Flags = $Glob | $Fltr;
1232		if (($Fields[5] =~ /^[IP]$/) || $Symb || $Exec) {
1233			$Flags |= $Prot;
1234		}
1235
1236		# If this object is marked as an interposer, tag each symbol.
1237		if ($Interpose) {
1238			$Flags |= $Intp;
1239		}
1240
1241		# Identify the symbol as a function or data type, and for the
1242		# latter, capture the symbol size.  Ignore the standard
1243		# symbolic labels, as we don't want to type them.
1244		if ($Fields[8] !~ $MultSyms) {
1245			if ($Fields[3] =~ /^FUNC$/) {
1246				$Flags |= $Func;
1247			} elsif ($Fields[3] =~ /^OBJT$/) {
1248				my ($Size) = $Fields[2];
1249
1250				if (oct($Size) eq 0) {
1251					$Size = "0";
1252				} else {
1253					$Size =~ s/0x0*/0x/;
1254				}
1255				$Flags |= $Objt;
1256				$Symbols{$Fields[8]}{$Obj}[$ObjSize] = $Size;
1257			}
1258		}
1259
1260		$Symbols{$Fields[8]}{$Obj}[$ObjFlag] |= $Flags;
1261		$Objects{$Obj}{$Fields[8]} |= $Flags;
1262
1263		# If the version field is non-null this object has already been
1264		# versioned.
1265		if (($Vers == 0) && ($Fields[6] ne '0')) {
1266			$Versioned{$Obj} = 1;
1267			$Vers = 1;
1268		}
1269	}
1270	close($FileHandle);
1271
1272	# Obtain any symbol information table for this object.  Symbol tables can
1273	# be quite large, so open the elfump command through a pipe.
1274	open($FileHandle, "LC_ALL=C elfdump -y '$Obj' 2> /dev/null |");
1275
1276	# Now process all symbols.
1277	while (defined(my $Line = <$FileHandle>)) {
1278		chomp($Line);
1279
1280		my (@Fields) = split(' ', $Line);
1281		my ($Flags) = 0;
1282
1283		# Binding attributes are in the second column.
1284		if ($#Fields < 1) {
1285			next;
1286		}
1287		if ($Fields[1] =~ /N/) {
1288			$Flags |= $Nodi
1289		}
1290		if ($Fields[1] =~ /F/) {
1291			$Flags |= $Ssft;
1292		}
1293		if ($Fields[1] =~ /A/) {
1294			$Flags |= $Saft;
1295		}
1296
1297		# Determine the symbol name based upon the number of fields.
1298		if ($Flags && $Symbols{$Fields[$#Fields]}{$Obj}) {
1299			$Symbols{$Fields[$#Fields]}{$Obj}[$ObjFlag] |= $Flags;
1300			$Objects{$Obj}{$Fields[$#Fields]} |= $Flags;
1301		}
1302	}
1303	close($FileHandle);
1304
1305	# If this symbol has already been marked as a copy-relocation reference,
1306	# see if this symbol has any aliases, which should also be marked.
1307	if ($Copy) {
1308		foreach my $SymName (keys(%NameToAddr)) {
1309			my ($Addr) = $NameToAddr{$SymName};
1310
1311			# Determine all symbols that have the same address.
1312			foreach my $AliasName (@{$AddrToName{$Addr}}) {
1313				if ($SymName eq $AliasName) {
1314					next;
1315				}
1316				$Symbols{$AliasName}{$Obj}[$ObjFlag] |= $Cpyr;
1317				$Objects{$Obj}{$AliasName} |= $Cpyr;
1318			}
1319		}
1320	}
1321}
1322
1323# Demangle a symbol name if required.
1324sub Demangle
1325{
1326	my ($SymName) = @_;
1327	my ($DemName);
1328
1329	if ($opt{C}) {
1330		my (@Dem);
1331
1332		# Determine if we've already demangled this name.
1333		if (exists($DemSyms{$SymName})) {
1334			return $DemSyms{$SymName};
1335		}
1336
1337		@Dem = split(/\n/, `dem '$SymName'`);
1338		foreach my $Line (@Dem) {
1339			my (@Fields) = split(' ', $Line);
1340
1341			if (($#Fields < 2) || ($Fields[1] ne '==') ||
1342			    ($Fields[0] eq $Fields[2])) {
1343				next;
1344			}
1345			$DemName = $Line;
1346			$DemName =~ s/.*== (.*)$/ \[$1]/;
1347			$DemSyms{$SymName} = $DemName;
1348			return($DemName);
1349		}
1350	}
1351	$DemSyms{$SymName} = "";
1352	return("");
1353}
1354