xref: /titanic_52/usr/src/cmd/sgs/lari/lari.pl (revision 56a424cca6b3f91f31bdab72a4626c48c779fe8b)
1#!/usr/perl5/bin/perl -w
2#
3# CDDL HEADER START
4#
5# The contents of this file are subject to the terms of the
6# Common Development and Distribution License (the "License").
7# You may not use this file except in compliance with the License.
8#
9# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10# or http://www.opensolaris.org/os/licensing.
11# See the License for the specific language governing permissions
12# and limitations under the License.
13#
14# When distributing Covered Code, include this CDDL HEADER in each
15# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16# If applicable, add the following below this CDDL HEADER, with the
17# fields enclosed by brackets "[]" replaced with your own identifying
18# information: Portions Copyright [yyyy] [name of copyright owner]
19#
20# CDDL HEADER END
21#
22
23#
24# Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
25# Use is subject to license terms.
26#
27# ident	"%Z%%M%	%I%	%E% SMI"
28#
29# Link Analysis of Runtime Interfaces.
30#
31
32# Define all global variables (required for strict)
33use vars  qw($Prog $DestDir $ObjRef $ObjFlag $ObjSize $TmpDir $LddArgs);
34use vars  qw($Glob $Intp $Cpyr $Prot $Extn $Self $Filt $Dirc $Plta $User $Func);
35use vars  qw($Objt $UndefSym $IgnSyms $Rtld $MultSyms $CrtSyms $GlobWeak);
36use vars  qw($DbgSeed %opt %Symbols %Objects %Versioned %DemSyms);
37use vars  qw($Platform $Nodi $Osft $Oaft $Ssft $Saft $Msft);
38
39use strict;
40
41use Getopt::Std;
42use File::Basename;
43
44# Pattern match to skip objects.
45$Rtld = qr{
46	/lib/ld\.so\.1 |
47	/usr/lib/ld\.so\.1 |
48	/lib/sparcv9/ld\.so\.1 |
49	/usr/lib/sparcv9/ld\.so\.1 |
50	/lib/amd64/ld\.so\.1 |
51	/usr/lib/amd64/ld\.so\.1
52}x;
53
54# Pattern matching required to determine a global symbol.
55$GlobWeak = qr{ ^(?:
56	GLOB |
57	WEAK
58	)$
59}x;
60
61# Pattern matching to determine link-editor specific symbols and those common
62# to the compilation environment (ie. provided by all crt's).
63$MultSyms = qr{ ^(?:
64	 _DYNAMIC |
65	 _GLOBAL_OFFSET_TABLE_ |
66	 _PROCEDURE_LINKAGE_TABLE_ |
67	 _etext |
68	 _edata |
69	 _end |
70	 _init |
71	 _fini |
72	 _lib_version |			# Defined in values
73	 __xpg4 |			# Defined in values
74	 __xpg6				# Defined in values
75	)$
76}x;
77
78$CrtSyms = qr{ ^(?:
79	 ___Argv |			# Defined in crt
80	 __environ_lock |		# Defined in crt
81	 _environ |			# Defined in crt
82	 environ			# Defined in crt
83	 )$
84}x;
85
86# Pattern match to remove undefined, NOTY and versioning symbols.
87$UndefSym = qr{ ^(?:
88	UNDEF
89	)$
90}x;
91
92$IgnSyms = qr{ ^(?:
93	NOTY |
94	ABS
95	)$
96}x;
97
98# Symbol flags.
99$Glob = 0x00001;	# symbol is global
100$Intp = 0x00010;	# symbol originates for explicit interposer
101$Dirc = 0x00020;	# symbol bound to directly
102$Cpyr = 0x00040;	# symbol bound to copy-relocation reference
103$Prot = 0x00080;	# symbol is protected (symbolic)
104$Extn = 0x00100;	# symbol has been bound to from an external reference
105$Self = 0x00200;	# symbol has been bound to from the same object
106$Filt = 0x00400;	# symbol bound to a filtee
107$Plta = 0x00800;	# symbol bound to executables plt address
108$User = 0x01000;	# symbol binding originates from user (dlsym) request
109$Func = 0x02000;	# symbol is of type function
110$Objt = 0x04000;	# symbol is of type object
111$Nodi = 0x08000;	# symbol prohibits direct binding
112
113$Osft = 0x10000;	# symbol is an standard object filter
114$Oaft = 0x20000;	# symbol is an auxiliary object filter
115$Ssft = 0x40000;	# symbol is a per-symbol standard filter
116$Saft = 0x80000;	# symbol is a per-symbol auxilary filter
117$Msft = 0xf0000;	# filter mask
118
119# Offsets into $Symbols{$SymName}{$Obj} array.
120$ObjRef =	0;
121$ObjFlag =	1;
122$ObjSize =	2;
123
124
125# Establish locale
126use POSIX qw(locale_h);
127use Sun::Solaris::Utils qw(textdomain gettext);
128
129setlocale(LC_ALL, "");
130textdomain("SUNW_OST_SGS");
131
132# Establish a program name for any error diagnostics.
133$Prog = basename($0);
134
135sub inappropriate {
136	my ($Opt1, $Opt2, $Flag) = @_;
137
138	if ($Flag) {
139	    printf STDERR
140		gettext("%s: inappropriate use of %s with %s: %s ignored\n"),
141		$Prog, $Opt1, $Opt2, $Opt1;
142	} else {
143	    printf STDERR
144		gettext("%s: inappropriate use of %s without %s: %s ignored\n"),
145		$Prog, $Opt1, $Opt2, $Opt1;
146	}
147}
148
149# Cleanup any temporary files on interruption
150sub Cleanup {
151	my ($Sig) = @_;
152
153	$SIG{$Sig} = 'IGNORE';
154
155	if ($DbgSeed ne "") {
156		foreach my $File (<\Q${DbgSeed}\E.*>) {
157			if ($File =~ /^\Q$DbgSeed\E\.\d+$/) {
158				unlink($File);
159			}
160		}
161	}
162	exit 1;
163}
164
165# Check that we have arguments.
166if ((getopts('abCDd:imosv', \%opt) == 0) || ($#ARGV < 0)) {
167	printf STDERR gettext("usage:\n");
168	printf STDERR
169	    gettext("    %s [-bCDsv] [-a | -i | -o ] file | dir ...\n"), $Prog;
170	printf STDERR
171	    gettext("    %s [-CDosv] [-m [-d mapdir]] file\n"), $Prog;
172	print STDERR
173	    gettext("\t[-a]     print diagnostics for all symbols\n");
174	print STDERR
175	    gettext("\t[-b]     print diagnostics for multiple-bound " .
176		"symbols\n");
177	print STDERR
178	    gettext("\t[-C]     print demangled symbol names also\n");
179	print STDERR
180	    gettext("\t[-D]     read debugging information from \"file\"\n");
181	print STDERR
182	    gettext("\t[-d dir] create mapfiles in \"mapdir\"\n");
183	print STDERR
184	    gettext("\t[-i]     print interesting information (default)\n");
185	print STDERR
186	    gettext("\t[-m]     create mapfiles for interface requirements\n");
187	print STDERR
188	    gettext("\t[-o]     print overhead information\n");
189	print STDERR
190	    gettext("\t[-s]     save bindings information created by ldd(1)\n");
191	print STDERR
192	    gettext("\t[-v]     ignore versioned objects\n");
193	exit 1;
194} else {
195	my ($Mult, $Error);
196
197	# Catch any incompatible argument usage.
198	if ($opt{m}) {
199		if ($opt{a}) {
200			inappropriate("-a", "-m", 1);
201			$opt{a} = 0;
202		}
203		if ($opt{i}) {
204			inappropriate("-i", "-m", 1);
205			$opt{i} = 0;
206		}
207	} else {
208		if ($opt{d}) {
209			inappropriate("-d", "-m", 0);
210			$opt{d} = 0;
211		}
212	}
213	if ($opt{a}) {
214		if ($opt{o}) {
215			inappropriate("-a", "-o", 1);
216			$opt{o} = 0;
217		}
218		if ($opt{i}) {
219			inappropriate("-a", "-i", 1);
220			$opt{i} = 0;
221		}
222	}
223	if ($opt{o}) {
224		if ($opt{i}) {
225			inappropriate("-o", "-i", 1);
226			$opt{i} = 0;
227		}
228		if ($opt{b}) {
229			inappropriate("-o", "-b", 1);
230			$opt{b} = 0;
231		}
232	}
233
234	# If -m is used, only one input file is applicable.
235	if ($opt{m} && ($#ARGV != 0)) {
236		printf STDERR gettext("%s: only one input file is allowed " .
237		    "with the -m option\n"), $Prog;
238		exit 1;
239	}
240
241	# Insure any specified directory exists, or apply a default.
242	if ($opt{d}) {
243		# User specified directory - make sure it exists.
244		if (! -d $opt{d}) {
245			printf STDERR gettext("%s: %s is not a directory\n"),
246			    $Prog, $opt{d};
247			exit 1;
248		}
249		$DestDir = $opt{d};
250	} else {
251		$DestDir = ".";
252	}
253
254	# Establish a temporary directory if necessary.
255	if (!$opt{D}) {
256		if (!($TmpDir = $ENV{TMPDIR}) || (! -d $TmpDir)) {
257			$TmpDir = "/tmp";
258		}
259	}
260
261	# Establish any initial ldd(1) argument requirements.
262	if ($LddArgs = $ENV{LARI_LDD_ARGS}) {
263		$LddArgs = $LddArgs . ' -r -e LD_DEBUG=bindings,files,detail';
264	} else {
265		$LddArgs = '-r -e LD_DEBUG=bindings,files,detail';
266	}
267
268	# If we've been asked to demangle symbols, make sure we can find the
269	# demangler.
270	if ($opt{C}) {
271		my ($DemName) = `dem XXXX 2> /dev/null`;
272		if (!$DemName) {
273			printf STDERR gettext("%s: can not locate demangler: " .
274			    "-C ignored\n"), $Prog;
275			$opt{C} = 0;
276		}
277	}
278
279	# If -a or -o hasn't been specified, default to -i.
280	if (!$opt{a} && !$opt{o}) {
281		$opt{i} = 1;
282	}
283
284	# Determine whether we have multiple input files.
285	if ($#ARGV == 0) {
286		$Mult = 0;
287	} else {
288		$Mult = 1;
289	}
290
291	# Determine what platform we're running on - some inappropriate
292	# platform specific dependencies are better skipped.
293	chomp($Platform = `uname -i`);
294
295	# Establish signal handlers
296	$SIG{INT} = \&Cleanup;
297	$SIG{QUIT} = \&Cleanup;
298
299	$DbgSeed = "";
300
301	# For each argument determine if we're dealing with a file or directory.
302	$Error = 0;
303	foreach my $Arg (@ARGV) {
304		if (!stat($Arg)) {
305			printf STDERR gettext("%s: %s: unable to stat file\n"),
306			    $Prog, $Arg;
307			$Error = 1;
308			next;
309		}
310
311		# Process simple files.
312		if (-f _) {
313			if (!-r _) {
314				printf STDERR gettext("%s: %s: unable to " .
315				   "read file\n"), $Prog, $Arg;
316				$Error = 1;
317				next;
318			}
319			if (!$opt{D}) {
320				if (ProcFile($Arg, $Mult, 1) == 0) {
321					$Error = 1;
322				}
323			} else {
324				# If the -D option is specified, read the
325				# bindings debugging information from the
326				# specified file.
327				if ($Mult) {
328					print STDOUT "$Arg:\n";
329				}
330				ProcBindings($Arg, $Mult, $Arg);
331			}
332			next;
333		}
334
335		# Process directories.
336		if (-d _) {
337			ProcDir($Arg);
338			next;
339		}
340
341		printf STDERR gettext("%s: %s: is not a file or directory\n"),
342		    $Prog, $Arg;
343		$Error = 1;
344	}
345	exit $Error;
346}
347
348sub ProcDir {
349	my ($Dir) = @_;
350	my ($File);
351
352	# Open the directory and read each entry, omit "." and "..".  Sorting
353	# the directory listing makes analyzing different source hierarchies
354	# easier.
355	if (opendir(DIR, $Dir)) {
356		foreach my $Entry (sort(readdir(DIR))) {
357			if (($Entry eq '.') || ($Entry eq '..')) {
358				next;
359			}
360
361			# If we're decending into a platform directory, ignore
362			# any inappropriate platform specific files.  These
363			# files can have dependencies that in turn bring in the
364			# appropriate platform specific file, resulting in more
365			# than one dependency offering the same interfaces.  In
366			# practice, the non-appropriate platform specific file
367			# wouldn't be loaded with a process.
368			if (($Dir =~ /\/platform$/) &&
369			    ($Entry !~ /^$Platform$/)) {
370				next;
371			}
372
373			$File = "$Dir/$Entry";
374			if (!lstat($File)) {
375				next;
376			}
377			# Ignore symlinks.
378			if (-l _) {
379				next;
380			}
381
382			# Descend into, and process any directories.
383			if (-d _) {
384				ProcDir($File);
385				next;
386			}
387
388			# Process any standard files.
389			if (-f _ && -r _) {
390				ProcFile($File, 1, 0);
391				next;
392
393			}
394		}
395		closedir(DIR);
396	}
397}
398
399# Process a file.  If the file was explicitly defined on the command-line, and
400# an error occurs, tell the user.  Otherwise, this file probably came about from
401# scanning a directory, in which case just skip it and move on.
402sub ProcFile {
403	my ($File, $Mult, $CmdLine) = @_;
404	my (@Ldd, $NoFound, $DbgFile, @DbgGlob, $Type);
405
406	# If we're scanning a directory (ie. /lib) and have picked up ld.so.1,
407	# ignore it.
408	if (($CmdLine eq 0) && ($File =~ $Rtld)) {
409		return 1;
410	}
411
412	$Type = `LC_ALL=C file '$File' 2>&1`;
413	if (($Type !~ /dynamically linked/) || ($Type =~ /Sun demand paged/)) {
414		if ($CmdLine) {
415			printf STDERR gettext("%s: %s: is an invalid file " .
416			    "type\n"), $Prog, $File;
417		}
418		return 0;
419	}
420
421	# Create a temporary filename for capturing binding information.
422	$DbgSeed = basename($File);
423	$DbgSeed = "$TmpDir/lari.dbg.$$.$DbgSeed";
424
425	# Exercise the file under ldd(1), capturing all the bindings.
426	@Ldd = split(/\n/,
427	    `LC_ALL=C ldd $LddArgs -e LD_DEBUG_OUTPUT='$DbgSeed' '$File' 2>&1`);
428
429	# If ldd isn't -e capable we'll get a usage message.  The -e option was
430	# introduced in Solaris 9 and related patches.  Also, make sure the user
431	# sees any ldd errors.
432	$NoFound = 0;
433	for my $Line (@Ldd) {
434		if ($Line =~ /^usage: ldd/) {
435			printf STDERR gettext("%s: ldd: does not support -e, " .
436			    "unable to capture bindings output\n"), $Prog;
437			exit 1;
438		}
439		if ($Line =~ /not found/) {
440			$NoFound = 1;
441			last;
442		}
443	}
444
445	# The runtime linker will have appended a process id to the debug file.
446	# As we have to intuit the name, make sure there is only one debug
447	# file match, otherwise there must be some clutter in the output
448	# directory that is going to mess up our analysis.
449	foreach my $Match (<\Q${DbgSeed}\E.*>) {
450		if ($Match =~ /^\Q$DbgSeed\E\.\d+$/) {
451			push(@DbgGlob, $Match);
452		}
453	}
454	if (@DbgGlob == 0) {
455		# If there is no debug file, bail.  This can occur if the file
456		# being processed is secure.
457		if ($CmdLine) {
458			printf STDERR gettext("%s: %s: unable to capture " .
459			    "bindings output - possible secure application?\n"),
460			    $Prog, $File;
461		}
462		return 0;
463	} elsif (@DbgGlob > 1) {
464		# Too many debug files found.
465		if ($CmdLine) {
466			printf STDERR gettext("%s: %s: multiple bindings " .
467			    "output files exist: %s: clean up temporary " .
468			    "directory\n"), $Prog, $File, $DbgSeed;
469		}
470		return 0;
471	} else {
472		$DbgFile = $DbgGlob[0];
473	}
474
475	# Ok, we're ready to process the bindings information.  Print a header
476	# if necessary, and if there were any ldd(1) errors push some of them
477	# out before any bindings information.  Limit the output, as it can
478	# sometimes be excessive.  If there are errors, the bindings information
479	# is likely to be incomplete.
480	if ($Mult) {
481		print STDOUT "$File:\n";
482	}
483	if ($NoFound) {
484		my ($Cnt) = 4;
485
486		for my $Line (@Ldd) {
487			if ($Line =~ /not found/) {
488				print STDOUT "$Line\n";
489				$Cnt--;
490			}
491			if ($Cnt == 0) {
492				print STDOUT gettext("\tcontinued ...\n");
493				last;
494			}
495		}
496	}
497
498	# If the user wants the original debugging file left behind, rename it
499	# so that it doesn't get re-read by another instance of lari processing
500	# this file.
501	if ($opt{s}) {
502		rename($DbgFile, $DbgSeed);
503		$DbgFile = $DbgSeed;
504		printf STDOUT gettext("%s: %s: bindings information " .
505		    "saved as: %s\n"), $Prog, $File, $DbgFile;
506	}
507
508	ProcBindings($File, $Mult, $DbgFile);
509
510	# Now that we've finished with the debugging file, nuke it if necessary.
511	if (!$opt{s}) {
512		unlink($DbgFile);
513	}
514	$DbgSeed = "";
515	return 1;
516}
517
518sub ProcBindings {
519	my ($File, $Mult, $DbgFile) = @_;
520	my (%Filtees, $FileHandle);
521
522	# Reinitialize our arrays when we're dealing with multiple files.
523	if ($Mult) {
524		%Symbols = ();
525		%Objects = ();
526		%Versioned = ();
527	}
528
529	# As debugging output can be significant, read a line at a time.
530	open($FileHandle, "<$DbgFile");
531	while (defined(my $Line = <$FileHandle>)) {
532		chomp($Line);
533
534		# If we find a relationship between a filter and filtee, save
535		# it, we'll come back to this once we've gathered everybodies
536		# symbols.
537		if ($Line =~ /;  filtered by /) {
538			my ($Filtee) = $Line;
539			my ($Filter) = $Line;
540
541			# Separate the filter and filtee names, ignore the
542			# runtime linker.
543			$Filtee =~ s/^.*: file=(.*);  filtered by .*/$1/;
544			if ($Filtee =~ $Rtld) {
545				next;
546			}
547			$Filter =~ s/^.*;  filtered by //;
548			$Filtees{$Filtee}{$Filter} = 1;
549			next;
550		}
551
552		# If we find a configuration alternative, determine whether it
553		# is for one of our filtees, and if so record it.
554		if ($Line =~ / configuration alternate found:/) {
555			my ($Orig) = $Line;
556			my ($Altr) = $Line;
557
558			# Separate the original and alternative names.
559			$Orig =~ s/^.*: file=(.*)  .*$/$1/;
560			$Altr =~ s/^.* configuration alternate found: (.*)$/$1/;
561
562			for my $Filtee (keys(%Filtees)) {
563				if ($Filtee ne $Orig) {
564					next;
565				}
566				for my $Filter (keys(%{$Filtees{$Filtee}})) {
567					$Filtees{$Altr}{$Filter} = 1;
568				}
569			}
570			next;
571		}
572
573		# Collect the symbols from any file analyzed.
574		if ($Line =~ /^.*: file=(.*);  analyzing .*/) {
575			GetAllSymbols($1);
576			next;
577		}
578
579		# Process any symbolic relocations that bind to a file.
580		if ($Line =~ /: binding file=.* to file=/) {
581			my ($RefFile, $DstFile, $SymName);
582			my (@Syms, $Found, @Fields);
583			my ($BndInfo) = 0;
584			my ($Offset) = 1;
585			my ($Dlsym) = 0;
586			my ($Detail) = 0;
587
588			# For greatest flexibility, split the line into fields
589			# and walk each field until we find what we need.
590			@Fields = split(' ', $Line);
591
592			# The referencing file, "... binding file=".*".
593			while ($Fields[$Offset]) {
594				if ($Fields[$Offset] =~ /^file=(.*)/) {
595					$RefFile = $1;
596					$Offset++;
597					last;
598				}
599				$Offset++;
600			}
601			# The referencing offset, typically this is the address
602			# of the reference, "(0x1234...)", but in the case of a
603			# user lookup it's the string "(dlsym)".  If we don't
604			# find this offset information we've been given a debug
605			# file that didn't user the "datail" token, in which case
606			# we're not getting all the information we need.
607			if ($Fields[$Offset] =~ /^\((.*)\)/) {
608				if ($1 eq 'dlsym') {
609					$Dlsym = 1;
610				}
611				$Detail = 1;
612				$Offset++;
613			}
614			# The destination file, "... to file=".*".
615			while ($Fields[$Offset]) {
616				if ($Fields[$Offset] =~ /^file=(.*)/) {
617					$DstFile = $1;
618					$Offset++;
619					last;
620				}
621				$Offset++;
622			}
623			# The symbol being bound, "... symbol `.*' ...".
624			while ($Fields[$Offset]) {
625				if ($Fields[$Offset] =~ /^\`(.*)\'$/) {
626					$SymName = $1;
627					$Offset++;
628					last;
629				}
630				$Offset++;
631			}
632			# Possible trailing binding info, "... (direct,.*)$".
633			while ($Fields[$Offset]) {
634				if ($Fields[$Offset] =~ /^\((.*)\)$/) {
635					$BndInfo = $1;
636					$Offset++;
637					last;
638				}
639				$Offset++;
640			}
641
642			if ($Detail == 0) {
643				printf STDERR gettext("%s: %s: debug file " .
644				    "does not contain `detail' information\n"),
645				    $Prog, $DbgFile;
646				return;
647			}
648
649			# Collect the symbols from each object.
650			GetAllSymbols($RefFile);
651			GetAllSymbols($DstFile);
652
653			# Identify that this definition has been bound to.
654			$Symbols{$SymName}{$DstFile}[$ObjRef]++;
655			if ($RefFile eq $DstFile) {
656				# If the reference binds to a definition within
657				# the same file this symbol may be a candidate
658				# for reducing to local.
659				$Symbols{$SymName}{$DstFile}[$ObjFlag] |= $Self;
660				$Objects{$DstFile}{$SymName} |= $Self;
661			} else {
662				# This symbol is required to satisfy an external
663				# reference.
664				$Symbols{$SymName}{$DstFile}[$ObjFlag] |= $Extn;
665				$Objects{$DstFile}{$SymName} |= $Extn;
666			}
667
668			# Assign any other state indicated by the binding info
669			# associated with the diagnostic output.
670			if (!$BndInfo) {
671				next;
672			}
673
674			if ($BndInfo =~ /direct/) {
675				$Symbols{$SymName}{$DstFile}[$ObjFlag] |= $Dirc;
676				$Objects{$DstFile}{$SymName} |= $Dirc;
677			}
678			if ($BndInfo =~ /copy-ref/) {
679				$Symbols{$SymName}{$DstFile}[$ObjFlag] |= $Cpyr;
680				$Objects{$DstFile}{$SymName} |= $Cpyr;
681			}
682			if ($BndInfo =~ /filtee/) {
683				$Symbols{$SymName}{$DstFile}[$ObjFlag] |= $Filt;
684				$Objects{$DstFile}{$SymName} |= $Filt;
685			}
686			if ($BndInfo =~ /interpose/) {
687				$Symbols{$SymName}{$DstFile}[$ObjFlag] |= $Intp;
688				$Objects{$DstFile}{$SymName} |= $Intp;
689			}
690			if ($BndInfo =~ /plt-addr/) {
691				$Symbols{$SymName}{$DstFile}[$ObjFlag] |= $Plta;
692				$Objects{$DstFile}{$SymName} |= $Plta;
693			}
694			if ($Dlsym) {
695				$Symbols{$SymName}{$DstFile}[$ObjFlag] |= $User;
696				$Objects{$DstFile}{$SymName} |= $User;
697			}
698		}
699	}
700	close($FileHandle);
701
702	# Now that we've processed all objects, complete any auxiliary filtee
703	# tagging.  For each filtee, determine which of the symbols it exports
704	# are also defined in its filters.  If a filtee is bound to, the
705	# runtime linkers diagnostics will indicate a filtee binding.  However,
706	# some of the filtee symbols may not be bound to, so here we mark them
707	# all so as to remove them from any interesting output.
708	for my $Filtee (keys(%Filtees)) {
709
710		# Standard filters aren't captured at all, as nothing can bind
711		# to them.
712		if (!exists($Objects{$Filtee})) {
713			next;
714		}
715
716		# Determine what symbols this filtee offers.
717		foreach my $SymName (keys(%{$Objects{$Filtee}})) {
718
719			# Ignore the usual reserved stuff.
720			if (!$opt{a} && (($SymName =~ $MultSyms) ||
721			    ($SymName =~ $CrtSyms))) {
722				next;
723			}
724
725			# Determine whether this symbol exists in our filter.
726			for my $Filter (keys(%{$Filtees{$Filtee}})) {
727				if (!$Symbols{$SymName}{$Filter}) {
728					next;
729				}
730				if (!($Symbols{$SymName}{$Filter}[$ObjFlag] &
731				    $Msft)) {
732					next;
733				}
734				$Symbols{$SymName}{$Filtee}[$ObjFlag] |= $Filt;
735			}
736		}
737	}
738
739	# Process objects and their symbols as required.
740	if ($opt{m}) {
741		# If we're creating a mapfile, traverse each object we've
742		# collected.
743		foreach my $Obj (keys(%Objects)) {
744			my ($File, $Path);
745
746			# Skip any objects that should be ignored.
747			if ($Obj =~ $Rtld) {
748				next;
749			}
750
751			# Skip any versioned objects if required.
752			if ($opt{v} && $Versioned{$Obj}) {
753				next;
754			}
755
756			# Open the mapfile if required.
757			$File = basename($Obj);
758			$Path = "$DestDir/mapfile-$File";
759			if (!open(MAPOUT, "> $Path")) {
760				printf STDERR gettext("%s: %s: open failed:" .
761				    "%s\n"), $Prog, $Path, $!;
762				exit 1;
763			}
764
765			# Establish the mapfile preamble.
766			print MAPOUT "#\n# Interface Definition mapfile for:\n";
767			print MAPOUT "#\tDynamic Object: $Obj\n";
768			print MAPOUT "#\tProcess:        $File\n#\n\n";
769
770			# Process each global symbol.
771			print MAPOUT "$File {\n\tglobal:\n";
772
773			foreach my $SymName (sort(keys(%{$Objects{$Obj}}))) {
774				my ($Flag) = $Objects{$Obj}{$SymName};
775
776				# For the first pass we're only interested in
777				# symbols that have been bound to from an
778				# external object, or must be global to enable
779				# a binding to an interposing definition.
780				# Skip bindings to ourself as these are
781				# candidates for demoting to local.
782				if (!($Flag & ($Extn | $Intp))) {
783					next;
784				}
785				if (($Flag & ($Extn | $Self)) == $Self) {
786					next;
787				}
788
789				# Add the demangled name as a comment if
790				# required.
791				if ($opt{C}) {
792					my ($DemName) = Demangle($SymName);
793
794					if ($DemName ne "") {
795						print MAPOUT "\t\t#$DemName\n";
796					}
797				}
798				print MAPOUT "\t\t$SymName;\n";
799			}
800
801			# Process each local demotion.
802			print MAPOUT "\tlocal:\n";
803
804			if ($opt{o}) {
805				foreach my $SymName
806				    (sort(keys(%{$Objects{$Obj}}))) {
807					my ($Flag) = $Objects{$Obj}{$SymName};
808
809					# For this pass we're only interested
810					# in symbol definitions that haven't
811					# been bound to, or have only been
812					# bound to from the same object.
813					if ($Flag & $Extn) {
814						next;
815					}
816
817					# Add the demangled name as a comment if
818					# required.
819					if ($opt{C}) {
820						my ($DemName) =
821						    Demangle($SymName);
822
823						if ($DemName ne "") {
824							print MAPOUT
825							    "\t\t#$DemName\n";
826						}
827					}
828					print MAPOUT "\t\t$SymName;\n";
829				}
830			}
831
832			# Capture everything else as local.
833			print MAPOUT "\t\t\*;\n};\n";
834			close MAPOUT;
835		}
836
837	} else {
838		# If we're gathering information regarding the symbols used by
839		# the process, automatically sort any standard output using the
840		# symbol name.
841		if (!open(SORT, "| sort +1")) {
842			printf STDERR gettext("%s: fork failed: %s\n"),
843			    $Prog, $!;
844			exit 1;
845		}
846
847		foreach my $SymName (keys(%Symbols)) {
848			my ($Cnt);
849
850			# If we're looking for interesting symbols, inspect
851			# each definition of each symbol.  If one is found to
852			# be interesting, the whole family are printed.
853			if (($Cnt = Interesting($SymName)) == 0) {
854				next;
855			}
856
857			# We've found something interesting, or all symbols
858			# should be output.  List all objects that define this
859			# symbol.
860			foreach my $Obj (keys(%{$Symbols{$SymName}})) {
861				my ($DemName, $Type);
862				my ($Flag) = $Symbols{$SymName}{$Obj}[$ObjFlag];
863				my ($Str) = "$Cnt:";
864
865				# Do we just want overhead symbols.  Consider
866				# copy-relocations, and plt address binding,
867				# as overhead too.
868				if ($opt{o} && (($Flag &
869				    ($Extn | $Cpyr | $Plta)) == $Extn)) {
870					next;
871				}
872
873				# Do we just want all symbols that have been
874				# bound to.
875				if (($opt{a} || $opt{o}) && $opt{b} &&
876				    (($Flag & ($Extn | $Self | $Prot)) == 0)) {
877					next;
878				}
879
880				# If we haven't been asked for all symbols, only
881				# print those reserved symbols that have been
882				# bound to, as the number of reserved symbols
883				# can be quite excessive.
884				if (!$opt{a} && ((($SymName =~ $MultSyms) &&
885				    (($Flag & ($Extn | $Self)) == 0)) ||
886				    (($SymName =~ $CrtSyms) && (($Flag &
887				    ($Extn | $Self | $Prot)) == 0)))) {
888					next;
889				}
890
891				# Skip any versioned objects if required.
892				if ($opt{v} && $Versioned{$Obj}) {
893					next;
894				}
895
896				# Display this symbol.
897				if ($Symbols{$SymName}{$Obj}[$ObjRef]) {
898					$Str = $Str .
899					    $Symbols{$SymName}{$Obj}[$ObjRef];
900				} else {
901					$Str = $Str . '0';
902				}
903
904				# Has the symbol been bound to externally
905				if ($Flag & $Extn) {
906					$Str = $Str . 'E';
907				}
908				# Has the symbol been bound to from the same
909				# object.
910				if ($Flag & $Self) {
911					$Str = $Str . 'S';
912				}
913				# Has the symbol been bound to directly.
914				if ($Flag & $Dirc) {
915					$Str = $Str . 'D';
916				}
917				# Does this symbol originate for an explicit
918				# interposer.
919				if ($Flag & $Intp) {
920					$Str = $Str . 'I';
921				}
922				# Is this symbol the reference data of a copy
923				# relocation.
924				if ($Flag & $Cpyr) {
925					$Str = $Str . 'C';
926				}
927				# Is this symbol part of filtee.
928				if ($Flag & $Filt) {
929					$Str = $Str . 'F';
930				}
931				# Is this symbol protected (in which case there
932				# may be a symbolic binding within the same
933				# object to this symbol).
934				if ($Flag & $Prot) {
935					$Str = $Str . 'P';
936				}
937				# Is this symbol an executables .plt address.
938				if ($Flag & $Plta) {
939					$Str = $Str . 'A';
940				}
941				# Does this binding originate from a user
942				# (dlsym) request.
943				if ($Flag & $User) {
944					$Str = $Str . 'U';
945				}
946				# Does this definition redirect the binding.
947				if ($Flag & $Msft) {
948					$Str = $Str . 'R';
949				}
950				# Does this definition explicity define no
951				# direct binding.
952				if ($Flag & $Nodi) {
953					$Str = $Str . 'N';
954				}
955
956				# Determine whether this is a function or a data
957				# object.  For the latter, display the symbol
958				# size.  Otherwise, the symbol is a reserved
959				# label, and is left untyped.
960				if ($Flag & $Func) {
961					$Type = '()';
962				} elsif ($Flag & $Objt) {
963					$Type = '[' .
964					    $Symbols{$SymName}{$Obj}[$ObjSize] .
965					']';
966				} else {
967					$Type = "";
968				}
969
970				# Demangle the symbol name if desired.
971				$DemName = Demangle($SymName);
972
973				if ($Mult) {
974					print SORT "  [$Str]: " .
975					    "$SymName$Type$DemName: $Obj\n";
976				} else {
977					print SORT "[$Str]: " .
978					    "$SymName$Type$DemName: $Obj\n";
979				}
980			}
981		}
982		close SORT;
983	}
984}
985
986# Heuristics to determine whether a symbol binding is interesting.  In most
987# applications there can be a large amount of symbol binding information to
988# wade through.  The most typical binding, to a single definition, probably
989# isn't interesting or the cause of unexpected behavior.  Here, we try and
990# determine those bindings that may can cause unexpected behavior.
991#
992# Note, this routine is actually called for all symbols so that their count
993# can be calculated in one place.
994sub Interesting
995{
996	my ($SymName) = @_;
997	my ($ObjCnt, $GFlags, $BndCnt, $FltCnt, $NodiCnt, $RdirCnt, $ExRef);
998
999	# Scan all definitions of this symbol, thus determining the definition
1000	# count, the number of filters, redirections, executable references
1001	# (copy-relocations, or plt addresses), no-direct bindings, and the
1002	# number of definitions that have been bound to.
1003	$ObjCnt = $GFlags = $BndCnt = $FltCnt =
1004		$NodiCnt = $RdirCnt = $ExRef = 0;
1005	foreach my $Obj (keys(%{$Symbols{$SymName}})) {
1006		my ($Flag) = $Symbols{$SymName}{$Obj}[$ObjFlag];
1007
1008		# Ignore standard filters when determining the symbol count, as
1009		# a standard filter can never be bound to.
1010		if (($Flag & ($Osft | $Ssft)) == 0) {
1011			$ObjCnt++;
1012		}
1013
1014		$GFlags |= $Flag;
1015		if ($Flag & $Filt) {
1016			$FltCnt++;
1017		}
1018		if ($Flag & $Nodi) {
1019			$NodiCnt++;
1020		}
1021		if ($Flag & ($Cpyr | $Plta)) {
1022			$ExRef++;
1023		}
1024		if ($Flag & $Msft) {
1025			$RdirCnt++;
1026		}
1027
1028		# Ignore bindings to undefined .plts, and copy-relocation
1029		# references.  These are implementation details, rather than
1030		# a truly interesting multiple-binding.  If a symbol is tagged
1031		# as protected, count it as having bound to itself, even though
1032		# we can't tell if it's really been used.
1033		if (($Flag & ($Self | $Extn | $Prot)) &&
1034		    (($Flag & ($Plta | $Cpyr)) == 0)) {
1035			$BndCnt++;
1036		}
1037	}
1038
1039	# If we want all overhead symbols, return the count.
1040	if ($opt{o}) {
1041		return $ObjCnt;
1042	}
1043
1044	# If we want all symbols, return the count.  If we want all bound
1045	# symbols, return the count provided it is non-zero.
1046	if ($opt{a} && (!$opt{b} || ($BndCnt > 0))) {
1047		return $ObjCnt;
1048	}
1049
1050	# Single instance symbol definitions aren't very interesting.
1051	if ($ObjCnt == 1) {
1052		return 0;
1053	}
1054
1055	# Traverse each symbol definition looking for the following:
1056	#
1057	#   .	Multiple symbols are bound to externally.
1058	#   .	A symbol is bound to externally, and possibly symbolically.
1059	#
1060	# Two symbol bindings are acceptable in some cases, and thus aren't
1061	# interesting:
1062	#
1063	#   .	Copy relocations.  Here, the executable binds to a shared object
1064	#	to access the data definition, which is then copied to the
1065	#	executable.  All other references should then bind to the copied
1066	#	data.
1067	#   .	Non-plt relocations to functions that are referenced by the
1068	#	executable will bind to the .plt in the executable.  This
1069	#	provides for address comparison calculations (although plainly
1070	#	an overhead).
1071	#
1072	# Multiple symbol bindings are acceptable in some cases, and thus aren't
1073	# interesting:
1074	#
1075	#   .	Filtees.  Multiple filtees may exist for one filter.
1076	#
1077	if ((($ObjCnt == 2) && ($GFlags & ($Cpyr | $Plta))) ||
1078	    ($ObjCnt == ($FltCnt + 1))) {
1079		return 0;
1080	}
1081
1082	# Only display any reserved symbols if more than one binding has
1083	# occurred.
1084	if ((($SymName =~ $MultSyms) || ($SymName =~ $CrtSyms)) &&
1085	    ($BndCnt < 2)) {
1086		return (0);
1087	}
1088
1089	# For all other symbols, determine whether a binding has occurred.
1090	# Note: definitions within an executable are tagged as protected ("P")
1091	# as they may have been bound to from within the executable - we can't
1092	# tell.
1093	if ($opt{b} && ($BndCnt == 0)) {
1094		return (0);
1095	}
1096
1097	# Multiple instances of a definition, where all but one are filter
1098	# references and/or copy relocations, are also uninteresting.
1099	# Effectively, only one symbol is providing the final binding.
1100	if (($FltCnt && $RdirCnt) &&
1101	    (($FltCnt + $RdirCnt + $ExRef) == $ObjCnt)) {
1102		return (0);
1103	}
1104
1105	# Multiple instances of explicitly defined no-direct binding symbols
1106	# are known to occur, and their no-binding definition indicates they
1107	# are expected and accounted for.  Thus, these aren't interesting.
1108	if (($ExRef + $NodiCnt) == $ObjCnt) {
1109		return (0);
1110	}
1111
1112	# We have an interesting symbol, returns its count.
1113	return $ObjCnt;
1114}
1115
1116# Obtain the global symbol definitions of an object and determine whether the
1117# object has been versioned.
1118sub GetAllSymbols {
1119	my ($Obj) = @_;
1120	my (@Elfd, @Elfs, @Elfr, $Type, $Exec, $FileHandle);
1121	my (%AddrToName, %NameToAddr);
1122	my ($Vers) = 0;
1123	my ($Symb) = 0;
1124	my ($Copy) = 0;
1125	my ($Interpose) = 0;
1126	my ($Fltr) = 0;
1127
1128	# Determine whether we've already retrieved this object's symbols.
1129	# Also, ignore the runtime linker, it's on a separate link-map, and
1130	# except for the filtee symbols that might be bound via libdl, is
1131	# uninteresting.  Tag the runtime linker as versioned to simplify
1132	# possible -v processing.
1133	if ($Objects{$Obj}) {
1134		return;
1135	}
1136
1137	if ($Obj =~ $Rtld) {
1138		$Versioned{$Obj} = 1;
1139		return;
1140	}
1141
1142	# Get the dynamic information.
1143	@Elfd = split(/\n/, `LC_ALL=C elfdump -d '$Obj' 2> /dev/null`);
1144
1145	# If there's no information, it's possible we've been given a debug
1146	# output file and are processing it from a location from which the
1147	# dependencies specified in the debug file aren't accessible.
1148	if (!@Elfd) {
1149		printf STDERR gettext("%s: %s: unable to process ELF file\n"),
1150		    $Prog, $Obj;
1151
1152		# Add the file to our list, so that we don't create the same
1153		# message again.  Processing should continue so that we can
1154		# flush out as many error messages as possible.
1155		$Objects{$Obj}{"DoesNotExist"} = 0;
1156		return;
1157	}
1158
1159	# If we're processing a filter there's no need to save any symbols, as
1160	# no bindings will occur to this object.
1161	#
1162	# Determine whether we've got a symbolicly bound object.  With newer
1163	# linkers all symbols will be marked as protected ("P"), but with older
1164	# linkers this state could only be intuited from the symbolic dynamic
1165	# tag.
1166	foreach my $Line (@Elfd) {
1167		my (@Fields);
1168		@Fields = split(' ', $Line);
1169
1170		# Determine if the FILTER tag is set.
1171		if ($#Fields == 3) {
1172			if ($Fields[1] eq "FILTER") {
1173				$Fltr |= $Osft;
1174				next;
1175			}
1176			if ($Fields[1] eq "AUXILIARY") {
1177				$Fltr |= $Oaft;
1178				next;
1179			}
1180			next;
1181		}
1182
1183		# We're only interested in the FLAGS entry.
1184		if (($#Fields < 4) || ($Fields[1] !~ "^FLAGS")) {
1185			next;
1186		}
1187		if (($Fields[1] eq "FLAGS") && ($Line =~ " SYMBOLIC ")) {
1188			$Symb = 1;
1189			next;
1190		}
1191		if (($Fields[1] eq "FLAGS_1") && ($Line =~ " INTERPOSE ")) {
1192			$Interpose = 1;
1193		}
1194	}
1195
1196	# If this file is a dynamic executable, determine if this object has
1197	# any copy relocations so that any associated bindings can be labeled
1198	# more meaningfully.
1199	$Type = `LC_ALL=C file '$Obj'`;
1200	if ($Type =~ /executable/) {
1201		$Exec = 1;
1202		# Obtain any copy relocations.
1203		@Elfr = split(/\n/, `LC_ALL=C elfdump -r '$Obj' 2>&1`);
1204
1205		foreach my $Rel (@Elfr) {
1206			my ($SymName, @Fields);
1207
1208			if ($Rel !~ / R_[A-Z0-9]+_COPY /) {
1209				next;
1210			}
1211			@Fields = split(' ', $Rel);
1212			# Intel relocation records don't contain an addend,
1213			# where as every other supported platform does.
1214			if ($Fields[0] eq 'R_386_COPY') {
1215				$SymName = $Fields[3];
1216			} else {
1217				$SymName = $Fields[4];
1218			}
1219
1220			$Symbols{$SymName}{$Obj}[$ObjFlag] |= $Cpyr;
1221			$Objects{$Obj}{$SymName} |= $Cpyr;
1222			$Copy = 1;
1223		}
1224	} else {
1225		$Exec = 0;
1226	}
1227
1228	# Obtain the dynamic symbol table for this object.  Symbol tables can
1229	# be quite large, so open the elfump command through a pipe.
1230	open($FileHandle, "LC_ALL=C elfdump -sN.dynsym '$Obj' 2> /dev/null |");
1231
1232	# Now process all symbols.
1233	while (defined(my $Line = <$FileHandle>)) {
1234		chomp($Line);
1235
1236		my (@Fields) = split(' ', $Line);
1237		my ($Flags);
1238
1239		# We're only interested in defined non-reserved symbol entries.
1240		# Note, ABS and NOTY symbols of non-zero size have been known to
1241		# occur, so capture them.
1242		if (($#Fields < 8) || ($Fields[4] !~ $GlobWeak) ||
1243		    ($Fields[7] =~ $UndefSym) || (!$opt{a} &&
1244		    ($Fields[7] =~ $IgnSyms) && (oct($Fields[2]) eq 0))) {
1245			next;
1246		}
1247
1248		# If we're found copy relocations, save the address and names
1249		# of any OBJT definitions, together with the copy symbol.
1250		if ($Copy && ($Fields[3] eq 'OBJT')) {
1251			push(@{$AddrToName{$Fields[1]}}, $Fields[8]);
1252		}
1253		if (($Symbols{$Fields[8]}{$Obj}) &&
1254		    ($Symbols{$Fields[8]}{$Obj}[$ObjFlag] & $Cpyr)) {
1255			$NameToAddr{$Fields[8]} = $Fields[1];
1256		}
1257
1258		# If the symbol visibility is protected, this is an internal
1259		# symbolic binding (NOTE, an INTERNAL visibility for a global
1260		# symbol is invalid, but for a while ld(1) was setting this
1261		# attribute mistakenly for protected).
1262		# If this is a dynamic executable, mark its symbols as protected
1263		# (they can't be interposed on any more than symbols defined
1264		# protected within shared objects).
1265		$Flags = $Glob | $Fltr;
1266		if (($Fields[5] =~ /^[IP]$/) || $Symb || $Exec) {
1267			$Flags |= $Prot;
1268		}
1269
1270		# If this object is marked as an interposer, tag each symbol.
1271		if ($Interpose) {
1272			$Flags |= $Intp;
1273		}
1274
1275		# Identify the symbol as a function or data type, and for the
1276		# latter, capture the symbol size.  Ignore the standard
1277		# symbolic labels, as we don't want to type them.
1278		if ($Fields[8] !~ $MultSyms) {
1279			if ($Fields[3] =~ /^FUNC$/) {
1280				$Flags |= $Func;
1281			} elsif ($Fields[3] =~ /^OBJT$/) {
1282				my ($Size) = $Fields[2];
1283
1284				if (oct($Size) eq 0) {
1285					$Size = "0";
1286				} else {
1287					$Size =~ s/0x0*/0x/;
1288				}
1289				$Flags |= $Objt;
1290				$Symbols{$Fields[8]}{$Obj}[$ObjSize] = $Size;
1291			}
1292		}
1293
1294		$Symbols{$Fields[8]}{$Obj}[$ObjFlag] |= $Flags;
1295		$Objects{$Obj}{$Fields[8]} |= $Flags;
1296
1297		# If the version field is non-null this object has already been
1298		# versioned.
1299		if (($Vers == 0) && ($Fields[6] ne '0')) {
1300			$Versioned{$Obj} = 1;
1301			$Vers = 1;
1302		}
1303	}
1304	close($FileHandle);
1305
1306	# Obtain any symbol information table for this object.  Symbol tables can
1307	# be quite large, so open the elfump command through a pipe.
1308	open($FileHandle, "LC_ALL=C elfdump -y '$Obj' 2> /dev/null |");
1309
1310	# Now process all symbols.
1311	while (defined(my $Line = <$FileHandle>)) {
1312		chomp($Line);
1313
1314		my (@Fields) = split(' ', $Line);
1315		my ($Flags) = 0;
1316
1317		# Binding attributes are in the second column.
1318		if ($#Fields < 1) {
1319			next;
1320		}
1321		if ($Fields[1] =~ /N/) {
1322			$Flags |= $Nodi
1323		}
1324		if ($Fields[1] =~ /F/) {
1325			$Flags |= $Ssft;
1326		}
1327		if ($Fields[1] =~ /A/) {
1328			$Flags |= $Saft;
1329		}
1330
1331		# Determine the symbol name based upon the number of fields.
1332		if ($Flags && $Symbols{$Fields[$#Fields]}{$Obj}) {
1333			$Symbols{$Fields[$#Fields]}{$Obj}[$ObjFlag] |= $Flags;
1334			$Objects{$Obj}{$Fields[$#Fields]} |= $Flags;
1335		}
1336	}
1337	close($FileHandle);
1338
1339	# If this symbol has already been marked as a copy-relocation reference,
1340	# see if this symbol has any aliases, which should also be marked.
1341	if ($Copy) {
1342		foreach my $SymName (keys(%NameToAddr)) {
1343			my ($Addr) = $NameToAddr{$SymName};
1344
1345			# Determine all symbols that have the same address.
1346			foreach my $AliasName (@{$AddrToName{$Addr}}) {
1347				if ($SymName eq $AliasName) {
1348					next;
1349				}
1350				$Symbols{$AliasName}{$Obj}[$ObjFlag] |= $Cpyr;
1351				$Objects{$Obj}{$AliasName} |= $Cpyr;
1352			}
1353		}
1354	}
1355}
1356
1357# Demangle a symbol name if required.
1358sub Demangle
1359{
1360	my ($SymName) = @_;
1361	my ($DemName);
1362
1363	if ($opt{C}) {
1364		my (@Dem);
1365
1366		# Determine if we've already demangled this name.
1367		if (exists($DemSyms{$SymName})) {
1368			return $DemSyms{$SymName};
1369		}
1370
1371		@Dem = split(/\n/, `dem '$SymName'`);
1372		foreach my $Line (@Dem) {
1373			my (@Fields) = split(' ', $Line);
1374
1375			if (($#Fields < 2) || ($Fields[1] ne '==') ||
1376			    ($Fields[0] eq $Fields[2])) {
1377				next;
1378			}
1379			$DemName = $Line;
1380			$DemName =~ s/.*== (.*)$/ \[$1]/;
1381			$DemSyms{$SymName} = $DemName;
1382			return($DemName);
1383		}
1384	}
1385	$DemSyms{$SymName} = "";
1386	return("");
1387}
1388