xref: /illumos-gate/usr/src/cmd/sgs/lari/lari.pl (revision 98677c366f39bc9e671513615d9b1a2c6f15621d)
1#!/usr/perl5/bin/perl -w
2#
3# CDDL HEADER START
4#
5# The contents of this file are subject to the terms of the
6# Common Development and Distribution License (the "License").
7# You may not use this file except in compliance with the License.
8#
9# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10# or http://www.opensolaris.org/os/licensing.
11# See the License for the specific language governing permissions
12# and limitations under the License.
13#
14# When distributing Covered Code, include this CDDL HEADER in each
15# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16# If applicable, add the following below this CDDL HEADER, with the
17# fields enclosed by brackets "[]" replaced with your own identifying
18# information: Portions Copyright [yyyy] [name of copyright owner]
19#
20# CDDL HEADER END
21#
22
23#
24# Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
25# Use is subject to license terms.
26#
27# ident	"%Z%%M%	%I%	%E% SMI"
28#
29# Link Analysis of Runtime Interfaces.
30#
31
32# Define all global variables (required for strict)
33use vars  qw($Prog $DestDir $ObjRef $ObjFlag $ObjSize $TmpDir $LddArgs);
34use vars  qw($Glob $Intp $Cpyr $Prot $Extn $Self $Filt $Dirc $Plta $User $Func);
35use vars  qw($Objt $UndefSym $IgnSyms $Rtld $MultSyms $CrtSyms $GlobWeak);
36use vars  qw($DbgSeed %opt %Symbols %Objects %Versioned %DemSyms);
37use vars  qw($Platform $Nodi $Osft $Oaft $Ssft $Saft $Msft);
38
39use strict;
40
41use Getopt::Std;
42use File::Basename;
43
44# Pattern match to skip objects.
45$Rtld = qr{ ^(?:
46	/lib/ld\.so\.1 |
47	/usr/lib/ld\.so\.1 |
48	/lib/sparcv9/ld\.so\.1 |
49	/usr/lib/sparcv9/ld\.so\.1 |
50	/lib/amd64/ld\.so\.1 |
51	/usr/lib/amd64/ld\.so\.1
52	)$
53}x;
54
55# Pattern matching required to determine a global symbol.
56$GlobWeak = qr{ ^(?:
57	GLOB |
58	WEAK
59	)$
60}x;
61
62# Pattern matching to determine link-editor specific symbols and those common
63# to the compilation environment (ie. provided by all crt's).
64$MultSyms = qr{ ^(?:
65	 _DYNAMIC |
66	 _GLOBAL_OFFSET_TABLE_ |
67	 _PROCEDURE_LINKAGE_TABLE_ |
68	 _etext |
69	 _edata |
70	 _end |
71	 _init |
72	 _fini |
73	 _lib_version |			# Defined in values
74	 __xpg4 |			# Defined in values
75	 __xpg6				# Defined in values
76	)$
77}x;
78
79$CrtSyms = qr{ ^(?:
80	 ___Argv |			# Defined in crt
81	 __environ_lock |		# Defined in crt
82	 _environ |			# Defined in crt
83	 environ			# Defined in crt
84	 )$
85}x;
86
87# Pattern match to remove undefined, NOTY and versioning symbols.
88$UndefSym = qr{ ^(?:
89	UNDEF
90	)$
91}x;
92
93$IgnSyms = qr{ ^(?:
94	NOTY |
95	ABS
96	)$
97}x;
98
99# Symbol flags.
100$Glob = 0x00001;	# symbol is global
101$Intp = 0x00010;	# symbol originates for explicit interposer
102$Dirc = 0x00020;	# symbol bound to directly
103$Cpyr = 0x00040;	# symbol bound to copy-relocation reference
104$Prot = 0x00080;	# symbol is protected (symbolic)
105$Extn = 0x00100;	# symbol has been bound to from an external reference
106$Self = 0x00200;	# symbol has been bound to from the same object
107$Filt = 0x00400;	# symbol bound to a filtee
108$Plta = 0x00800;	# symbol bound to executables plt address
109$User = 0x01000;	# symbol binding originates from user (dlsym) request
110$Func = 0x02000;	# symbol is of type function
111$Objt = 0x04000;	# symbol is of type object
112$Nodi = 0x08000;	# symbol prohibits direct binding
113
114$Osft = 0x10000;	# symbol is an standard object filter
115$Oaft = 0x20000;	# symbol is an auxiliary object filter
116$Ssft = 0x40000;	# symbol is a per-symbol standard filter
117$Saft = 0x80000;	# symbol is a per-symbol auxilary filter
118$Msft = 0xf0000;	# filter mask
119
120# Offsets into $Symbols{$SymName}{$Obj} array.
121$ObjRef =	0;
122$ObjFlag =	1;
123$ObjSize =	2;
124
125
126# Establish locale
127use POSIX qw(locale_h);
128use Sun::Solaris::Utils qw(textdomain gettext);
129
130setlocale(LC_ALL, "");
131textdomain("SUNW_OST_SGS");
132
133# Establish a program name for any error diagnostics.
134$Prog = basename($0);
135
136sub inappropriate {
137	my ($Opt1, $Opt2, $Flag) = @_;
138
139	if ($Flag) {
140	    printf STDERR
141		gettext("%s: inappropriate use of %s with %s: %s ignored\n"),
142		$Prog, $Opt1, $Opt2, $Opt1;
143	} else {
144	    printf STDERR
145		gettext("%s: inappropriate use of %s without %s: %s ignored\n"),
146		$Prog, $Opt1, $Opt2, $Opt1;
147	}
148}
149
150# Cleanup any temporary files on interruption
151sub Cleanup {
152	my ($Sig) = @_;
153
154	$SIG{$Sig} = 'IGNORE';
155
156	if ($DbgSeed ne "") {
157		foreach my $File (<\Q${DbgSeed}\E.*>) {
158			if ($File =~ /^\Q$DbgSeed\E\.\d+$/) {
159				unlink($File);
160			}
161		}
162	}
163	exit 1;
164}
165
166# Check that we have arguments.
167if ((getopts('abCDd:imosv', \%opt) == 0) || ($#ARGV < 0)) {
168	printf STDERR gettext("usage:\n");
169	printf STDERR
170	    gettext("    %s [-bCDsv] [-a | -i | -o ] file | dir ...\n"), $Prog;
171	printf STDERR
172	    gettext("    %s [-CDosv] [-m [-d mapdir]] file\n"), $Prog;
173	print STDERR
174	    gettext("\t[-a]     print diagnostics for all symbols\n");
175	print STDERR
176	    gettext("\t[-b]     print diagnostics for multiple-bound " .
177		"symbols\n");
178	print STDERR
179	    gettext("\t[-C]     print demangled symbol names also\n");
180	print STDERR
181	    gettext("\t[-D]     read debugging information from \"file\"\n");
182	print STDERR
183	    gettext("\t[-d dir] create mapfiles in \"mapdir\"\n");
184	print STDERR
185	    gettext("\t[-i]     print interesting information (default)\n");
186	print STDERR
187	    gettext("\t[-m]     create mapfiles for interface requirements\n");
188	print STDERR
189	    gettext("\t[-o]     print overhead information\n");
190	print STDERR
191	    gettext("\t[-s]     save bindings information created by ldd(1)\n");
192	print STDERR
193	    gettext("\t[-v]     ignore versioned objects\n");
194	exit 1;
195} else {
196	my ($Mult, $Error);
197
198	# Catch any incompatible argument usage.
199	if ($opt{m}) {
200		if ($opt{a}) {
201			inappropriate("-a", "-m", 1);
202			$opt{a} = 0;
203		}
204		if ($opt{i}) {
205			inappropriate("-i", "-m", 1);
206			$opt{i} = 0;
207		}
208	} else {
209		if ($opt{d}) {
210			inappropriate("-d", "-m", 0);
211			$opt{d} = 0;
212		}
213	}
214	if ($opt{a}) {
215		if ($opt{o}) {
216			inappropriate("-a", "-o", 1);
217			$opt{o} = 0;
218		}
219		if ($opt{i}) {
220			inappropriate("-a", "-i", 1);
221			$opt{i} = 0;
222		}
223	}
224	if ($opt{o} && $opt{i}) {
225		inappropriate("-o", "-i", 1);
226			$opt{i} = 0;
227	}
228
229	# If -m is used, only one input file is applicable.
230	if ($opt{m} && ($#ARGV != 0)) {
231		printf STDERR gettext("%s: only one input file is allowed " .
232		    "with the -m option\n"), $Prog;
233		exit 1;
234	}
235
236	# Insure any specified directory exists, or apply a default.
237	if ($opt{d}) {
238		# User specified directory - make sure it exists.
239		if (! -d $opt{d}) {
240			printf STDERR gettext("%s: %s is not a directory\n"),
241			    $Prog, $opt{d};
242			exit 1;
243		}
244		$DestDir = $opt{d};
245	} else {
246		$DestDir = ".";
247	}
248
249	# Establish a temporary directory if necessary.
250	if (!$opt{D}) {
251		if (!($TmpDir = $ENV{TMPDIR}) || (! -d $TmpDir)) {
252			$TmpDir = "/tmp";
253		}
254	}
255
256	# Establish any initial ldd(1) argument requirements.
257	if ($LddArgs = $ENV{LARI_LDD_ARGS}) {
258		$LddArgs = $LddArgs . ' -r -e LD_DEBUG=bindings,files,detail';
259	} else {
260		$LddArgs = '-r -e LD_DEBUG=bindings,files,detail';
261	}
262
263	# If we've been asked to demangle symbols, make sure we can find the
264	# demangler.
265	if ($opt{C}) {
266		my ($DemName) = `dem XXXX 2> /dev/null`;
267		if (!$DemName) {
268			printf STDERR gettext("%s: can not locate demangler: " .
269			    "-C ignored\n"), $Prog;
270			$opt{C} = 0;
271		}
272	}
273
274	# If -a or -o hasn't been specified, default to -i.
275	if (!$opt{a} && !$opt{o}) {
276		$opt{i} = 1;
277	}
278
279	# Determine whether we have a multiple input files.
280	if ($#ARGV == 0) {
281		$Mult = 0;
282	} else {
283		$Mult = 1;
284	}
285
286	# Determine what platform we're running on - some inappropriate
287	# platform specific dependencies are better skipped.
288	chomp($Platform = `uname -i`);
289
290	# Establish signal handlers
291	$SIG{INT} = \&Cleanup;
292	$SIG{QUIT} = \&Cleanup;
293
294	$DbgSeed = "";
295
296	# For each argument determine if we're dealing with a file or directory.
297	$Error = 0;
298	foreach my $Arg (@ARGV) {
299		if (!stat($Arg)) {
300			printf STDERR gettext("%s: %s: unable to stat file\n"),
301			    $Prog, $Arg;
302			$Error = 1;
303			next;
304		}
305
306		# Process simple files.
307		if (-f _) {
308			if (!-r _) {
309				printf STDERR gettext("%s: %s: unable to " .
310				   "read file\n"), $Prog, $Arg;
311				$Error = 1;
312				next;
313			}
314			if (!$opt{D}) {
315				if (ProcFile($Arg, $Mult, 1) == 0) {
316					$Error = 1;
317				}
318			} else {
319				# If the -D option is specified, read the
320				# bindings debugging information from the
321				# specified file.
322				if ($Mult) {
323					print STDOUT "$Arg:\n";
324				}
325				ProcBindings($Arg, $Mult, $Arg);
326			}
327			next;
328		}
329
330		# Process directories.
331		if (-d _) {
332			ProcDir($Arg);
333			next;
334		}
335
336		printf STDERR gettext("%s: %s: is not a file or directory\n"),
337		    $Prog, $Arg;
338		$Error = 1;
339	}
340	exit $Error;
341}
342
343sub ProcDir {
344	my ($Dir) = @_;
345	my ($File);
346
347	# Open the directory and read each entry, omit "." and "..".  Sorting
348	# the directory listing makes analyzing different source hierarchies
349	# easier.
350	if (opendir(DIR, $Dir)) {
351		foreach my $Entry (sort(readdir(DIR))) {
352			if (($Entry eq '.') || ($Entry eq '..')) {
353				next;
354			}
355
356			# If we're decending into a platform directory, ignore
357			# any inappropriate platform specific files.  These
358			# files can have dependencies that in turn bring in the
359			# appropriate platform specific file, resulting in more
360			# than one dependency offering the same interfaces.  In
361			# practice, the non-appropriate platform specific file
362			# wouldn't be loaded with a process.
363			if (($Dir =~ /\/platform$/) &&
364			    ($Entry !~ /^$Platform$/)) {
365				next;
366			}
367
368			$File = "$Dir/$Entry";
369			if (!lstat($File)) {
370				next;
371			}
372			# Ignore symlinks.
373			if (-l _) {
374				next;
375			}
376
377			# Descend into, and process any directories.
378			if (-d _) {
379				ProcDir($File);
380				next;
381			}
382
383			# Process any standard files.
384			if (-f _ && -r _) {
385				ProcFile($File, 1, 0);
386				next;
387
388			}
389		}
390		closedir(DIR);
391	}
392}
393
394# Process a file.  If the file was explicitly defined on the command-line, and
395# an error occurs, tell the user.  Otherwise, this file probably came about from
396# scanning a directory, in which case just skip it and move on.
397sub ProcFile {
398	my ($File, $Mult, $CmdLine) = @_;
399	my (@Ldd, $NoFound, $DbgFile, @DbgGlob, $Type);
400
401	$Type = `LC_ALL=C file '$File' 2>&1`;
402	if (($Type !~ /dynamically linked/) || ($Type =~ /Sun demand paged/)) {
403		if ($CmdLine) {
404			printf STDERR gettext("%s: %s: is an invalid file " .
405			    "type\n"), $Prog, $File;
406		}
407		return 0;
408	}
409
410	# Create a temporary filename for capturing binding information.
411	$DbgSeed = basename($File);
412	$DbgSeed = "$TmpDir/lari.dbg.$$.$DbgSeed";
413
414	# Exercise the file under ldd(1), capturing all the bindings.
415	@Ldd = split(/\n/,
416	    `LC_ALL=C ldd $LddArgs -e LD_DEBUG_OUTPUT='$DbgSeed' '$File' 2>&1`);
417
418	# If ldd isn't -e capable we'll get a usage message.  The -e option was
419	# introduced in Solaris 9 and related patches.  Also, make sure the user
420	# sees any ldd errors.
421	$NoFound = 0;
422	for my $Line (@Ldd) {
423		if ($Line =~ /^usage: ldd/) {
424			printf STDERR gettext("%s: ldd: does not support -e, " .
425			    "unable to capture bindings output\n"), $Prog;
426			exit 1;
427		}
428		if ($Line =~ /not found/) {
429			$NoFound = 1;
430			last;
431		}
432	}
433
434	# The runtime linker will have appended a process id to the debug file.
435	# As we have to intuit the name, make sure there is only one debug
436	# file match, otherwise there must be some clutter in the output
437	# directory that is going to mess up our analysis.
438	foreach my $Match (<\Q${DbgSeed}\E.*>) {
439		if ($Match =~ /^\Q$DbgSeed\E\.\d+$/) {
440			push(@DbgGlob, $Match);
441		}
442	}
443	if (@DbgGlob == 0) {
444		# If there is no debug file, bail.  This can occur if the file
445		# being processed is secure.
446		if ($CmdLine) {
447			printf STDERR gettext("%s: %s: unable to capture " .
448			    "bindings output - possible secure application?\n"),
449			    $Prog, $File;
450		}
451		return 0;
452	} elsif (@DbgGlob > 1) {
453		# Too many debug files found.
454		if ($CmdLine) {
455			printf STDERR gettext("%s: %s: multiple bindings " .
456			    "output files exist: %s: clean up temporary " .
457			    "directory\n"), $Prog, $File, $DbgSeed;
458		}
459		return 0;
460	} else {
461		$DbgFile = $DbgGlob[0];
462	}
463
464	# Ok, we're ready to process the bindings information.  Print a header
465	# if necessary, and if there were any ldd(1) errors push some of them
466	# out before any bindings information.  Limit the output, as it can
467	# sometimes be excessive.  If there are errors, the bindings information
468	# is likely to be incomplete.
469	if ($Mult) {
470		print STDOUT "$File:\n";
471	}
472	if ($NoFound) {
473		my ($Cnt) = 4;
474
475		for my $Line (@Ldd) {
476			if ($Line =~ /not found/) {
477				print STDOUT "$Line\n";
478				$Cnt--;
479			}
480			if ($Cnt == 0) {
481				print STDOUT gettext("\tcontinued ...\n");
482				last;
483			}
484		}
485	}
486
487	# If the user wants the original debugging file left behind, rename it
488	# so that it doesn't get re-read by another instance of lari processing
489	# this file.
490	if ($opt{s}) {
491		rename($DbgFile, $DbgSeed);
492		$DbgFile = $DbgSeed;
493		printf STDOUT gettext("%s: %s: bindings information " .
494		    "saved as: %s\n"), $Prog, $File, $DbgFile;
495	}
496
497	ProcBindings($File, $Mult, $DbgFile);
498
499	# Now that we've finished with the debugging file, nuke it if necessary.
500	if (!$opt{s}) {
501		unlink($DbgFile);
502	}
503	$DbgSeed = "";
504	return 1;
505}
506
507sub ProcBindings {
508	my ($File, $Mult, $DbgFile) = @_;
509	my (%Filtees, $FileHandle);
510
511	# Reinitialize our arrays when we're dealing with multiple files.
512	if ($Mult) {
513		%Symbols = ();
514		%Objects = ();
515		%Versioned = ();
516	}
517
518	# As debugging output can be significant, read a line at a time.
519	open($FileHandle, "<$DbgFile");
520	while (defined(my $Line = <$FileHandle>)) {
521		chomp($Line);
522
523		# If we find a relationship between a filter and filtee, save
524		# it, we'll come back to this once we've gathered everybodies
525		# symbols.
526		if ($Line =~ /;  filtered by /) {
527			my ($Filtee) = $Line;
528			my ($Filter) = $Line;
529
530			# Separate the filter and filtee names, ignore the
531			# runtime linker.
532			$Filtee =~ s/^.*: file=(.*);  filtered by .*/$1/;
533			if ($Filtee =~ $Rtld) {
534				next;
535			}
536			$Filter =~ s/^.*;  filtered by //;
537			$Filtees{$Filtee}{$Filter} = 1;
538			next;
539		}
540
541		# If we find a configuration alternative, determine whether it
542		# is for one of our filtees, and if so record it.
543		if ($Line =~ / configuration alternate found:/) {
544			my ($Orig) = $Line;
545			my ($Altr) = $Line;
546
547			# Separate the original and alternative names.
548			$Orig =~ s/^.*: file=(.*)  .*$/$1/;
549			$Altr =~ s/^.* configuration alternate found: (.*)$/$1/;
550
551			for my $Filtee (keys(%Filtees)) {
552				if ($Filtee ne $Orig) {
553					next;
554				}
555				for my $Filter (keys(%{$Filtees{$Filtee}})) {
556					$Filtees{$Altr}{$Filter} = 1;
557				}
558			}
559			next;
560		}
561
562		# Collect the symbols from any file analyzed.
563		if ($Line =~ /^.*: file=(.*);  analyzing .*/) {
564			GetAllSymbols($1);
565			next;
566		}
567
568		# Process any symbolic relocations that bind to a file.
569		if ($Line =~ /: binding file=.* to file=/) {
570			my ($RefFile, $DstFile, $SymName);
571			my (@Syms, $Found, @Fields);
572			my ($BndInfo) = 0;
573			my ($Offset) = 1;
574			my ($Dlsym) = 0;
575			my ($Detail) = 0;
576
577			# For greatest flexibility, split the line into fields
578			# and walk each field until we find what we need.
579			@Fields = split(' ', $Line);
580
581			# The referencing file, "... binding file=".*".
582			while ($Fields[$Offset]) {
583				if ($Fields[$Offset] =~ /^file=(.*)/) {
584					$RefFile = $1;
585					$Offset++;
586					last;
587				}
588				$Offset++;
589			}
590			# The referencing offset, typically this is the address
591			# of the reference, "(0x1234...)", but in the case of a
592			# user lookup it's the string "(dlsym)".  If we don't
593			# find this offset information we've been given a debug
594			# file that didn't user the "datail" token, in which case
595			# we're not getting all the information we need.
596			if ($Fields[$Offset] =~ /^\((.*)\)/) {
597				if ($1 eq 'dlsym') {
598					$Dlsym = 1;
599				}
600				$Detail = 1;
601				$Offset++;
602			}
603			# The destination file, "... to file=".*".
604			while ($Fields[$Offset]) {
605				if ($Fields[$Offset] =~ /^file=(.*)/) {
606					$DstFile = $1;
607					$Offset++;
608					last;
609				}
610				$Offset++;
611			}
612			# The symbol being bound, "... symbol `.*' ...".
613			while ($Fields[$Offset]) {
614				if ($Fields[$Offset] =~ /^\`(.*)\'$/) {
615					$SymName = $1;
616					$Offset++;
617					last;
618				}
619				$Offset++;
620			}
621			# Possible trailing binding info, "... (direct,.*)$".
622			while ($Fields[$Offset]) {
623				if ($Fields[$Offset] =~ /^\((.*)\)$/) {
624					$BndInfo = $1;
625					$Offset++;
626					last;
627				}
628				$Offset++;
629			}
630
631			if ($Detail == 0) {
632				printf STDERR gettext("%s: %s: debug file " .
633				    "does not contain `detail' information\n"),
634				    $Prog, $DbgFile;
635				return;
636			}
637
638			# Collect the symbols from each object.
639			GetAllSymbols($RefFile);
640			GetAllSymbols($DstFile);
641
642			# Identify that this definition has been bound to.
643			$Symbols{$SymName}{$DstFile}[$ObjRef]++;
644			if ($RefFile eq $DstFile) {
645				# If the reference binds to a definition within
646				# the same file this symbol may be a candidate
647				# for reducing to local.
648				$Symbols{$SymName}{$DstFile}[$ObjFlag] |= $Self;
649				$Objects{$DstFile}{$SymName} |= $Self;
650			} else {
651				# This symbol is required to satisfy an external
652				# reference.
653				$Symbols{$SymName}{$DstFile}[$ObjFlag] |= $Extn;
654				$Objects{$DstFile}{$SymName} |= $Extn;
655			}
656
657			# Assign any other state indicated by the binding info
658			# associated with the diagnostic output.
659			if (!$BndInfo) {
660				next;
661			}
662
663			if ($BndInfo =~ /direct/) {
664				$Symbols{$SymName}{$DstFile}[$ObjFlag] |= $Dirc;
665				$Objects{$DstFile}{$SymName} |= $Dirc;
666			}
667			if ($BndInfo =~ /copy-ref/) {
668				$Symbols{$SymName}{$DstFile}[$ObjFlag] |= $Cpyr;
669				$Objects{$DstFile}{$SymName} |= $Cpyr;
670			}
671			if ($BndInfo =~ /filtee/) {
672				$Symbols{$SymName}{$DstFile}[$ObjFlag] |= $Filt;
673				$Objects{$DstFile}{$SymName} |= $Filt;
674			}
675			if ($BndInfo =~ /interpose/) {
676				$Symbols{$SymName}{$DstFile}[$ObjFlag] |= $Intp;
677				$Objects{$DstFile}{$SymName} |= $Intp;
678			}
679			if ($BndInfo =~ /plt-addr/) {
680				$Symbols{$SymName}{$DstFile}[$ObjFlag] |= $Plta;
681				$Objects{$DstFile}{$SymName} |= $Plta;
682			}
683			if ($Dlsym) {
684				$Symbols{$SymName}{$DstFile}[$ObjFlag] |= $User;
685				$Objects{$DstFile}{$SymName} |= $User;
686			}
687		}
688	}
689	close($FileHandle);
690
691	# Now that we've processed all objects, complete any auxiliary filtee
692	# tagging.  For each filtee, determine which of the symbols it exports
693	# are also defined in its filters.  If a filtee is bound to, the
694	# runtime linkers diagnostics will indicate a filtee binding.  However,
695	# some of the filtee symbols may not be bound to, so here we mark them
696	# all so as to remove them from any interesting output.
697	for my $Filtee (keys(%Filtees)) {
698
699		# Standard filters aren't captured at all, as nothing can bind
700		# to them.
701		if (!exists($Objects{$Filtee})) {
702			next;
703		}
704
705		# Determine what symbols this filtee offers.
706		foreach my $SymName (keys(%{$Objects{$Filtee}})) {
707
708			# Ignore the usual reserved stuff.
709			if (!$opt{a} && (($SymName =~ $MultSyms) ||
710			    ($SymName =~ $CrtSyms))) {
711				next;
712			}
713
714			# Determine whether this symbol exists in our filter.
715			for my $Filter (keys(%{$Filtees{$Filtee}})) {
716				if (!$Symbols{$SymName}{$Filter}) {
717					next;
718				}
719				if (!($Symbols{$SymName}{$Filter}[$ObjFlag] &
720				    $Msft)) {
721					next;
722				}
723				$Symbols{$SymName}{$Filtee}[$ObjFlag] |= $Filt;
724			}
725		}
726	}
727
728	# Process objects and their symbols as required.
729	if ($opt{m}) {
730		# If we're creating a mapfile, traverse each object we've
731		# collected.
732		foreach my $Obj (keys(%Objects)) {
733			my ($File, $Path);
734
735			# Skip any objects that should be ignored.
736			if ($Obj =~ $Rtld) {
737				next;
738			}
739
740			# Skip any versioned objects if required.
741			if ($opt{v} && $Versioned{$Obj}) {
742				next;
743			}
744
745			# Open the mapfile if required.
746			$File = basename($Obj);
747			$Path = "$DestDir/mapfile-$File";
748			if (!open(MAPOUT, "> $Path")) {
749				printf STDERR gettext("%s: %s: open failed:" .
750				    "%s\n"), $Prog, $Path, $!;
751				exit 1;
752			}
753
754			# Establish the mapfile preamble.
755			print MAPOUT "#\n# Interface Definition mapfile for:\n";
756			print MAPOUT "#\tDynamic Object: $Obj\n";
757			print MAPOUT "#\tProcess:        $File\n#\n\n";
758
759			# Process each global symbol.
760			print MAPOUT "$File {\n\tglobal:\n";
761
762			foreach my $SymName (sort(keys(%{$Objects{$Obj}}))) {
763				my ($Flag) = $Objects{$Obj}{$SymName};
764
765				# For the first pass we're only interested in
766				# symbols that have been bound to from an
767				# external object, or must be global to enable
768				# a binding to an interposing definition.
769				# Skip bindings to ourself as these are
770				# candidates for demoting to local.
771				if (!($Flag & ($Extn | $Intp))) {
772					next;
773				}
774				if (($Flag & ($Extn | $Self)) == $Self) {
775					next;
776				}
777
778				# Add the demangled name as a comment if
779				# required.
780				if ($opt{C}) {
781					my ($DemName) = Demangle($SymName);
782
783					if ($DemName ne "") {
784						print MAPOUT "\t\t#$DemName\n";
785					}
786				}
787				print MAPOUT "\t\t$SymName;\n";
788			}
789
790			# Process each local demotion.
791			print MAPOUT "\tlocal:\n";
792
793			if ($opt{o}) {
794				foreach my $SymName
795				    (sort(keys(%{$Objects{$Obj}}))) {
796					my ($Flag) = $Objects{$Obj}{$SymName};
797
798					# For this pass we're only interested
799					# in symbol definitions that haven't
800					# been bound to, or have only been
801					# bound to from the same object.
802					if ($Flag & $Extn) {
803						next;
804					}
805
806					# Add the demangled name as a comment if
807					# required.
808					if ($opt{C}) {
809						my ($DemName) =
810						    Demangle($SymName);
811
812						if ($DemName ne "") {
813							print MAPOUT
814							    "\t\t#$DemName\n";
815						}
816					}
817					print MAPOUT "\t\t$SymName;\n";
818				}
819			}
820
821			# Capture everything else as local.
822			print MAPOUT "\t\t\*;\n};\n";
823			close MAPOUT;
824		}
825
826	} else {
827		# If we're gathering information regarding the symbols used by
828		# the process, automatically sort any standard output using the
829		# symbol name.
830		if (!open(SORT, "| sort +1")) {
831			printf STDERR gettext("%s: fork failed: %s\n"),
832			    $Prog, $!;
833			exit 1;
834		}
835
836		foreach my $SymName (keys(%Symbols)) {
837			my ($Cnt);
838
839			# If we're looking for interesting symbols, inspect
840			# each definition of each symbol.  If one is found to
841			# be interesting, the whole family are printed.
842			if (($Cnt = Interesting($SymName)) == 0) {
843				next;
844			}
845
846			# We've found something interesting, or all symbols
847			# should be output.  List all objects that define this
848			# symbol.
849			foreach my $Obj (keys(%{$Symbols{$SymName}})) {
850				my ($DemName, $Type);
851				my ($Flag) = $Symbols{$SymName}{$Obj}[$ObjFlag];
852				my ($Str) = "$Cnt:";
853
854				# Do we just want overhead symbols.  Consider
855				# copy-relocations, and plt address binding,
856				# as overhead too.
857				if ($opt{o} && (($Flag &
858				    ($Extn | $Cpyr | $Plta)) == $Extn)) {
859					next;
860				}
861
862				# Do we just want all symbols that have been
863				# bound to.
864				if (($opt{a} || $opt{o}) && $opt{b} &&
865				    (($Flag & ($Extn | $Self | $Prot)) == 0)) {
866					next;
867				}
868
869				# If we haven't been asked for all symbols, only
870				# print those reserved symbols that have been
871				# bound to, as the number of reserved symbols
872				# can be quite excessive.
873				if (!$opt{a} && ((($SymName =~ $MultSyms) &&
874				    (($Flag & ($Extn | $Self)) == 0)) ||
875				    (($SymName =~ $CrtSyms) && (($Flag &
876				    ($Extn | $Self | $Prot)) == 0)))) {
877					next;
878				}
879
880				# Skip any versioned objects if required.
881				if ($opt{v} && $Versioned{$Obj}) {
882					next;
883				}
884
885				# Display this symbol.
886				if ($Symbols{$SymName}{$Obj}[$ObjRef]) {
887					$Str = $Str .
888					    $Symbols{$SymName}{$Obj}[$ObjRef];
889				} else {
890					$Str = $Str . '0';
891				}
892
893				# Has the symbol been bound to externally
894				if ($Flag & $Extn) {
895					$Str = $Str . 'E';
896				}
897				# Has the symbol been bound to from the same
898				# object.
899				if ($Flag & $Self) {
900					$Str = $Str . 'S';
901				}
902				# Has the symbol been bound to directly.
903				if ($Flag & $Dirc) {
904					$Str = $Str . 'D';
905				}
906				# Does this symbol originate for an explicit
907				# interposer.
908				if ($Flag & $Intp) {
909					$Str = $Str . 'I';
910				}
911				# Is this symbol the reference data of a copy
912				# relocation.
913				if ($Flag & $Cpyr) {
914					$Str = $Str . 'C';
915				}
916				# Is this symbol part of filtee.
917				if ($Flag & $Filt) {
918					$Str = $Str . 'F';
919				}
920				# Is this symbol protected (in which case there
921				# may be a symbolic binding within the same
922				# object to this symbol).
923				if ($Flag & $Prot) {
924					$Str = $Str . 'P';
925				}
926				# Is this symbol an executables .plt address.
927				if ($Flag & $Plta) {
928					$Str = $Str . 'A';
929				}
930				# Does this binding originate from a user
931				# (dlsym) request.
932				if ($Flag & $User) {
933					$Str = $Str . 'U';
934				}
935				# Does this definition redirect the binding.
936				if ($Flag & $Msft) {
937					$Str = $Str . 'R';
938				}
939				# Does this definition explicity define no
940				# direct binding.
941				if ($Flag & $Nodi) {
942					$Str = $Str . 'N';
943				}
944
945				# Determine whether this is a function or a data
946				# object.  For the latter, display the symbol
947				# size.  Otherwise, the symbol is a reserved
948				# label, and is left untyped.
949				if ($Flag & $Func) {
950					$Type = '()';
951				} elsif ($Flag & $Objt) {
952					$Type = '[' .
953					    $Symbols{$SymName}{$Obj}[$ObjSize] .
954					']';
955				} else {
956					$Type = "";
957				}
958
959				# Demangle the symbol name if desired.
960				$DemName = Demangle($SymName);
961
962				if ($Mult) {
963					print SORT "  [$Str]: " .
964					    "$SymName$Type$DemName: $Obj\n";
965				} else {
966					print SORT "[$Str]: " .
967					    "$SymName$Type$DemName: $Obj\n";
968				}
969			}
970		}
971		close SORT;
972	}
973}
974
975# Heuristics to determine whether a symbol binding is interesting.  In most
976# applications there can be a large amount of symbol binding information to
977# wade through.  The most typical binding, to a single definition, probably
978# isn't interesting or the cause of unexpected behavior.  Here, we try and
979# determine those bindings that may can cause unexpected behavior.
980#
981# Note, this routine is actually called for all symbols so that their count
982# can be calculated in one place.
983sub Interesting
984{
985	my ($SymName) = @_;
986	my ($ObjCnt, $GFlags, $BndCnt, $FltCnt, $NodiCnt, $RdirCnt, $ExRef);
987
988	# Scan all definitions of this symbol, thus determining the definition
989	# count, the number of filters, redirections, executable references
990	# (copy-relocations, or plt addresses), no-direct bindings, and the
991	# number of definitions that have been bound to.
992	$ObjCnt = $GFlags = $BndCnt = $FltCnt =
993		$NodiCnt = $RdirCnt = $ExRef = 0;
994	foreach my $Obj (keys(%{$Symbols{$SymName}})) {
995		my ($Flag) = $Symbols{$SymName}{$Obj}[$ObjFlag];
996
997		# Ignore standard filters when determining the symbol count, as
998		# a standard filter can never be bound to.
999		if (($Flag & ($Osft | $Ssft)) == 0) {
1000			$ObjCnt++;
1001		}
1002
1003		$GFlags |= $Flag;
1004		if ($Flag & $Filt) {
1005			$FltCnt++;
1006		}
1007		if ($Flag & $Nodi) {
1008			$NodiCnt++;
1009		}
1010		if ($Flag & ($Cpyr | $Plta)) {
1011			$ExRef++;
1012		}
1013		if ($Flag & $Msft) {
1014			$RdirCnt++;
1015		}
1016
1017		# Ignore bindings to undefined .plts, and copy-relocation
1018		# references.  These are implementation details, rather than
1019		# a truly interesting multiple-binding.  If a symbol is tagged
1020		# as protected, count it as having bound to itself, even though
1021		# we can't tell if it's really been used.
1022		if (($Flag & ($Self | $Extn | $Prot)) &&
1023		    (($Flag & ($Plta | $Cpyr)) == 0)) {
1024			$BndCnt++;
1025		}
1026	}
1027
1028	# If we want all symbols, return the count.
1029	if (!$opt{i}) {
1030		return $ObjCnt;
1031	}
1032
1033	# Single instance symbol definitions aren't very interesting.
1034	if ($ObjCnt == 1) {
1035		return 0;
1036	}
1037
1038	# Traverse each symbol definition looking for the following:
1039	#
1040	#   .	Multiple symbols are bound to externally.
1041	#   .	A symbol is bound to externally, and possibly symbolically.
1042	#
1043	# Two symbol bindings are acceptable in some cases, and thus aren't
1044	# interesting:
1045	#
1046	#   .	Copy relocations.  Here, the executable binds to a shared object
1047	#	to access the data definition, which is then copied to the
1048	#	executable.  All other references should then bind to the copied
1049	#	data.
1050	#   .	Non-plt relocations to functions that are referenced by the
1051	#	executable will bind to the .plt in the executable.  This
1052	#	provides for address comparison calculations (although plainly
1053	#	an overhead).
1054	#
1055	# Multiple symbol bindings are acceptable in some cases, and thus aren't
1056	# interesting:
1057	#
1058	#   .	Filtees.  Multiple filtees may exist for one filter.
1059	#
1060	if ((($ObjCnt == 2) && ($GFlags & ($Cpyr | $Plta))) ||
1061	    ($ObjCnt == ($FltCnt + 1))) {
1062		return 0;
1063	}
1064
1065	# If we're only interested in multiply-bound symbols.
1066	if (($opt{b} || ($SymName =~ $MultSyms) || ($SymName =~ $CrtSyms)) &&
1067	    ($BndCnt < 2)) {
1068		return (0);
1069	}
1070
1071	# Multiple instances of a definition, where all but one are filter
1072	# references and/or copy relocations, are also uninteresting.
1073	# Effectively, only one symbol is providing the final binding.
1074	if (($FltCnt && $RdirCnt) &&
1075	    (($FltCnt + $RdirCnt + $ExRef) == $ObjCnt)) {
1076		return (0);
1077	}
1078
1079	# Multiple instances of explicitly defined no-direct binding symbols
1080	# are known to occur, and their no-binding definition indicates they
1081	# are expected and accounted for.  Thus, these aren't interesting.
1082	if (($ExRef + $NodiCnt) == $ObjCnt) {
1083		return (0);
1084	}
1085
1086	# We have an interesting symbol, returns its count.
1087	return $ObjCnt;
1088}
1089
1090# Obtain the global symbol definitions of an object and determine whether the
1091# object has been versioned.
1092sub GetAllSymbols {
1093	my ($Obj) = @_;
1094	my (@Elfd, @Elfs, @Elfr, $Type, $Exec, $FileHandle);
1095	my (%AddrToName, %NameToAddr);
1096	my ($Vers) = 0;
1097	my ($Symb) = 0;
1098	my ($Copy) = 0;
1099	my ($Interpose) = 0;
1100	my ($Fltr) = 0;
1101
1102	# Determine whether we've already retrieved this object's symbols.
1103	# Also, ignore the runtime linker, it's on a separate link-map, and
1104	# except for the filtee symbols that might be bound via libdl, is
1105	# uninteresting.
1106	if (($Objects{$Obj}) || ($Obj =~ $Rtld)) {
1107		return;
1108	}
1109
1110	# Get the dynamic information.
1111	@Elfd = split(/\n/, `LC_ALL=C elfdump -d '$Obj' 2> /dev/null`);
1112
1113	# If there's no information, it's possible we've been given a debug
1114	# output file and are processing it from a location from which the
1115	# dependencies specified in the debug file aren't accessible.
1116	if (!@Elfd) {
1117		printf STDERR gettext("%s: %s: unable to process ELF file\n"),
1118		    $Prog, $Obj;
1119
1120		# Add the file to our list, so that we don't create the same
1121		# message again.  Processing should continue so that we can
1122		# flush out as many error messages as possible.
1123		$Objects{$Obj}{"DoesNotExist"} = 0;
1124		return;
1125	}
1126
1127	# If we're processing a filter there's no need to save any symbols, as
1128	# no bindings will occur to this object.
1129	#
1130	# Determine whether we've got a symbolicly bound object.  With newer
1131	# linkers all symbols will be marked as protected ("P"), but with older
1132	# linkers this state could only be intuited from the symbolic dynamic
1133	# tag.
1134	foreach my $Line (@Elfd) {
1135		my (@Fields);
1136		@Fields = split(' ', $Line);
1137
1138		# Determine if the FILTER tag is set.
1139		if ($#Fields == 3) {
1140			if ($Fields[1] eq "FILTER") {
1141				$Fltr |= $Osft;
1142				next;
1143			}
1144			if ($Fields[1] eq "AUXILIARY") {
1145				$Fltr |= $Oaft;
1146				next;
1147			}
1148			next;
1149		}
1150
1151		# We're only interested in the FLAGS entry.
1152		if (($#Fields < 4) || ($Fields[1] !~ "^FLAGS")) {
1153			next;
1154		}
1155		if (($Fields[1] eq "FLAGS") && ($Line =~ " SYMBOLIC ")) {
1156			$Symb = 1;
1157			next;
1158		}
1159		if (($Fields[1] eq "FLAGS_1") && ($Line =~ " INTERPOSE ")) {
1160			$Interpose = 1;
1161		}
1162	}
1163
1164	# If this file is a dynamic executable, determine if this object has
1165	# any copy relocations so that any associated bindings can be labeled
1166	# more meaningfully.
1167	$Type = `LC_ALL=C file '$Obj'`;
1168	if ($Type =~ /executable/) {
1169		$Exec = 1;
1170		# Obtain any copy relocations.
1171		@Elfr = split(/\n/, `LC_ALL=C elfdump -r '$Obj' 2>&1`);
1172
1173		foreach my $Rel (@Elfr) {
1174			my ($SymName, @Fields);
1175
1176			if ($Rel !~ /^\s+R_\S+_COPY /) {
1177				next;
1178			}
1179			@Fields = split(' ', $Rel);
1180			if ($Fields[0] eq 'R_SPARC_COPY') {
1181				$SymName = $Fields[4];
1182			} elsif ($Fields[0] eq 'R_386_COPY') {
1183				$SymName = $Fields[3];
1184			} else {
1185				next;
1186			}
1187
1188			$Symbols{$SymName}{$Obj}[$ObjFlag] |= $Cpyr;
1189			$Objects{$Obj}{$SymName} |= $Cpyr;
1190			$Copy = 1;
1191		}
1192	} else {
1193		$Exec = 0;
1194	}
1195
1196	# Obtain the dynamic symbol table for this object.  Symbol tables can
1197	# be quite large, so open the elfump command through a pipe.
1198	open($FileHandle, "LC_ALL=C elfdump -sN.dynsym '$Obj' 2> /dev/null |");
1199
1200	# Now process all symbols.
1201	while (defined(my $Line = <$FileHandle>)) {
1202		chomp($Line);
1203
1204		my (@Fields) = split(' ', $Line);
1205		my ($Flags);
1206
1207		# We're only interested in defined non-reserved symbol entries.
1208		# Note, ABS and NOTY symbols of non-zero size have been known to
1209		# occur, so capture them.
1210		if (($#Fields < 8) || ($Fields[4] !~ $GlobWeak) ||
1211		    ($Fields[7] =~ $UndefSym) || (!$opt{a} &&
1212		    ($Fields[7] =~ $IgnSyms) && (oct($Fields[2]) eq 0))) {
1213			next;
1214		}
1215
1216		# If we're found copy relocations, save the address and names
1217		# of any OBJT definitions, together with the copy symbol.
1218		if ($Copy && ($Fields[3] eq 'OBJT')) {
1219			push(@{$AddrToName{$Fields[1]}}, $Fields[8]);
1220		}
1221		if (($Symbols{$Fields[8]}{$Obj}) &&
1222		    ($Symbols{$Fields[8]}{$Obj}[$ObjFlag] & $Cpyr)) {
1223			$NameToAddr{$Fields[8]} = $Fields[1];
1224		}
1225
1226		# If the symbol visibility is protected, this is an internal
1227		# symbolic binding (NOTE, an INTERNAL visibility for a global
1228		# symbol is invalid, but for a while ld(1) was setting this
1229		# attribute mistakenly for protected).
1230		# If this is a dynamic executable, mark its symbols as protected
1231		# (they can't be interposed on any more than symbols defined
1232		# protected within shared objects).
1233		$Flags = $Glob | $Fltr;
1234		if (($Fields[5] =~ /^[IP]$/) || $Symb || $Exec) {
1235			$Flags |= $Prot;
1236		}
1237
1238		# If this object is marked as an interposer, tag each symbol.
1239		if ($Interpose) {
1240			$Flags |= $Intp;
1241		}
1242
1243		# Identify the symbol as a function or data type, and for the
1244		# latter, capture the symbol size.  Ignore the standard
1245		# symbolic labels, as we don't want to type them.
1246		if ($Fields[8] !~ $MultSyms) {
1247			if ($Fields[3] =~ /^FUNC$/) {
1248				$Flags |= $Func;
1249			} elsif ($Fields[3] =~ /^OBJT$/) {
1250				my ($Size) = $Fields[2];
1251
1252				if (oct($Size) eq 0) {
1253					$Size = "0";
1254				} else {
1255					$Size =~ s/0x0*/0x/;
1256				}
1257				$Flags |= $Objt;
1258				$Symbols{$Fields[8]}{$Obj}[$ObjSize] = $Size;
1259			}
1260		}
1261
1262		$Symbols{$Fields[8]}{$Obj}[$ObjFlag] |= $Flags;
1263		$Objects{$Obj}{$Fields[8]} |= $Flags;
1264
1265		# If the version field is non-null this object has already been
1266		# versioned.
1267		if (($Vers == 0) && ($Fields[6] ne '0')) {
1268			$Versioned{$Obj} = 1;
1269			$Vers = 1;
1270		}
1271	}
1272	close($FileHandle);
1273
1274	# Obtain any symbol information table for this object.  Symbol tables can
1275	# be quite large, so open the elfump command through a pipe.
1276	open($FileHandle, "LC_ALL=C elfdump -y '$Obj' 2> /dev/null |");
1277
1278	# Now process all symbols.
1279	while (defined(my $Line = <$FileHandle>)) {
1280		chomp($Line);
1281
1282		my (@Fields) = split(' ', $Line);
1283		my ($Flags) = 0;
1284
1285		# Binding attributes are in the second column.
1286		if ($#Fields < 1) {
1287			next;
1288		}
1289		if ($Fields[1] =~ /N/) {
1290			$Flags |= $Nodi
1291		}
1292		if ($Fields[1] =~ /F/) {
1293			$Flags |= $Ssft;
1294		}
1295		if ($Fields[1] =~ /A/) {
1296			$Flags |= $Saft;
1297		}
1298
1299		# Determine the symbol name based upon the number of fields.
1300		if ($Flags && $Symbols{$Fields[$#Fields]}{$Obj}) {
1301			$Symbols{$Fields[$#Fields]}{$Obj}[$ObjFlag] |= $Flags;
1302			$Objects{$Obj}{$Fields[$#Fields]} |= $Flags;
1303		}
1304	}
1305	close($FileHandle);
1306
1307	# If this symbol has already been marked as a copy-relocation reference,
1308	# see if this symbol has any aliases, which should also be marked.
1309	if ($Copy) {
1310		foreach my $SymName (keys(%NameToAddr)) {
1311			my ($Addr) = $NameToAddr{$SymName};
1312
1313			# Determine all symbols that have the same address.
1314			foreach my $AliasName (@{$AddrToName{$Addr}}) {
1315				if ($SymName eq $AliasName) {
1316					next;
1317				}
1318				$Symbols{$AliasName}{$Obj}[$ObjFlag] |= $Cpyr;
1319				$Objects{$Obj}{$AliasName} |= $Cpyr;
1320			}
1321		}
1322	}
1323}
1324
1325# Demangle a symbol name if required.
1326sub Demangle
1327{
1328	my ($SymName) = @_;
1329	my ($DemName);
1330
1331	if ($opt{C}) {
1332		my (@Dem);
1333
1334		# Determine if we've already demangled this name.
1335		if (exists($DemSyms{$SymName})) {
1336			return $DemSyms{$SymName};
1337		}
1338
1339		@Dem = split(/\n/, `dem '$SymName'`);
1340		foreach my $Line (@Dem) {
1341			my (@Fields) = split(' ', $Line);
1342
1343			if (($#Fields < 2) || ($Fields[1] ne '==') ||
1344			    ($Fields[0] eq $Fields[2])) {
1345				next;
1346			}
1347			$DemName = $Line;
1348			$DemName =~ s/.*== (.*)$/ \[$1]/;
1349			$DemSyms{$SymName} = $DemName;
1350			return($DemName);
1351		}
1352	}
1353	$DemSyms{$SymName} = "";
1354	return("");
1355}
1356