xref: /titanic_44/usr/src/cmd/sgs/libld/common/libs.c (revision 8eea8e29cc4374d1ee24c25a07f45af132db3499)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  *	Copyright (c) 1988 AT&T
24  *	  All Rights Reserved
25  *
26  *
27  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
28  * Use is subject to license terms.
29  */
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 /*
33  * Library processing
34  */
35 #include	<stdio.h>
36 #include	<string.h>
37 #include	"debug.h"
38 #include	"msg.h"
39 #include	"_libld.h"
40 
41 /*
42  * Because a tentative symbol may cause the extraction of an archive member,
43  * make sure that the potential member is really required.  If the archive
44  * member has a strong defined symbol it will be extracted.  If it simply
45  * contains another tentative definition, or a defined function symbol, then it
46  * will not be used.
47  */
48 int
49 process_member(Ar_mem *amp, const char *name, unsigned char obind,
50     Ofl_desc *ofl)
51 {
52 	Sym *		syms;
53 	Xword		symn, cnt;
54 	char 		*strs;
55 
56 	/*
57 	 * Find the first symbol table in the archive member, obtain its
58 	 * data buffer and determine the number of global symbols (Note,
59 	 * there must be a symbol table present otherwise the archive would
60 	 * never have been able to generate its own symbol entry for this
61 	 * member).
62 	 */
63 	if (amp->am_syms == 0) {
64 		Elf_Scn *	scn = NULL;
65 		Shdr *		shdr;
66 		Elf_Data *	data;
67 
68 		while (scn = elf_nextscn(amp->am_elf, scn)) {
69 			if ((shdr = elf_getshdr(scn)) == NULL) {
70 				eprintf(ERR_ELF, MSG_INTL(MSG_ELF_GETSHDR),
71 				    amp->am_path);
72 				ofl->ofl_flags |= FLG_OF_FATAL;
73 				return (0);
74 			}
75 			if ((shdr->sh_type == SHT_SYMTAB) ||
76 			    (shdr->sh_type == SHT_DYNSYM))
77 				break;
78 		}
79 		if ((data = elf_getdata(scn, NULL)) == NULL) {
80 			eprintf(ERR_ELF, MSG_INTL(MSG_ELF_GETDATA),
81 			    amp->am_path);
82 			ofl->ofl_flags |= FLG_OF_FATAL;
83 			return (0);
84 		}
85 		syms = (Sym *)data->d_buf;
86 		syms += shdr->sh_info;
87 		symn = shdr->sh_size / shdr->sh_entsize;
88 		symn -= shdr->sh_info;
89 
90 		/*
91 		 * Get the data for the associated string table.
92 		 */
93 		if ((scn = elf_getscn(amp->am_elf, (size_t)shdr->sh_link)) ==
94 		    NULL) {
95 			eprintf(ERR_ELF, MSG_INTL(MSG_ELF_GETSCN),
96 			    amp->am_path);
97 			ofl->ofl_flags |= FLG_OF_FATAL;
98 			return (0);
99 		}
100 		if ((data = elf_getdata(scn, NULL)) == NULL) {
101 			eprintf(ERR_ELF, MSG_INTL(MSG_ELF_GETDATA),
102 			    amp->am_path);
103 			ofl->ofl_flags |= FLG_OF_FATAL;
104 			return (0);
105 		}
106 		strs = data->d_buf;
107 
108 		/*
109 		 * Initialize the archive member structure in case we have to
110 		 * come through here again.
111 		 */
112 		amp->am_syms = syms;
113 		amp->am_strs = strs;
114 		amp->am_symn = symn;
115 	} else {
116 		syms = amp->am_syms;
117 		strs = amp->am_strs;
118 		symn = amp->am_symn;
119 	}
120 
121 	/*
122 	 * Loop through the symbol table entries looking for a match for the
123 	 * original symbol.  The archive member will be used if	the new symbol
124 	 * is a definition of an object (not a function).  Note however that a
125 	 * weak definition within the archive will not override a strong
126 	 * tentative symbol (see sym_realtent() resolution and ABI symbol
127 	 * binding description - page 4-27).
128 	 */
129 	for (cnt = 0; cnt < symn; syms++, cnt++) {
130 		Word		shndx = syms->st_shndx;
131 		unsigned char	info;
132 
133 		if ((shndx == SHN_ABS) || (shndx == SHN_COMMON) ||
134 		    (shndx == SHN_UNDEF))
135 			continue;
136 
137 		info = syms->st_info;
138 		if ((ELF_ST_TYPE(info) == STT_FUNC) ||
139 		    ((ELF_ST_BIND(info) == STB_WEAK) && (obind != STB_WEAK)))
140 			continue;
141 
142 		if (strcmp(strs + syms->st_name, name) == NULL)
143 			return (1);
144 	}
145 	return (0);
146 }
147 
148 /*
149  * Create an archive descriptor.  By maintaining a list of archives any
150  * duplicate occurrences of the same archive specified by the user enable us to
151  * pick off where the last processing finished.
152  */
153 Ar_desc *
154 ar_setup(const char *name, Elf *elf, Ofl_desc *ofl)
155 {
156 	Ar_desc *	adp;
157 	size_t		number;
158 	Elf_Arsym *	start;
159 
160 	/*
161 	 * Get the archive symbol table. If this fails, we will
162 	 * ignore this file with a warning message.
163 	 */
164 	if ((start = elf_getarsym(elf, &number)) == 0) {
165 		if (elf_errno()) {
166 			eprintf(ERR_ELF, MSG_INTL(MSG_ELF_GETARSYM), name);
167 			ofl->ofl_flags |= FLG_OF_FATAL;
168 		} else
169 			eprintf(ERR_WARNING, MSG_INTL(MSG_ELF_ARSYM), name);
170 
171 		return (0);
172 	}
173 
174 	/*
175 	 * As this is a new archive reference establish a new descriptor.
176 	 */
177 	if ((adp = libld_malloc(sizeof (Ar_desc))) == 0)
178 		return ((Ar_desc *)S_ERROR);
179 	adp->ad_name = name;
180 	adp->ad_elf = elf;
181 	adp->ad_start = start;
182 	if ((adp->ad_aux = libld_calloc(sizeof (Ar_aux), number)) == 0)
183 		return ((Ar_desc *)S_ERROR);
184 
185 	/*
186 	 * Retain any command line options that are applicable to archive
187 	 * extraction in case we have to rescan this archive later.
188 	 */
189 	adp->ad_flags = ofl->ofl_flags1 & MSK_OF1_ARCHIVE;
190 
191 	ofl->ofl_arscnt++;
192 
193 	/*
194 	 * Add this new descriptor to the list of archives.
195 	 */
196 	if (list_appendc(&ofl->ofl_ars, adp) == 0)
197 		return ((Ar_desc *)S_ERROR);
198 	else
199 		return (adp);
200 }
201 
202 /*
203  * For each archive descriptor we maintain an `Ar_aux' table to parallel the
204  * archive symbol table (returned from elf_getarsym(3e)).  We use this table to
205  * hold the `Sym_desc' for each symbol (thus reducing the number of sym_find()'s
206  * we have to do), and to hold the `Ar_mem' pointer.  The `Ar_mem' element can
207  * have one of three values indicating the state of the archive member
208  * associated with the offset for this symbol table entry:
209  *
210  *  0		indicates that the member has not been processed.
211  *
212  *  FLG_ARMEM_PROC
213  *		indicates that the member has been processed.
214  *
215  *  addr	indicates that the member has been investigated to determine if
216  *		it contained a symbol definition we need, but was found not to
217  *		be a candidate for extraction.  In this case the members
218  *		structure is maintained for possible later use.
219  *
220  * Each time we process an archive member we use its offset value to scan this
221  * `Ar_aux' list.  If the member has been extracted, each entry with the same
222  * offset has its `Ar_mem' pointer set to FLG_AMMEM_PROC.  Thus if we cycle back
223  * through the archive symbol table we will ignore these symbols as they will
224  * have already been added to the output image.  If a member has been processed
225  * but found not to contain a symbol we need, each entry with the same offset
226  * has its `Ar_mem' pointer set to the member structures address.
227  */
228 void
229 ar_member(Ar_desc * adp, Elf_Arsym * arsym, Ar_aux * aup, Ar_mem * amp)
230 {
231 	Elf_Arsym *	_arsym = arsym;
232 	Ar_aux *	_aup = aup;
233 	size_t		_off = arsym->as_off;
234 
235 	if (_arsym != adp->ad_start) {
236 		do {
237 			_arsym--;
238 			_aup--;
239 			if (_arsym->as_off != _off)
240 				break;
241 			_aup->au_mem = amp;
242 		} while (_arsym != adp->ad_start);
243 	}
244 
245 	_arsym = arsym;
246 	_aup = aup;
247 
248 	do {
249 		if (_arsym->as_off != _off)
250 			break;
251 		_aup->au_mem = amp;
252 		_arsym++;
253 		_aup++;
254 	} while (_arsym->as_name);
255 }
256 
257 /*
258  * Read in the archive's symbol table; for each symbol in the table check
259  * whether that symbol satisfies an unresolved, or tentative reference in
260  * ld's internal symbol table; if so, the corresponding object from the
261  * archive is processed.  The archive symbol table is searched until we go
262  * through a complete pass without satisfying any unresolved symbols
263  */
264 uintptr_t
265 process_archive(const char *name, int fd, Ar_desc *adp, Ofl_desc *ofl)
266 {
267 	Elf_Arsym *	arsym;
268 	Elf_Arhdr *	arhdr;
269 	Elf *		arelf;
270 	Ar_aux *	aup;
271 	Sym_desc *	sdp;
272 	char 		*arname, *arpath;
273 	int		ndx, found = 0, again = 0;
274 	int		allexrt = ofl->ofl_flags1 & FLG_OF1_ALLEXRT;
275 	uintptr_t	err;
276 	Rej_desc	rej = { 0 };
277 
278 	/*
279 	 * If a fatal error condition has been set there's really no point in
280 	 * processing the archive further.  Having got to this point we have at
281 	 * least established that the archive exists (thus verifying that the
282 	 * command line options that got us to this archive are correct).  Very
283 	 * large archives can take a significant time to process, therefore
284 	 * continuing on from here may significantly delay the fatal error
285 	 * message the user is already set to receive.
286 	 */
287 	if (ofl->ofl_flags & FLG_OF_FATAL)
288 		return (1);
289 
290 	/*
291 	 * If this archive was processed with -z allextract, then all members
292 	 * have already been extracted.
293 	 */
294 	if (adp->ad_elf == (Elf *)NULL)
295 		return (1);
296 
297 	/*
298 	 * Loop through archive symbol table until we make a complete pass
299 	 * without satisfying an unresolved reference.  For each archive
300 	 * symbol, see if there is a symbol with the same name in ld's
301 	 * symbol table.  If so, and if that symbol is still unresolved or
302 	 * tentative, process the corresponding archive member.
303 	 */
304 	do {
305 		DBG_CALL(Dbg_file_archive(name, again));
306 		DBG_CALL(Dbg_syms_ar_title(name, again));
307 
308 		ndx = again = 0;
309 		for (arsym = adp->ad_start, aup = adp->ad_aux; arsym->as_name;
310 		    ++arsym, ++aup, ndx++) {
311 			Rej_desc	_rej = { 0 };
312 			Ar_mem *	amp;
313 			Sym *		sym;
314 
315 			/*
316 			 * If the auxiliary members value indicates that this
317 			 * member has been processed then this symbol will have
318 			 * been added to the output file image already or the
319 			 * object was rejected in which case we don't want to
320 			 * process it again.
321 			 */
322 			if (aup->au_mem == FLG_ARMEM_PROC)
323 				continue;
324 
325 			/*
326 			 * If the auxiliary symbol element is non-zero lookup
327 			 * the symbol from the internal symbol table.
328 			 * (But you skip this if allextract is specified.)
329 			 */
330 			if ((allexrt == 0) && ((sdp = aup->au_syms) == 0)) {
331 				if ((sdp = sym_find(arsym->as_name,
332 				    /* LINTED */
333 				    (Word)arsym->as_hash, 0, ofl)) == 0) {
334 					DBG_CALL(Dbg_syms_ar_entry(ndx, arsym));
335 					continue;
336 				}
337 				aup->au_syms = sdp;
338 			}
339 
340 			/*
341 			 * With '-z allextract', all members will be extracted.
342 			 *
343 			 * This archive member is a candidate for extraction if
344 			 * the internal symbol originates from an explicit file
345 			 * and is undefined or tentative.  By default weak
346 			 * references do not cause archive extraction, however
347 			 * the -zweakextract flag overrides this default.
348 			 * If this symbol has been bound to a versioned shared
349 			 * object make sure it is available for linking.
350 			 */
351 			if (allexrt == 0) {
352 				Boolean		vers = TRUE;
353 				Ifl_desc *	file = sdp->sd_file;
354 
355 				if ((sdp->sd_ref == REF_DYN_NEED) &&
356 				    (file->ifl_vercnt)) {
357 					Word		vndx;
358 					Ver_index *	vip;
359 
360 					vndx = sdp->sd_aux->sa_dverndx;
361 					vip = &file->ifl_verndx[vndx];
362 					if (!(vip->vi_flags & FLG_VER_AVAIL))
363 						vers = FALSE;
364 				}
365 
366 				sym = sdp->sd_sym;
367 				if ((!(file->ifl_flags & FLG_IF_NEEDED)) ||
368 				    ((sym->st_shndx != SHN_UNDEF) &&
369 				    (sym->st_shndx != SHN_COMMON) && vers) ||
370 				    ((ELF_ST_BIND(sym->st_info) == STB_WEAK) &&
371 				    (!(ofl->ofl_flags1 & FLG_OF1_WEAKEXT)))) {
372 					DBG_CALL(Dbg_syms_ar_entry(ndx, arsym));
373 					continue;
374 				}
375 			}
376 
377 			/*
378 			 * Determine if we have already extracted this member,
379 			 * and if so reuse the Ar_mem information.
380 			 */
381 			if ((amp = aup->au_mem) != 0) {
382 				arelf = amp->am_elf;
383 				arname = amp->am_name;
384 				arpath = amp->am_path;
385 			} else {
386 				size_t	len;
387 
388 				/*
389 				 * Set up a new elf descriptor for this member.
390 				 */
391 				if (elf_rand(adp->ad_elf, arsym->as_off) !=
392 				    arsym->as_off) {
393 					eprintf(ERR_ELF,
394 					    MSG_INTL(MSG_ELF_ARMEM), name,
395 					    EC_WORD(arsym->as_off), ndx,
396 					    demangle(arsym->as_name));
397 					ofl->ofl_flags |= FLG_OF_FATAL;
398 					return (0);
399 				}
400 				if ((arelf = elf_begin(fd, ELF_C_READ,
401 				    adp->ad_elf)) == NULL) {
402 					eprintf(ERR_ELF,
403 					    MSG_INTL(MSG_ELF_BEGIN), name);
404 					ofl->ofl_flags |= FLG_OF_FATAL;
405 					return (0);
406 				}
407 
408 				/*
409 				 * Construct the member filename.
410 				 */
411 				if ((arhdr = elf_getarhdr(arelf)) == NULL) {
412 					eprintf(ERR_ELF,
413 					    MSG_INTL(MSG_ELF_GETARHDR), name);
414 					ofl->ofl_flags |= FLG_OF_FATAL;
415 					return (0);
416 				}
417 				arname = arhdr->ar_name;
418 
419 				/*
420 				 * Construct the members full pathname, using
421 				 * the format "%s(%s)".
422 				 */
423 				len = strlen(name) + strlen(arname) + 3;
424 				if ((arpath = libld_malloc(len)) == 0)
425 					return (S_ERROR);
426 				(void) snprintf(arpath, len,
427 				    MSG_ORIG(MSG_FMT_ARMEM), name, arname);
428 			}
429 
430 			/*
431 			 * If the symbol for which this archive member is
432 			 * being processed is a tentative symbol, then this
433 			 * member must be verified to insure that it is
434 			 * going to provided a symbol definition that will
435 			 * override the tentative symbol.
436 			 */
437 			if ((allexrt == 0) && (sym->st_shndx == SHN_COMMON)) {
438 				/* LINTED */
439 				Byte bind = (Byte)ELF_ST_BIND(sym->st_info);
440 
441 				/*
442 				 * If we don't already have a member structure
443 				 * allocate one.
444 				 */
445 				if (!amp) {
446 					if ((amp = libld_calloc(sizeof (Ar_mem),
447 					    1)) == 0)
448 						return (S_ERROR);
449 					amp->am_elf = arelf;
450 					amp->am_name = arname;
451 					amp->am_path = arpath;
452 				}
453 				DBG_CALL(Dbg_syms_ar_checking(ndx, arsym,
454 				    arname));
455 				if ((err = process_member(amp, arsym->as_name,
456 				    bind, ofl)) == S_ERROR)
457 					return (S_ERROR);
458 
459 				/*
460 				 * If it turns out that we don't need this
461 				 * member simply initialize all other auxiliary
462 				 * entries that match this offset with this
463 				 * members address.  In this way we can resuse
464 				 * this information if we recurse back to this
465 				 * symbol.
466 				 */
467 				if (err == 0) {
468 					if (aup->au_mem == 0)
469 						ar_member(adp, arsym, aup, amp);
470 					continue;
471 				}
472 			}
473 
474 			/*
475 			 * Process the archive member.  Retain any error for
476 			 * return to the caller.
477 			 */
478 			DBG_CALL(Dbg_syms_ar_resolve(ndx, arsym, arname,
479 			    allexrt));
480 			if ((err = (uintptr_t)process_ifl(arpath, NULL, fd,
481 			    arelf, FLG_IF_EXTRACT | FLG_IF_NEEDED, ofl,
482 			    &_rej)) == S_ERROR)
483 				return (S_ERROR);
484 
485 			/*
486 			 * If this member is rejected maintain the first
487 			 * rejection error for possible later display.  Keep the
488 			 * member as extracted so that we don't try and process
489 			 * it again on a rescan.
490 			 */
491 			if (_rej.rej_type) {
492 				if (rej.rej_type == 0) {
493 					rej.rej_type = _rej.rej_type;
494 					rej.rej_info = _rej.rej_info;
495 					rej.rej_name = (const char *)arpath;
496 				}
497 				ar_member(adp, arsym, aup, FLG_ARMEM_PROC);
498 				continue;
499 			}
500 
501 			/*
502 			 * Indicate that the extracted member is in use.  This
503 			 * enables debugging diags, and indicates that a further
504 			 * rescan of all archives may be necessary.
505 			 */
506 			found = 1;
507 			ofl->ofl_flags1 |= FLG_OF1_EXTRACT;
508 			adp->ad_flags |= FLG_ARD_EXTRACT;
509 
510 			/*
511 			 * If not under '-z allextract' signal the need to
512 			 * rescan this archive.
513 			 */
514 			if (allexrt == 0)
515 				again = 1;
516 
517 			ar_member(adp, arsym, aup, FLG_ARMEM_PROC);
518 			DBG_CALL(Dbg_syms_nl());
519 		}
520 	} while (again);
521 
522 	/*
523 	 * If no objects have been found in the archive test for any rejections
524 	 * and if one had occurred issue a warning - its possible a user has
525 	 * pointed at an archive containing the wrong class of elf members.
526 	 */
527 	if (found == 0) {
528 		if (rej.rej_type)
529 			eprintf(ERR_WARNING, MSG_INTL(reject[rej.rej_type]),
530 			    rej.rej_name ? rej.rej_name :
531 			    MSG_INTL(MSG_STR_UNKNOWN), conv_reject_str(&rej));
532 	}
533 
534 	/*
535 	 * If this archive was extracted by -z allextract, the ar_aux table
536 	 * and elf descriptor can be freed.  Set ad_elf to NULL to mark the
537 	 * archive is completely processed.
538 	 */
539 	if (allexrt) {
540 		(void) elf_end(adp->ad_elf);
541 		adp->ad_elf = (Elf *)NULL;
542 	}
543 
544 	return (1);
545 }
546