xref: /illumos-gate/usr/src/cmd/sgs/libld/common/libs.c (revision cb6207858a9fcc2feaee22e626912fba281ac969)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  *	Copyright (c) 1988 AT&T
24  *	  All Rights Reserved
25  *
26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27  * Use is subject to license terms.
28  */
29 #pragma ident	"%Z%%M%	%I%	%E% SMI"
30 
31 /*
32  * Library processing
33  */
34 #include	<stdio.h>
35 #include	<string.h>
36 #include	<debug.h>
37 #include	"msg.h"
38 #include	"_libld.h"
39 
40 /*
41  * Archive members are typically extracted to resolve an existing undefined
42  * reference.  However, other symbol definitions can cause archive members to
43  * be processed to determine if the archive member provides a more appropriate
44  * definition.  This routine processes the archive member to determine if the
45  * member is really required.
46  *
47  *  i.	Tentative symbols may cause the extraction of an archive member.
48  *	If the archive member has a strong defined symbol it will be used.
49  *	If the archive member simply contains another tentative definition,
50  *	or a defined function symbol, then it will not be used.
51  *
52  *  ii.	A symbol reference may define a hidden or protected visibility.  The
53  *	reference can only be bound to a definition within a relocatable object
54  *	for this restricted visibility to be satisfied.  If the archive member
55  * 	provides a definition of the same symbol type, this definition is
56  *	taken.  The visibility of the defined symbol is irrelevant, as the most
57  *	restrictive visibility of the reference and the definition will be
58  *	applied to the final symbol.
59  */
60 static int
61 process_member(Ar_mem *amp, const char *name, Sym_desc *sdp, Ofl_desc *ofl)
62 {
63 	Sym	*syms, *osym = sdp->sd_sym;
64 	Xword	symn, cnt;
65 	char 	*strs;
66 
67 	/*
68 	 * Find the first symbol table in the archive member, obtain its
69 	 * data buffer and determine the number of global symbols (Note,
70 	 * there must be a symbol table present otherwise the archive would
71 	 * never have been able to generate its own symbol entry for this
72 	 * member).
73 	 */
74 	if (amp->am_syms == 0) {
75 		Elf_Scn		*scn = NULL;
76 		Shdr		*shdr;
77 		Elf_Data	*data;
78 
79 		while (scn = elf_nextscn(amp->am_elf, scn)) {
80 			if ((shdr = elf_getshdr(scn)) == NULL) {
81 				eprintf(ofl->ofl_lml, ERR_ELF,
82 				    MSG_INTL(MSG_ELF_GETSHDR), amp->am_path);
83 				ofl->ofl_flags |= FLG_OF_FATAL;
84 				return (0);
85 			}
86 			if ((shdr->sh_type == SHT_SYMTAB) ||
87 			    (shdr->sh_type == SHT_DYNSYM))
88 				break;
89 		}
90 		if ((data = elf_getdata(scn, NULL)) == NULL) {
91 			eprintf(ofl->ofl_lml, ERR_ELF,
92 			    MSG_INTL(MSG_ELF_GETDATA), amp->am_path);
93 			ofl->ofl_flags |= FLG_OF_FATAL;
94 			return (0);
95 		}
96 		syms = (Sym *)data->d_buf;
97 		syms += shdr->sh_info;
98 		symn = shdr->sh_size / shdr->sh_entsize;
99 		symn -= shdr->sh_info;
100 
101 		/*
102 		 * Get the data for the associated string table.
103 		 */
104 		if ((scn = elf_getscn(amp->am_elf, (size_t)shdr->sh_link)) ==
105 		    NULL) {
106 			eprintf(ofl->ofl_lml, ERR_ELF,
107 			    MSG_INTL(MSG_ELF_GETSCN), amp->am_path);
108 			ofl->ofl_flags |= FLG_OF_FATAL;
109 			return (0);
110 		}
111 		if ((data = elf_getdata(scn, NULL)) == NULL) {
112 			eprintf(ofl->ofl_lml, ERR_ELF,
113 			    MSG_INTL(MSG_ELF_GETDATA), amp->am_path);
114 			ofl->ofl_flags |= FLG_OF_FATAL;
115 			return (0);
116 		}
117 		strs = data->d_buf;
118 
119 		/*
120 		 * Initialize the archive member structure in case we have to
121 		 * come through here again.
122 		 */
123 		amp->am_syms = syms;
124 		amp->am_strs = strs;
125 		amp->am_symn = symn;
126 	} else {
127 		syms = amp->am_syms;
128 		strs = amp->am_strs;
129 		symn = amp->am_symn;
130 	}
131 
132 	/*
133 	 * Loop through the symbol table entries looking for a match for the
134 	 * original symbol.
135 	 */
136 	for (cnt = 0; cnt < symn; syms++, cnt++) {
137 		Word	shndx;
138 
139 		if ((shndx = syms->st_shndx) == SHN_UNDEF)
140 			continue;
141 
142 		if (osym->st_shndx == SHN_COMMON) {
143 			/*
144 			 * Determine whether a tentative symbol definition
145 			 * should be overridden.
146 			 */
147 			if ((shndx == SHN_ABS) || (shndx == SHN_COMMON) ||
148 			    (ELF_ST_TYPE(syms->st_info) == STT_FUNC))
149 				continue;
150 
151 			/*
152 			 * A historic detail requires that a weak definition
153 			 * within an archive will not override a strong
154 			 * definition (see sym_realtent() resolution and ABI
155 			 * symbol binding description - page 4-27).
156 			 */
157 			if ((ELF_ST_BIND(syms->st_info) == STB_WEAK) &&
158 			    (ELF_ST_BIND(osym->st_info) != STB_WEAK))
159 				continue;
160 		} else {
161 			/*
162 			 * Determine whether a restricted visibility reference
163 			 * should be overridden.  Don't worry about the
164 			 * visibility of the archive member definition, nor
165 			 * whether it is weak or global.  Any definition is
166 			 * better than a binding to an external shared object
167 			 * (which is the only event that must presently exist
168 			 * for us to be here looking for a better alternative).
169 			 */
170 			if (ELF_ST_TYPE(syms->st_info) !=
171 			    ELF_ST_TYPE(osym->st_info))
172 				continue;
173 		}
174 
175 		if (strcmp(strs + syms->st_name, name) == NULL)
176 			return (1);
177 	}
178 	return (0);
179 }
180 
181 /*
182  * Create an archive descriptor.  By maintaining a list of archives any
183  * duplicate occurrences of the same archive specified by the user enable us to
184  * pick off where the last processing finished.
185  */
186 Ar_desc *
187 ld_ar_setup(const char *name, Elf *elf, Ofl_desc *ofl)
188 {
189 	Ar_desc *	adp;
190 	size_t		number;
191 	Elf_Arsym *	start;
192 
193 	/*
194 	 * Get the archive symbol table. If this fails, we will
195 	 * ignore this file with a warning message.
196 	 */
197 	if ((start = elf_getarsym(elf, &number)) == 0) {
198 		if (elf_errno()) {
199 			eprintf(ofl->ofl_lml, ERR_ELF,
200 			    MSG_INTL(MSG_ELF_GETARSYM), name);
201 			ofl->ofl_flags |= FLG_OF_FATAL;
202 		} else
203 			eprintf(ofl->ofl_lml, ERR_WARNING,
204 			    MSG_INTL(MSG_ELF_ARSYM), name);
205 
206 		return (0);
207 	}
208 
209 	/*
210 	 * As this is a new archive reference establish a new descriptor.
211 	 */
212 	if ((adp = libld_malloc(sizeof (Ar_desc))) == 0)
213 		return ((Ar_desc *)S_ERROR);
214 	adp->ad_name = name;
215 	adp->ad_elf = elf;
216 	adp->ad_start = start;
217 	if ((adp->ad_aux = libld_calloc(sizeof (Ar_aux), number)) == 0)
218 		return ((Ar_desc *)S_ERROR);
219 
220 	/*
221 	 * Retain any command line options that are applicable to archive
222 	 * extraction in case we have to rescan this archive later.
223 	 */
224 	adp->ad_flags = ofl->ofl_flags1 & MSK_OF1_ARCHIVE;
225 
226 	ofl->ofl_arscnt++;
227 
228 	/*
229 	 * Add this new descriptor to the list of archives.
230 	 */
231 	if (list_appendc(&ofl->ofl_ars, adp) == 0)
232 		return ((Ar_desc *)S_ERROR);
233 	else
234 		return (adp);
235 }
236 
237 /*
238  * For each archive descriptor, maintain an `Ar_aux' table to parallel the
239  * archive symbol table (returned from elf_getarsym(3e)).  Use this table to
240  * hold a `Sym_desc' for each symbol (thus reducing the number of
241  * ld_sym_find()'s), and to hold the `Ar_mem' pointer.  The `Ar_mem' element
242  * can have one of three values indicating the state of the archive member
243  * associated with the offset for this symbol table entry:
244  *
245  *  0		indicates that the member has not been processed.
246  *
247  *  FLG_ARMEM_PROC
248  *		indicates that the member has been processed.
249  *
250  *  addr	indicates that the member has been investigated to determine if
251  *		it contained a symbol definition we need, but was found not to
252  *		be a candidate for extraction.  In this case the members
253  *		structure is maintained for possible later use.
254  *
255  * Each time we process an archive member we use its offset value to scan this
256  * `Ar_aux' list.  If the member has been extracted, each entry with the same
257  * offset has its `Ar_mem' pointer set to FLG_AMMEM_PROC.  Thus if we cycle back
258  * through the archive symbol table we will ignore these symbols as they will
259  * have already been added to the output image.  If a member has been processed
260  * but found not to contain a symbol we need, each entry with the same offset
261  * has its `Ar_mem' pointer set to the member structures address.
262  */
263 void
264 ld_ar_member(Ar_desc * adp, Elf_Arsym * arsym, Ar_aux * aup, Ar_mem * amp)
265 {
266 	Elf_Arsym *	_arsym = arsym;
267 	Ar_aux *	_aup = aup;
268 	size_t		_off = arsym->as_off;
269 
270 	if (_arsym != adp->ad_start) {
271 		do {
272 			_arsym--;
273 			_aup--;
274 			if (_arsym->as_off != _off)
275 				break;
276 			_aup->au_mem = amp;
277 		} while (_arsym != adp->ad_start);
278 	}
279 
280 	_arsym = arsym;
281 	_aup = aup;
282 
283 	do {
284 		if (_arsym->as_off != _off)
285 			break;
286 		_aup->au_mem = amp;
287 		_arsym++;
288 		_aup++;
289 	} while (_arsym->as_name);
290 }
291 
292 /*
293  * Read the archive symbol table.  For each symbol in the table, determine
294  * whether that symbol satisfies an unresolved reference, tentative reference,
295  * or a reference that expects hidden or protected visibility.  If so, the
296  * corresponding object from the archive is processed.  The archive symbol
297  * table is searched until we go through a complete pass without satisfying any
298  * unresolved symbols
299  */
300 uintptr_t
301 ld_process_archive(const char *name, int fd, Ar_desc *adp, Ofl_desc *ofl)
302 {
303 	Elf_Arsym *	arsym;
304 	Elf_Arhdr *	arhdr;
305 	Elf *		arelf;
306 	Ar_aux *	aup;
307 	Sym_desc *	sdp;
308 	char 		*arname, *arpath;
309 	Xword		ndx;
310 	int		found, again;
311 	int		allexrt = ofl->ofl_flags1 & FLG_OF1_ALLEXRT;
312 	uintptr_t	err;
313 	Rej_desc	rej = { 0 };
314 
315 	/*
316 	 * If a fatal error condition has been set there's really no point in
317 	 * processing the archive further.  Having got to this point we have at
318 	 * least established that the archive exists (thus verifying that the
319 	 * command line options that got us to this archive are correct).  Very
320 	 * large archives can take a significant time to process, therefore
321 	 * continuing on from here may significantly delay the fatal error
322 	 * message the user is already set to receive.
323 	 */
324 	if (ofl->ofl_flags & FLG_OF_FATAL)
325 		return (1);
326 
327 	/*
328 	 * If this archive was processed with -z allextract, then all members
329 	 * have already been extracted.
330 	 */
331 	if (adp->ad_elf == (Elf *)NULL)
332 		return (1);
333 
334 	/*
335 	 * Loop through archive symbol table until we make a complete pass
336 	 * without satisfying an unresolved reference.  For each archive
337 	 * symbol, see if there is a symbol with the same name in ld's
338 	 * symbol table.  If so, and if that symbol is still unresolved or
339 	 * tentative, process the corresponding archive member.
340 	 */
341 	found = again = 0;
342 	do {
343 		DBG_CALL(Dbg_file_ar(ofl->ofl_lml, name, again));
344 		DBG_CALL(Dbg_syms_ar_title(ofl->ofl_lml, name, again));
345 
346 		ndx = again = 0;
347 		for (arsym = adp->ad_start, aup = adp->ad_aux; arsym->as_name;
348 		    ++arsym, ++aup, ndx++) {
349 			Rej_desc	_rej = { 0 };
350 			Ar_mem		*amp;
351 			Sym		*sym;
352 			Boolean		vis = TRUE;
353 
354 			/*
355 			 * If the auxiliary members value indicates that this
356 			 * member has been processed then this symbol will have
357 			 * been added to the output file image already or the
358 			 * object was rejected in which case we don't want to
359 			 * process it again.
360 			 */
361 			if (aup->au_mem == FLG_ARMEM_PROC)
362 				continue;
363 
364 			/*
365 			 * If the auxiliary symbol element is non-zero lookup
366 			 * the symbol from the internal symbol table.
367 			 * (But you skip this if allextract is specified.)
368 			 */
369 			if ((allexrt == 0) && ((sdp = aup->au_syms) == 0)) {
370 				if ((sdp = ld_sym_find(arsym->as_name,
371 				    /* LINTED */
372 				    (Word)arsym->as_hash, 0, ofl)) == 0) {
373 					DBG_CALL(Dbg_syms_ar_entry(ofl->ofl_lml,
374 					    ndx, arsym));
375 					continue;
376 				}
377 				aup->au_syms = sdp;
378 			}
379 
380 			/*
381 			 * With '-z allextract', all members will be extracted.
382 			 *
383 			 * This archive member is a candidate for extraction if
384 			 * the internal symbol originates from an explicit file,
385 			 * and represents an undefined or tentative symbol.
386 			 *
387 			 * By default, weak references do not cause archive
388 			 * extraction, however the -zweakextract flag overrides
389 			 * this default.
390 			 *
391 			 * If this symbol has already been bound to a versioned
392 			 * shared object, but the shared objects version is not
393 			 * available, then a definition of this symbol from
394 			 * within the archive is a better candidate.  Similarly,
395 			 * if this symbol has been bound to a shared object, but
396 			 * the original reference expected hidden or protected
397 			 * visibility, then a definition of this symbol from
398 			 * within the archive is a better candidate.
399 			 */
400 			if (allexrt == 0) {
401 				Boolean		vers = TRUE;
402 				Ifl_desc	*ifl = sdp->sd_file;
403 
404 				sym = sdp->sd_sym;
405 
406 				if (sdp->sd_ref == REF_DYN_NEED) {
407 					uchar_t	oth;
408 
409 					if (ifl->ifl_vercnt) {
410 						Word		vndx;
411 						Ver_index	*vip;
412 
413 						vndx = sdp->sd_aux->sa_dverndx;
414 						vip = &ifl->ifl_verndx[vndx];
415 						if ((vip->vi_flags &
416 						    FLG_VER_AVAIL) == 0)
417 							vers = FALSE;
418 					}
419 
420 					oth = ELF_ST_VISIBILITY(sym->st_other);
421 					if ((oth == STV_HIDDEN) ||
422 					    (oth == STV_PROTECTED)) {
423 						vis = FALSE;
424 					}
425 				}
426 
427 				if (((ifl->ifl_flags & FLG_IF_NEEDED) == 0) ||
428 				    (vis && vers &&
429 				    (sym->st_shndx != SHN_UNDEF) &&
430 				    (sym->st_shndx != SHN_COMMON)) ||
431 				    ((ELF_ST_BIND(sym->st_info) == STB_WEAK) &&
432 				    (!(ofl->ofl_flags1 & FLG_OF1_WEAKEXT)))) {
433 					DBG_CALL(Dbg_syms_ar_entry(ofl->ofl_lml,
434 					    ndx, arsym));
435 					continue;
436 				}
437 			}
438 
439 			/*
440 			 * Determine if we have already extracted this member,
441 			 * and if so reuse the Ar_mem information.
442 			 */
443 			if ((amp = aup->au_mem) != 0) {
444 				arelf = amp->am_elf;
445 				arname = amp->am_name;
446 				arpath = amp->am_path;
447 			} else {
448 				size_t	len;
449 
450 				/*
451 				 * Set up a new elf descriptor for this member.
452 				 */
453 				if (elf_rand(adp->ad_elf, arsym->as_off) !=
454 				    arsym->as_off) {
455 					eprintf(ofl->ofl_lml, ERR_ELF,
456 					    MSG_INTL(MSG_ELF_ARMEM), name,
457 					    EC_WORD(arsym->as_off), ndx,
458 					    demangle(arsym->as_name));
459 					ofl->ofl_flags |= FLG_OF_FATAL;
460 					return (0);
461 				}
462 
463 				if ((arelf = elf_begin(fd, ELF_C_READ,
464 				    adp->ad_elf)) == NULL) {
465 					eprintf(ofl->ofl_lml, ERR_ELF,
466 					    MSG_INTL(MSG_ELF_BEGIN), name);
467 					ofl->ofl_flags |= FLG_OF_FATAL;
468 					return (0);
469 				}
470 
471 				/*
472 				 * Construct the member filename.
473 				 */
474 				if ((arhdr = elf_getarhdr(arelf)) == NULL) {
475 					eprintf(ofl->ofl_lml, ERR_ELF,
476 					    MSG_INTL(MSG_ELF_GETARHDR), name);
477 					ofl->ofl_flags |= FLG_OF_FATAL;
478 					return (0);
479 				}
480 				arname = arhdr->ar_name;
481 
482 				/*
483 				 * Construct the members full pathname, using
484 				 * the format "%s(%s)".
485 				 */
486 				len = strlen(name) + strlen(arname) + 3;
487 				if ((arpath = libld_malloc(len)) == 0)
488 					return (S_ERROR);
489 				(void) snprintf(arpath, len,
490 				    MSG_ORIG(MSG_FMT_ARMEM), name, arname);
491 
492 				/*
493 				 * Determine whether the support library wishes
494 				 * to process this open.  See comments in
495 				 * ld_process_open().
496 				 */
497 				ld_sup_open(ofl, (const char **)&arpath,
498 				    (const char **)&arname, &fd,
499 				    (FLG_IF_EXTRACT | FLG_IF_NEEDED),
500 				    &arelf, adp->ad_elf, arsym->as_off,
501 				    elf_kind(arelf));
502 			}
503 
504 			/*
505 			 * The symbol for which this archive member is being
506 			 * processed may provide a better alternative to the
507 			 * symbol that is presently known.  Two cases are
508 			 * covered:
509 			 *
510 			 *  i.	The present symbol represents tentative data.
511 			 *	The archive member may provide a data
512 			 *	definition symbol.
513 			 *  ii.	The present symbol represents a reference that
514 			 *	has seen a definition within a shared object
515 			 *	dependency, but the reference expects to be
516 			 *	reduced to hidden or protected visibility.
517 			 */
518 			if ((allexrt == 0) &&
519 			    ((sym->st_shndx == SHN_COMMON) || (vis == FALSE))) {
520 
521 				/*
522 				 * If we don't already have a member structure
523 				 * allocate one.
524 				 */
525 				if (!amp) {
526 					if ((amp = libld_calloc(sizeof (Ar_mem),
527 					    1)) == 0)
528 						return (S_ERROR);
529 					amp->am_elf = arelf;
530 					amp->am_name = arname;
531 					amp->am_path = arpath;
532 				}
533 				DBG_CALL(Dbg_syms_ar_checking(ofl->ofl_lml,
534 				    ndx, arsym, arname));
535 				if ((err = process_member(amp, arsym->as_name,
536 				    sdp, ofl)) == S_ERROR)
537 					return (S_ERROR);
538 
539 				/*
540 				 * If it turns out that we don't need this
541 				 * member simply initialize all other auxiliary
542 				 * entries that match this offset with this
543 				 * members address.  In this way we can resuse
544 				 * this information if we recurse back to this
545 				 * symbol.
546 				 */
547 				if (err == 0) {
548 					if (aup->au_mem == 0)
549 						ld_ar_member(adp, arsym,
550 						    aup, amp);
551 					continue;
552 				}
553 			}
554 
555 			/*
556 			 * Process the archive member.  Retain any error for
557 			 * return to the caller.
558 			 */
559 			DBG_CALL(Dbg_syms_ar_resolve(ofl->ofl_lml, ndx, arsym,
560 			    arname, allexrt));
561 			if ((err = (uintptr_t)ld_process_ifl(arpath, NULL, fd,
562 			    arelf, (FLG_IF_EXTRACT | FLG_IF_NEEDED), ofl,
563 			    &_rej)) == S_ERROR)
564 				return (S_ERROR);
565 
566 			/*
567 			 * If this member is rejected maintain the first
568 			 * rejection error for possible later display.  Keep the
569 			 * member as extracted so that we don't try and process
570 			 * it again on a rescan.
571 			 */
572 			if (_rej.rej_type) {
573 				if (rej.rej_type == 0) {
574 					rej.rej_type = _rej.rej_type;
575 					rej.rej_info = _rej.rej_info;
576 					rej.rej_name = (const char *)arpath;
577 				}
578 				ld_ar_member(adp, arsym, aup, FLG_ARMEM_PROC);
579 				continue;
580 			}
581 
582 			/*
583 			 * Indicate that the extracted member is in use.  This
584 			 * enables debugging diags, and indicates that a further
585 			 * rescan of all archives may be necessary.
586 			 */
587 			found = 1;
588 			ofl->ofl_flags1 |= FLG_OF1_EXTRACT;
589 			adp->ad_flags |= FLG_ARD_EXTRACT;
590 
591 			/*
592 			 * If not under '-z allextract' signal the need to
593 			 * rescan this archive.
594 			 */
595 			if (allexrt == 0)
596 				again = 1;
597 
598 			ld_ar_member(adp, arsym, aup, FLG_ARMEM_PROC);
599 			DBG_CALL(Dbg_util_nl(ofl->ofl_lml, DBG_NL_STD));
600 		}
601 	} while (again);
602 
603 	/*
604 	 * If no objects have been found in the archive test for any rejections
605 	 * and if one had occurred issue a warning - its possible a user has
606 	 * pointed at an archive containing the wrong class of elf members.
607 	 */
608 	if (found == 0) {
609 		if (rej.rej_type)
610 			eprintf(ofl->ofl_lml, ERR_WARNING,
611 			    MSG_INTL(reject[rej.rej_type]),
612 			    rej.rej_name ? rej.rej_name :
613 			    MSG_INTL(MSG_STR_UNKNOWN), conv_reject_desc(&rej));
614 	}
615 
616 	/*
617 	 * If this archive was extracted by -z allextract, the ar_aux table
618 	 * and elf descriptor can be freed.  Set ad_elf to NULL to mark the
619 	 * archive is completely processed.
620 	 */
621 	if (allexrt) {
622 		(void) elf_end(adp->ad_elf);
623 		adp->ad_elf = (Elf *)NULL;
624 	}
625 
626 	return (1);
627 }
628