xref: /illumos-gate/usr/src/cmd/sgs/libld/common/libs.c (revision 1a220b56b93ff1dc80855691548503117af4cc10)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  *	Copyright (c) 1988 AT&T
24  *	  All Rights Reserved
25  *
26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27  * Use is subject to license terms.
28  */
29 #pragma ident	"%Z%%M%	%I%	%E% SMI"
30 
31 /*
32  * Library processing
33  */
34 #include	<stdio.h>
35 #include	<string.h>
36 #include	<debug.h>
37 #include	"msg.h"
38 #include	"_libld.h"
39 
40 /*
41  * Because a tentative symbol may cause the extraction of an archive member,
42  * make sure that the potential member is really required.  If the archive
43  * member has a strong defined symbol it will be extracted.  If it simply
44  * contains another tentative definition, or a defined function symbol, then it
45  * will not be used.
46  */
47 static int
48 process_member(Ar_mem *amp, const char *name, unsigned char obind,
49     Ofl_desc *ofl)
50 {
51 	Sym *		syms;
52 	Xword		symn, cnt;
53 	char 		*strs;
54 
55 	/*
56 	 * Find the first symbol table in the archive member, obtain its
57 	 * data buffer and determine the number of global symbols (Note,
58 	 * there must be a symbol table present otherwise the archive would
59 	 * never have been able to generate its own symbol entry for this
60 	 * member).
61 	 */
62 	if (amp->am_syms == 0) {
63 		Elf_Scn *	scn = NULL;
64 		Shdr *		shdr;
65 		Elf_Data *	data;
66 
67 		while (scn = elf_nextscn(amp->am_elf, scn)) {
68 			if ((shdr = elf_getshdr(scn)) == NULL) {
69 				eprintf(ofl->ofl_lml, ERR_ELF,
70 				    MSG_INTL(MSG_ELF_GETSHDR), amp->am_path);
71 				ofl->ofl_flags |= FLG_OF_FATAL;
72 				return (0);
73 			}
74 			if ((shdr->sh_type == SHT_SYMTAB) ||
75 			    (shdr->sh_type == SHT_DYNSYM))
76 				break;
77 		}
78 		if ((data = elf_getdata(scn, NULL)) == NULL) {
79 			eprintf(ofl->ofl_lml, ERR_ELF,
80 			    MSG_INTL(MSG_ELF_GETDATA), amp->am_path);
81 			ofl->ofl_flags |= FLG_OF_FATAL;
82 			return (0);
83 		}
84 		syms = (Sym *)data->d_buf;
85 		syms += shdr->sh_info;
86 		symn = shdr->sh_size / shdr->sh_entsize;
87 		symn -= shdr->sh_info;
88 
89 		/*
90 		 * Get the data for the associated string table.
91 		 */
92 		if ((scn = elf_getscn(amp->am_elf, (size_t)shdr->sh_link)) ==
93 		    NULL) {
94 			eprintf(ofl->ofl_lml, ERR_ELF,
95 			    MSG_INTL(MSG_ELF_GETSCN), amp->am_path);
96 			ofl->ofl_flags |= FLG_OF_FATAL;
97 			return (0);
98 		}
99 		if ((data = elf_getdata(scn, NULL)) == NULL) {
100 			eprintf(ofl->ofl_lml, ERR_ELF,
101 			    MSG_INTL(MSG_ELF_GETDATA), amp->am_path);
102 			ofl->ofl_flags |= FLG_OF_FATAL;
103 			return (0);
104 		}
105 		strs = data->d_buf;
106 
107 		/*
108 		 * Initialize the archive member structure in case we have to
109 		 * come through here again.
110 		 */
111 		amp->am_syms = syms;
112 		amp->am_strs = strs;
113 		amp->am_symn = symn;
114 	} else {
115 		syms = amp->am_syms;
116 		strs = amp->am_strs;
117 		symn = amp->am_symn;
118 	}
119 
120 	/*
121 	 * Loop through the symbol table entries looking for a match for the
122 	 * original symbol.  The archive member will be used if	the new symbol
123 	 * is a definition of an object (not a function).  Note however that a
124 	 * weak definition within the archive will not override a strong
125 	 * tentative symbol (see sym_realtent() resolution and ABI symbol
126 	 * binding description - page 4-27).
127 	 */
128 	for (cnt = 0; cnt < symn; syms++, cnt++) {
129 		Word		shndx = syms->st_shndx;
130 		unsigned char	info;
131 
132 		if ((shndx == SHN_ABS) || (shndx == SHN_COMMON) ||
133 		    (shndx == SHN_UNDEF))
134 			continue;
135 
136 		info = syms->st_info;
137 		if ((ELF_ST_TYPE(info) == STT_FUNC) ||
138 		    ((ELF_ST_BIND(info) == STB_WEAK) && (obind != STB_WEAK)))
139 			continue;
140 
141 		if (strcmp(strs + syms->st_name, name) == NULL)
142 			return (1);
143 	}
144 	return (0);
145 }
146 
147 /*
148  * Create an archive descriptor.  By maintaining a list of archives any
149  * duplicate occurrences of the same archive specified by the user enable us to
150  * pick off where the last processing finished.
151  */
152 Ar_desc *
153 ld_ar_setup(const char *name, Elf *elf, Ofl_desc *ofl)
154 {
155 	Ar_desc *	adp;
156 	size_t		number;
157 	Elf_Arsym *	start;
158 
159 	/*
160 	 * Get the archive symbol table. If this fails, we will
161 	 * ignore this file with a warning message.
162 	 */
163 	if ((start = elf_getarsym(elf, &number)) == 0) {
164 		if (elf_errno()) {
165 			eprintf(ofl->ofl_lml, ERR_ELF,
166 			    MSG_INTL(MSG_ELF_GETARSYM), name);
167 			ofl->ofl_flags |= FLG_OF_FATAL;
168 		} else
169 			eprintf(ofl->ofl_lml, ERR_WARNING,
170 			    MSG_INTL(MSG_ELF_ARSYM), name);
171 
172 		return (0);
173 	}
174 
175 	/*
176 	 * As this is a new archive reference establish a new descriptor.
177 	 */
178 	if ((adp = libld_malloc(sizeof (Ar_desc))) == 0)
179 		return ((Ar_desc *)S_ERROR);
180 	adp->ad_name = name;
181 	adp->ad_elf = elf;
182 	adp->ad_start = start;
183 	if ((adp->ad_aux = libld_calloc(sizeof (Ar_aux), number)) == 0)
184 		return ((Ar_desc *)S_ERROR);
185 
186 	/*
187 	 * Retain any command line options that are applicable to archive
188 	 * extraction in case we have to rescan this archive later.
189 	 */
190 	adp->ad_flags = ofl->ofl_flags1 & MSK_OF1_ARCHIVE;
191 
192 	ofl->ofl_arscnt++;
193 
194 	/*
195 	 * Add this new descriptor to the list of archives.
196 	 */
197 	if (list_appendc(&ofl->ofl_ars, adp) == 0)
198 		return ((Ar_desc *)S_ERROR);
199 	else
200 		return (adp);
201 }
202 
203 /*
204  * For each archive descriptor, maintain an `Ar_aux' table to parallel the
205  * archive symbol table (returned from elf_getarsym(3e)).  Use this table to
206  * hold a `Sym_desc' for each symbol (thus reducing the number of
207  * ld_sym_find()'s), and to hold the `Ar_mem' pointer.  The `Ar_mem' element
208  * can have one of three values indicating the state of the archive member
209  * associated with the offset for this symbol table entry:
210  *
211  *  0		indicates that the member has not been processed.
212  *
213  *  FLG_ARMEM_PROC
214  *		indicates that the member has been processed.
215  *
216  *  addr	indicates that the member has been investigated to determine if
217  *		it contained a symbol definition we need, but was found not to
218  *		be a candidate for extraction.  In this case the members
219  *		structure is maintained for possible later use.
220  *
221  * Each time we process an archive member we use its offset value to scan this
222  * `Ar_aux' list.  If the member has been extracted, each entry with the same
223  * offset has its `Ar_mem' pointer set to FLG_AMMEM_PROC.  Thus if we cycle back
224  * through the archive symbol table we will ignore these symbols as they will
225  * have already been added to the output image.  If a member has been processed
226  * but found not to contain a symbol we need, each entry with the same offset
227  * has its `Ar_mem' pointer set to the member structures address.
228  */
229 void
230 ld_ar_member(Ar_desc * adp, Elf_Arsym * arsym, Ar_aux * aup, Ar_mem * amp)
231 {
232 	Elf_Arsym *	_arsym = arsym;
233 	Ar_aux *	_aup = aup;
234 	size_t		_off = arsym->as_off;
235 
236 	if (_arsym != adp->ad_start) {
237 		do {
238 			_arsym--;
239 			_aup--;
240 			if (_arsym->as_off != _off)
241 				break;
242 			_aup->au_mem = amp;
243 		} while (_arsym != adp->ad_start);
244 	}
245 
246 	_arsym = arsym;
247 	_aup = aup;
248 
249 	do {
250 		if (_arsym->as_off != _off)
251 			break;
252 		_aup->au_mem = amp;
253 		_arsym++;
254 		_aup++;
255 	} while (_arsym->as_name);
256 }
257 
258 /*
259  * Read in the archive's symbol table; for each symbol in the table check
260  * whether that symbol satisfies an unresolved, or tentative reference in
261  * ld's internal symbol table; if so, the corresponding object from the
262  * archive is processed.  The archive symbol table is searched until we go
263  * through a complete pass without satisfying any unresolved symbols
264  */
265 uintptr_t
266 ld_process_archive(const char *name, int fd, Ar_desc *adp, Ofl_desc *ofl)
267 {
268 	Elf_Arsym *	arsym;
269 	Elf_Arhdr *	arhdr;
270 	Elf *		arelf;
271 	Ar_aux *	aup;
272 	Sym_desc *	sdp;
273 	char 		*arname, *arpath;
274 	Xword		ndx;
275 	int		found, again;
276 	int		allexrt = ofl->ofl_flags1 & FLG_OF1_ALLEXRT;
277 	uintptr_t	err;
278 	Rej_desc	rej = { 0 };
279 
280 	/*
281 	 * If a fatal error condition has been set there's really no point in
282 	 * processing the archive further.  Having got to this point we have at
283 	 * least established that the archive exists (thus verifying that the
284 	 * command line options that got us to this archive are correct).  Very
285 	 * large archives can take a significant time to process, therefore
286 	 * continuing on from here may significantly delay the fatal error
287 	 * message the user is already set to receive.
288 	 */
289 	if (ofl->ofl_flags & FLG_OF_FATAL)
290 		return (1);
291 
292 	/*
293 	 * If this archive was processed with -z allextract, then all members
294 	 * have already been extracted.
295 	 */
296 	if (adp->ad_elf == (Elf *)NULL)
297 		return (1);
298 
299 	/*
300 	 * Loop through archive symbol table until we make a complete pass
301 	 * without satisfying an unresolved reference.  For each archive
302 	 * symbol, see if there is a symbol with the same name in ld's
303 	 * symbol table.  If so, and if that symbol is still unresolved or
304 	 * tentative, process the corresponding archive member.
305 	 */
306 	found = again = 0;
307 	do {
308 		DBG_CALL(Dbg_file_ar(ofl->ofl_lml, name, again));
309 		DBG_CALL(Dbg_syms_ar_title(ofl->ofl_lml, name, again));
310 
311 		ndx = again = 0;
312 		for (arsym = adp->ad_start, aup = adp->ad_aux; arsym->as_name;
313 		    ++arsym, ++aup, ndx++) {
314 			Rej_desc	_rej = { 0 };
315 			Ar_mem *	amp;
316 			Sym *		sym;
317 
318 			/*
319 			 * If the auxiliary members value indicates that this
320 			 * member has been processed then this symbol will have
321 			 * been added to the output file image already or the
322 			 * object was rejected in which case we don't want to
323 			 * process it again.
324 			 */
325 			if (aup->au_mem == FLG_ARMEM_PROC)
326 				continue;
327 
328 			/*
329 			 * If the auxiliary symbol element is non-zero lookup
330 			 * the symbol from the internal symbol table.
331 			 * (But you skip this if allextract is specified.)
332 			 */
333 			if ((allexrt == 0) && ((sdp = aup->au_syms) == 0)) {
334 				if ((sdp = ld_sym_find(arsym->as_name,
335 				    /* LINTED */
336 				    (Word)arsym->as_hash, 0, ofl)) == 0) {
337 					DBG_CALL(Dbg_syms_ar_entry(ofl->ofl_lml,
338 					    ndx, arsym));
339 					continue;
340 				}
341 				aup->au_syms = sdp;
342 			}
343 
344 			/*
345 			 * With '-z allextract', all members will be extracted.
346 			 *
347 			 * This archive member is a candidate for extraction if
348 			 * the internal symbol originates from an explicit file
349 			 * and is undefined or tentative.  By default weak
350 			 * references do not cause archive extraction, however
351 			 * the -zweakextract flag overrides this default.
352 			 * If this symbol has been bound to a versioned shared
353 			 * object make sure it is available for linking.
354 			 */
355 			if (allexrt == 0) {
356 				Boolean		vers = TRUE;
357 				Ifl_desc *	file = sdp->sd_file;
358 
359 				if ((sdp->sd_ref == REF_DYN_NEED) &&
360 				    (file->ifl_vercnt)) {
361 					Word		vndx;
362 					Ver_index *	vip;
363 
364 					vndx = sdp->sd_aux->sa_dverndx;
365 					vip = &file->ifl_verndx[vndx];
366 					if (!(vip->vi_flags & FLG_VER_AVAIL))
367 						vers = FALSE;
368 				}
369 
370 				sym = sdp->sd_sym;
371 				if ((!(file->ifl_flags & FLG_IF_NEEDED)) ||
372 				    ((sym->st_shndx != SHN_UNDEF) &&
373 				    (sym->st_shndx != SHN_COMMON) && vers) ||
374 				    ((ELF_ST_BIND(sym->st_info) == STB_WEAK) &&
375 				    (!(ofl->ofl_flags1 & FLG_OF1_WEAKEXT)))) {
376 					DBG_CALL(Dbg_syms_ar_entry(ofl->ofl_lml,
377 					    ndx, arsym));
378 					continue;
379 				}
380 			}
381 
382 			/*
383 			 * Determine if we have already extracted this member,
384 			 * and if so reuse the Ar_mem information.
385 			 */
386 			if ((amp = aup->au_mem) != 0) {
387 				arelf = amp->am_elf;
388 				arname = amp->am_name;
389 				arpath = amp->am_path;
390 			} else {
391 				size_t	len;
392 
393 				/*
394 				 * Set up a new elf descriptor for this member.
395 				 */
396 				if (elf_rand(adp->ad_elf, arsym->as_off) !=
397 				    arsym->as_off) {
398 					eprintf(ofl->ofl_lml, ERR_ELF,
399 					    MSG_INTL(MSG_ELF_ARMEM), name,
400 					    EC_WORD(arsym->as_off), ndx,
401 					    demangle(arsym->as_name));
402 					ofl->ofl_flags |= FLG_OF_FATAL;
403 					return (0);
404 				}
405 				if ((arelf = elf_begin(fd, ELF_C_READ,
406 				    adp->ad_elf)) == NULL) {
407 					eprintf(ofl->ofl_lml, ERR_ELF,
408 					    MSG_INTL(MSG_ELF_BEGIN), name);
409 					ofl->ofl_flags |= FLG_OF_FATAL;
410 					return (0);
411 				}
412 
413 				/*
414 				 * Construct the member filename.
415 				 */
416 				if ((arhdr = elf_getarhdr(arelf)) == NULL) {
417 					eprintf(ofl->ofl_lml, ERR_ELF,
418 					    MSG_INTL(MSG_ELF_GETARHDR), name);
419 					ofl->ofl_flags |= FLG_OF_FATAL;
420 					return (0);
421 				}
422 				arname = arhdr->ar_name;
423 
424 				/*
425 				 * Construct the members full pathname, using
426 				 * the format "%s(%s)".
427 				 */
428 				len = strlen(name) + strlen(arname) + 3;
429 				if ((arpath = libld_malloc(len)) == 0)
430 					return (S_ERROR);
431 				(void) snprintf(arpath, len,
432 				    MSG_ORIG(MSG_FMT_ARMEM), name, arname);
433 			}
434 
435 			/*
436 			 * If the symbol for which this archive member is
437 			 * being processed is a tentative symbol, then this
438 			 * member must be verified to insure that it is
439 			 * going to provided a symbol definition that will
440 			 * override the tentative symbol.
441 			 */
442 			if ((allexrt == 0) && (sym->st_shndx == SHN_COMMON)) {
443 				/* LINTED */
444 				Byte bind = (Byte)ELF_ST_BIND(sym->st_info);
445 
446 				/*
447 				 * If we don't already have a member structure
448 				 * allocate one.
449 				 */
450 				if (!amp) {
451 					if ((amp = libld_calloc(sizeof (Ar_mem),
452 					    1)) == 0)
453 						return (S_ERROR);
454 					amp->am_elf = arelf;
455 					amp->am_name = arname;
456 					amp->am_path = arpath;
457 				}
458 				DBG_CALL(Dbg_syms_ar_checking(ofl->ofl_lml,
459 				    ndx, arsym, arname));
460 				if ((err = process_member(amp, arsym->as_name,
461 				    bind, ofl)) == S_ERROR)
462 					return (S_ERROR);
463 
464 				/*
465 				 * If it turns out that we don't need this
466 				 * member simply initialize all other auxiliary
467 				 * entries that match this offset with this
468 				 * members address.  In this way we can resuse
469 				 * this information if we recurse back to this
470 				 * symbol.
471 				 */
472 				if (err == 0) {
473 					if (aup->au_mem == 0)
474 						ld_ar_member(adp, arsym,
475 						    aup, amp);
476 					continue;
477 				}
478 			}
479 
480 			/*
481 			 * Process the archive member.  Retain any error for
482 			 * return to the caller.
483 			 */
484 			DBG_CALL(Dbg_syms_ar_resolve(ofl->ofl_lml, ndx, arsym,
485 			    arname, allexrt));
486 			if ((err = (uintptr_t)ld_process_ifl(arpath, NULL, fd,
487 			    arelf, FLG_IF_EXTRACT | FLG_IF_NEEDED, ofl,
488 			    &_rej)) == S_ERROR)
489 				return (S_ERROR);
490 
491 			/*
492 			 * If this member is rejected maintain the first
493 			 * rejection error for possible later display.  Keep the
494 			 * member as extracted so that we don't try and process
495 			 * it again on a rescan.
496 			 */
497 			if (_rej.rej_type) {
498 				if (rej.rej_type == 0) {
499 					rej.rej_type = _rej.rej_type;
500 					rej.rej_info = _rej.rej_info;
501 					rej.rej_name = (const char *)arpath;
502 				}
503 				ld_ar_member(adp, arsym, aup, FLG_ARMEM_PROC);
504 				continue;
505 			}
506 
507 			/*
508 			 * Indicate that the extracted member is in use.  This
509 			 * enables debugging diags, and indicates that a further
510 			 * rescan of all archives may be necessary.
511 			 */
512 			found = 1;
513 			ofl->ofl_flags1 |= FLG_OF1_EXTRACT;
514 			adp->ad_flags |= FLG_ARD_EXTRACT;
515 
516 			/*
517 			 * If not under '-z allextract' signal the need to
518 			 * rescan this archive.
519 			 */
520 			if (allexrt == 0)
521 				again = 1;
522 
523 			ld_ar_member(adp, arsym, aup, FLG_ARMEM_PROC);
524 			DBG_CALL(Dbg_util_nl(ofl->ofl_lml, DBG_NL_STD));
525 		}
526 	} while (again);
527 
528 	/*
529 	 * If no objects have been found in the archive test for any rejections
530 	 * and if one had occurred issue a warning - its possible a user has
531 	 * pointed at an archive containing the wrong class of elf members.
532 	 */
533 	if (found == 0) {
534 		if (rej.rej_type)
535 			eprintf(ofl->ofl_lml, ERR_WARNING,
536 			    MSG_INTL(reject[rej.rej_type]),
537 			    rej.rej_name ? rej.rej_name :
538 			    MSG_INTL(MSG_STR_UNKNOWN), conv_reject_desc(&rej));
539 	}
540 
541 	/*
542 	 * If this archive was extracted by -z allextract, the ar_aux table
543 	 * and elf descriptor can be freed.  Set ad_elf to NULL to mark the
544 	 * archive is completely processed.
545 	 */
546 	if (allexrt) {
547 		(void) elf_end(adp->ad_elf);
548 		adp->ad_elf = (Elf *)NULL;
549 	}
550 
551 	return (1);
552 }
553