xref: /illumos-gate/usr/src/cmd/sgs/ld/common/ld.c (revision b8052df9f609edb713f6828c9eecc3d7be19dfb3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include	<stdio.h>
27 #include	<stdlib.h>
28 #include	<unistd.h>
29 #include	<stdarg.h>
30 #include	<string.h>
31 #include	<strings.h>
32 #include	<errno.h>
33 #include	<fcntl.h>
34 #include	<libintl.h>
35 #include	<locale.h>
36 #include	<fcntl.h>
37 #include	<ar.h>
38 #include	<gelf.h>
39 #include	"conv.h"
40 #include	"libld.h"
41 #include	"machdep.h"
42 #include	"msg.h"
43 
44 /*
45  * The following prevent us from having to include ctype.h which defines these
46  * functions as macros which reference the __ctype[] array.  Go through .plt's
47  * to get to these functions in libc rather than have every invocation of ld
48  * have to suffer the R_SPARC_COPY overhead of the __ctype[] array.
49  */
50 extern int	isspace(int);
51 
/*
 * We examine ELF objects, and archives containing ELF objects, in order
 * to determine the ELFCLASS of the resulting object and/or the linker to be
 * used. We want to avoid the overhead of libelf for this, at least until
 * we are certain that we need it, so we start by reading bytes from
 * the beginning of the file. This type defines the buffer used to read
 * these initial bytes.
 *
 * A plain ELF object will start with an ELF header, whereas an archive
 * starts with a magic string (ARMAG) that is SARMAG bytes long. Any valid
 * ELF file or archive will contain more bytes than this buffer, so any
 * file shorter than this can be safely assumed not to be of interest.
 *
 * The ELF headers for ELFCLASS32 and ELFCLASS64 are identical up through
 * the e_version field, and all the information we require is found in this
 * common prefix. Furthermore, this cannot change, as the layout of an ELF
 * header is fixed by the ELF ABI. Hence, the ehdr part of this union is
 * not a full ELF header, but only the class-independent prefix that we need.
 *
 * As this is a raw (non-libelf) read, we are responsible for handling any
 * byte order difference between the object and the system running this
 * program when we read any datum larger than a byte (i.e. e_machine) from
 * this header.
 */
typedef union {
	struct {	/* Must match start of ELFxx_Ehdr in <sys/elf.h> */
		uchar_t		e_ident[EI_NIDENT];	/* ident bytes */
		Half		e_type;			/* file type */
		Half		e_machine;		/* target machine */
	} ehdr;
	char			armag[SARMAG];	/* compared against ARMAG */
} FILE_HDR;
84 
85 
86 /*
87  * Print a message to stdout
88  */
89 void
90 veprintf(Lm_list *lml, Error error, const char *format, va_list args)
91 {
92 	static const char	*strings[ERR_NUM];
93 
94 #if	defined(lint)
95 	/*
96 	 * The lml argument is only meaningful for diagnostics sent to ld.so.1.
97 	 * Supress the lint error by making a dummy assignment.
98 	 */
99 	lml = 0;
100 #endif
101 	/*
102 	 * For error types we issue a prefix for, make sure the necessary
103 	 * string has been internationalized and is ready.
104 	 */
105 	switch (error) {
106 	case ERR_WARNING_NF:
107 		if (strings[ERR_WARNING_NF] == NULL)
108 			strings[ERR_WARNING_NF] = MSG_INTL(MSG_ERR_WARNING);
109 		break;
110 	case ERR_WARNING:
111 		if (strings[ERR_WARNING] == NULL)
112 			strings[ERR_WARNING] = MSG_INTL(MSG_ERR_WARNING);
113 		break;
114 	case ERR_GUIDANCE:
115 		if (strings[ERR_GUIDANCE] == NULL)
116 			strings[ERR_GUIDANCE] = MSG_INTL(MSG_ERR_GUIDANCE);
117 		break;
118 	case ERR_FATAL:
119 		if (strings[ERR_FATAL] == NULL)
120 			strings[ERR_FATAL] = MSG_INTL(MSG_ERR_FATAL);
121 		break;
122 	case ERR_ELF:
123 		if (strings[ERR_ELF] == NULL)
124 			strings[ERR_ELF] = MSG_INTL(MSG_ERR_ELF);
125 	}
126 
127 	/* If strings[] element for our error type is non-NULL, issue prefix */
128 	if (strings[error] != NULL) {
129 		(void) fputs(MSG_ORIG(MSG_STR_LDDIAG), stderr);
130 		(void) fputs(strings[error], stderr);
131 	}
132 
133 	(void) vfprintf(stderr, format, args);
134 	if (error == ERR_ELF) {
135 		int	elferr;
136 
137 		if ((elferr = elf_errno()) != 0)
138 			(void) fprintf(stderr, MSG_ORIG(MSG_STR_ELFDIAG),
139 			    elf_errmsg(elferr));
140 	}
141 	(void) fprintf(stderr, MSG_ORIG(MSG_STR_NL));
142 	(void) fflush(stderr);
143 }
144 
145 
146 /*
147  * Print a message to stdout
148  */
149 /* VARARGS3 */
150 void
151 eprintf(Lm_list *lml, Error error, const char *format, ...)
152 {
153 	va_list	args;
154 
155 	va_start(args, format);
156 	veprintf(lml, error, format, args);
157 	va_end(args);
158 }
159 
160 
161 /*
162  * Examine the first object in an archive to determine its ELFCLASS
163  * and machine type.
164  *
165  * entry:
166  *	fd - Open file descriptor for file
167  *	elf - libelf ELF descriptor
168  *	class_ret, mach_ret - Address of variables to receive ELFCLASS
169  *		and machine type.
170  *
171  * exit:
172  *	On success, *class_ret and *mach_ret are filled in, and True (1)
173  *	is returned. On failure, False (0) is returned.
174  */
175 static int
176 archive(int fd, Elf *elf, uchar_t *class_ret, Half *mach_ret)
177 {
178 	Elf_Cmd		cmd = ELF_C_READ;
179 	Elf_Arhdr	*arhdr;
180 	Elf		*_elf = NULL;
181 	int		found = 0;
182 
183 	/*
184 	 * Process each item within the archive until we find the first
185 	 * ELF object, or alternatively another archive to recurse into.
186 	 * Stop after analyzing the first plain object found.
187 	 */
188 	while (!found && ((_elf = elf_begin(fd, cmd, elf)) != NULL)) {
189 		if ((arhdr = elf_getarhdr(_elf)) == NULL)
190 			return (0);
191 		if (*arhdr->ar_name != '/') {
192 			switch (elf_kind(_elf)) {
193 			case ELF_K_AR:
194 				found = archive(fd, _elf, class_ret, mach_ret);
195 				break;
196 			case ELF_K_ELF:
197 				if (gelf_getclass(_elf) == ELFCLASS64) {
198 					Elf64_Ehdr *ehdr;
199 
200 					if ((ehdr = elf64_getehdr(_elf)) ==
201 					    NULL)
202 						break;
203 					*class_ret = ehdr->e_ident[EI_CLASS];
204 					*mach_ret = ehdr->e_machine;
205 				} else {
206 					Elf32_Ehdr *ehdr;
207 
208 					if ((ehdr = elf32_getehdr(_elf)) ==
209 					    NULL)
210 						break;
211 					*class_ret = ehdr->e_ident[EI_CLASS];
212 					*mach_ret = ehdr->e_machine;
213 				}
214 				found = 1;
215 				break;
216 			}
217 		}
218 
219 		cmd = elf_next(_elf);
220 		(void) elf_end(_elf);
221 	}
222 
223 	return (found);
224 }
225 
226 /*
227  * Determine:
228  *	- ELFCLASS of resulting object (class)
229  *	- ELF machine type of resulting object (m_mach)
230  *
231  * In order of priority, we determine this information as follows:
232  *
233  * -	Command line options (-32, -64 -z target).
234  * -	From the first plain object seen on the command line. (This is
235  *	by far the most common case.)
236  * -	From the first object contained within the first archive
237  *	on the command line.
238  * -	If all else fails, we assume a 32-bit object for the native machine.
239  *
240  * entry:
241  *	argc, argv - Command line argument vector
242  *	class_ret - Address of variable to receive ELFCLASS of output object
243  */
244 static int
245 process_args(int argc, char **argv, uchar_t *class_ret, Half *mach)
246 {
247 	uchar_t	class = ELFCLASSNONE, ar_class;
248 	Half	mach32 = EM_NONE, mach64 = EM_NONE, ar_mach;
249 	int	c, ar_found = 0;
250 
251 	/*
252 	 * In general, libld.so is responsible for processing the
253 	 * command line options. The exception to this are those options
254 	 * that contain information about which linker to run and the
255 	 * class/machine of the output object. We examine the options
256 	 * here looking for the following:
257 	 *
258 	 *	-32	Produce an ELFCLASS32 object. This is the default, so
259 	 *		-32 is only needed when linking entirely from archives,
260 	 *		and the first archive contains a mix of 32 and 64-bit
261 	 *		objects, and the first object in that archive is 64-bit.
262 	 *		We do not expect this option to get much use, but it
263 	 *		ensures that the user can handle any situation.
264 	 *
265 	 *	-64	Produce an ELFCLASS64 object. (Note that this will
266 	 *		indirectly cause the use of the 64-bit linker if
267 	 *		the system is 64-bit capable). The most common need
268 	 *		for this option is when linking a filter object entirely
269 	 *		from a mapfile. The less common case is when linking
270 	 *		entirely from archives, and the first archive contains
271 	 *		a mix of 32 and 64-bit objects, and the first object
272 	 *		in that archive is 32-bit.
273 	 *
274 	 *	-z target=platform
275 	 *		Produce output object for the specified platform.
276 	 *		This option is needed when producing an object
277 	 *		for a non-native target entirely from a mapfile,
278 	 *		or when linking entirely from an archive containing
279 	 *		objects for multiple targets, and the first object
280 	 *		in the archive is not for the desired target.
281 	 *
282 	 * If we've already processed an object and we find -32/-64, and
283 	 * the object is of the wrong class, we have an error condition.
284 	 * We ignore it here, and let it fall through to libld, where the
285 	 * proper diagnosis and error message will occur.
286 	 */
287 	opterr = 0;
288 	optind = 1;
289 getmore:
290 	while ((c = ld_getopt(0, optind, argc, argv)) != -1) {
291 		switch (c) {
292 		case '3':
293 			if (strncmp(optarg, MSG_ORIG(MSG_ARG_TWO),
294 			    MSG_ARG_TWO_SIZE) == 0)
295 				class = ELFCLASS32;
296 			break;
297 
298 		case '6':
299 			if (strncmp(optarg, MSG_ORIG(MSG_ARG_FOUR),
300 			    MSG_ARG_FOUR_SIZE) == 0)
301 				class = ELFCLASS64;
302 			break;
303 
304 		case 'z':
305 			/* -z target=platform */
306 			if (strncmp(optarg, MSG_ORIG(MSG_ARG_TARGET),
307 			    MSG_ARG_TARGET_SIZE) == 0) {
308 				char *pstr = optarg + MSG_ARG_TARGET_SIZE;
309 
310 				if (strcasecmp(pstr,
311 				    MSG_ORIG(MSG_TARG_SPARC)) == 0) {
312 					mach32 = EM_SPARC;
313 					mach64 = EM_SPARCV9;
314 				} else if (strcasecmp(pstr,
315 				    MSG_ORIG(MSG_TARG_X86)) == 0) {
316 					mach32 = EM_386;
317 					mach64 = EM_AMD64;
318 				} else {
319 					eprintf(0, ERR_FATAL,
320 					    MSG_INTL(MSG_ERR_BADTARG), pstr);
321 					return (1);
322 				}
323 			}
324 			break;
325 		}
326 	}
327 
328 	/*
329 	 * Continue to look for the first ELF object to determine the class of
330 	 * objects to operate on. At the same time, look for the first archive
331 	 * of ELF objects --- if no plain ELF object is specified, the type
332 	 * of the first ELF object in the first archive will be used. If
333 	 * there is no object, and no archive, then we fall back to a 32-bit
334 	 * object for the native machine.
335 	 */
336 	for (; optind < argc; optind++) {
337 		int		fd;
338 		FILE_HDR	hdr;
339 
340 		/*
341 		 * If we detect some more options return to getopt().
342 		 * Checking argv[optind][1] against null prevents a forever
343 		 * loop if an unadorned `-' argument is passed to us.
344 		 */
345 		if (argv[optind][0] == '-') {
346 			if (argv[optind][1] == '\0')
347 				continue;
348 			else
349 				goto getmore;
350 		}
351 
352 		/*
353 		 * If we've already determined the object class and
354 		 * machine type, continue to the next argument. Only
355 		 * the first object contributes to this decision, and
356 		 * there's no value to opening or examing the subsequent
357 		 * ones. We do need to keep going though, because there
358 		 * may be additional options that might affect our
359 		 * class/machine decision.
360 		 */
361 		if ((class != ELFCLASSNONE) && (mach32 != EM_NONE))
362 			continue;
363 
364 		/*
365 		 * Open the file and determine if it is an object. We are
366 		 * looking for ELF objects, or archives of ELF objects.
367 		 *
368 		 * Plain objects are simple, and are the common case, so
369 		 * we examine them directly and avoid the map-unmap-map
370 		 * that would occur if we used libelf. Archives are too
371 		 * complex to be worth accessing directly, so if we identify
372 		 * an archive, we use libelf on it and accept the cost.
373 		 */
374 		if ((fd = open(argv[optind], O_RDONLY)) == -1) {
375 			int err = errno;
376 
377 			eprintf(0, ERR_FATAL, MSG_INTL(MSG_SYS_OPEN),
378 			    argv[optind], strerror(err));
379 			return (1);
380 		}
381 
382 		if (pread(fd, &hdr, sizeof (hdr), 0) != sizeof (hdr)) {
383 			(void) close(fd);
384 			continue;
385 		}
386 
387 		if ((hdr.ehdr.e_ident[EI_MAG0] == ELFMAG0) &&
388 		    (hdr.ehdr.e_ident[EI_MAG1] == ELFMAG1) &&
389 		    (hdr.ehdr.e_ident[EI_MAG2] == ELFMAG2) &&
390 		    (hdr.ehdr.e_ident[EI_MAG3] == ELFMAG3)) {
391 			if (class == ELFCLASSNONE) {
392 				class = hdr.ehdr.e_ident[EI_CLASS];
393 				if ((class != ELFCLASS32) &&
394 				    (class != ELFCLASS64))
395 					class = ELFCLASSNONE;
396 			}
397 
398 			if (mach32 == EM_NONE) {
399 				int	one = 1;
400 				uchar_t	*one_p = (uchar_t *)&one;
401 				int	ld_elfdata;
402 
403 				ld_elfdata = (one_p[0] == 1) ?
404 				    ELFDATA2LSB : ELFDATA2MSB;
405 				/*
406 				 * Both the 32 and 64-bit versions get the
407 				 * type from the object. If the user has
408 				 * asked for an inconsistant class/machine
409 				 * combination, libld will catch it.
410 				 */
411 				mach32 = mach64 =
412 				    (ld_elfdata == hdr.ehdr.e_ident[EI_DATA]) ?
413 				    hdr.ehdr.e_machine :
414 				    BSWAP_HALF(hdr.ehdr.e_machine);
415 			}
416 		} else if (!ar_found &&
417 		    (memcmp(&hdr.armag, ARMAG, SARMAG) == 0)) {
418 			Elf	*elf;
419 
420 			(void) elf_version(EV_CURRENT);
421 			if ((elf = elf_begin(fd, ELF_C_READ, NULL)) == NULL) {
422 				(void) close(fd);
423 				continue;
424 			}
425 			if (elf_kind(elf) == ELF_K_AR)
426 				ar_found =
427 				    archive(fd, elf, &ar_class, &ar_mach);
428 			(void) elf_end(elf);
429 		}
430 
431 		(void) close(fd);
432 	}
433 
434 	/*
435 	 * ELFCLASS of output object: If we did not establish a class from a
436 	 * command option, or from the first plain object, then use the class
437 	 * from the first archive, and failing that, default to 32-bit.
438 	 */
439 	if (class == ELFCLASSNONE)
440 		class = ar_found ? ar_class : ELFCLASS32;
441 	*class_ret = class;
442 
443 	/*
444 	 * Machine type of output object: If we did not establish a machine
445 	 * type from the command line, or from the first plain object, then
446 	 * use the machine established by the first archive, and failing that,
447 	 * use the native machine.
448 	 */
449 	*mach = (class == ELFCLASS64) ? mach64 : mach32;
450 	if (*mach == EM_NONE)
451 		if (ar_found)
452 			*mach = ar_mach;
453 		else
454 			*mach = (class == ELFCLASS64) ? M_MACH_64 : M_MACH_32;
455 
456 	return (0);
457 }
458 
/*
 * Process an LD_OPTIONS environment string.  This routine is first called to
 * count the number of options, and second to initialize a new argument array
 * with each option.
 *
 * entry:
 *	str - White space separated option string. Only modified when nargv
 *		is non-NULL, in which case the white space character
 *		following each option is overwritten with a NUL so that
 *		the option becomes a string in its own right.
 *	nargv - NULL to simply count, otherwise an array with room for at
 *		least as many entries as a counting call on the same string
 *		returned. Entries point into str rather than at copies.
 *
 * exit:
 *	Returns the number of options found. Leading, trailing, and
 *	repeated white space never produces an empty option.
 */
static int
process_ldoptions(char *str, char **nargv)
{
	int	argc = 0;

	for (;;) {
		char	*arg;

		/*
		 * Step over white space ahead of the next option. The cast
		 * is required: handing isspace() a negative char value
		 * (any byte above 0x7f where char is signed) is undefined.
		 */
		while (isspace((unsigned char)*str))
			str++;
		if (*str == '\0')
			break;

		/* Find the end of this option */
		arg = str;
		while ((*str != '\0') && !isspace((unsigned char)*str))
			str++;

		/*
		 * If a new argument array has been provided, record the
		 * option and terminate it within the original string.
		 */
		if (nargv != NULL) {
			nargv[argc] = arg;
			if (*str != '\0')
				*str++ = '\0';
		}
		argc++;
	}

	return (argc);
}
505 
506 /*
507  * Determine whether an LD_OPTIONS environment variable is set, and if so,
508  * prepend environment string as a series of options to the argv array.
509  */
510 static int
511 prepend_ldoptions(int *argcp, char ***argvp)
512 {
513 	int	nargc;
514 	char	**nargv, *ld_options;
515 	int	err, count;
516 
517 	if ((ld_options = getenv(MSG_ORIG(MSG_LD_OPTIONS))) == NULL)
518 		return (0);
519 
520 	/*
521 	 * Get rid of any leading white space, and make sure the environment
522 	 * string has size.
523 	 */
524 	while (isspace(*ld_options))
525 		ld_options++;
526 	if (ld_options[0] == '\0')
527 		return (0);
528 
529 	/*
530 	 * Prevent modification of actual environment strings.
531 	 */
532 	if ((ld_options = strdup(ld_options)) == NULL) {
533 		err = errno;
534 		eprintf(0, ERR_FATAL, MSG_INTL(MSG_SYS_ALLOC), strerror(err));
535 		return (1);
536 	}
537 
538 	/*
539 	 * Determine the number of options provided.
540 	 */
541 	nargc = process_ldoptions(ld_options, NULL);
542 
543 	/*
544 	 * Allocate a new argv array big enough to hold the new options from
545 	 * the environment string and the old argv options.
546 	 */
547 	if ((nargv = malloc((nargc + *argcp + 1) * sizeof (char *))) == NULL) {
548 		err = errno;
549 		eprintf(0, ERR_FATAL, MSG_INTL(MSG_SYS_ALLOC), strerror(err));
550 		return (1);
551 	}
552 
553 	/*
554 	 * Initialize first element of new argv array to be the first element
555 	 * of the old argv array (ie. calling programs name).  Then add the new
556 	 * args obtained from the environment.
557 	 */
558 	nargc = 0;
559 	nargv[nargc++] = (*argvp)[0];
560 	nargc += process_ldoptions(ld_options, &nargv[nargc]);
561 
562 	/*
563 	 * Now add the original argv array (skipping argv[0]) to the end of the
564 	 * new argv array, and re-vector argc and argv to reference this new
565 	 * array
566 	 */
567 	for (count = 1; count < *argcp; count++, nargc++)
568 		nargv[nargc] = (*argvp)[count];
569 
570 	nargv[nargc] = NULL;
571 
572 	*argcp = nargc;
573 	*argvp = nargv;
574 
575 	return (0);
576 }
577 
578 /*
579  * Check to see if there is a LD_ALTEXEC=<path to alternate ld> in the
580  * environment.  If so, first null the environment variable out, and then
581  * exec() the binary pointed to by the environment variable, passing the same
582  * arguments as the originating process.  This mechanism permits using
583  * alternate link-editors (debugging/developer copies) even in complex build
584  * environments.
585  */
586 static int
587 ld_altexec(char **argv, char **envp)
588 {
589 	char	*execstr;
590 	char	**str;
591 	int	err;
592 
593 	for (str = envp; *str; str++) {
594 		if (strncmp(*str, MSG_ORIG(MSG_LD_ALTEXEC),
595 		    MSG_LD_ALTEXEC_SIZE) == 0) {
596 			break;
597 		}
598 	}
599 
600 	/*
601 	 * If LD_ALTEXEC isn't set, return to continue executing the present
602 	 * link-editor.
603 	 */
604 	if (*str == 0)
605 		return (0);
606 
607 	/*
608 	 * Get a pointer to the actual string.  If it's a null entry, return.
609 	 */
610 	execstr = strdup(*str + MSG_LD_ALTEXEC_SIZE);
611 	if (*execstr == '\0')
612 		return (0);
613 
614 	/*
615 	 * Null out the LD_ALTEXEC= environment entry.
616 	 */
617 	(*str)[MSG_LD_ALTEXEC_SIZE] = '\0';
618 
619 	/*
620 	 * Set argv[0] to point to our new linker
621 	 */
622 	argv[0] = execstr;
623 
624 	/*
625 	 * And attempt to execute it.
626 	 */
627 	(void) execve(execstr, argv, envp);
628 
629 	/*
630 	 * If the exec() fails, return a failure indication.
631 	 */
632 	err = errno;
633 	eprintf(0, ERR_FATAL, MSG_INTL(MSG_SYS_EXEC), execstr,
634 	    strerror(err));
635 	return (1);
636 }
637 
638 int
639 main(int argc, char **argv, char **envp)
640 {
641 	uchar_t		class;
642 	Half		mach;
643 
644 	/*
645 	 * Establish locale.
646 	 */
647 	(void) setlocale(LC_MESSAGES, MSG_ORIG(MSG_STR_EMPTY));
648 	(void) textdomain(MSG_ORIG(MSG_SUNW_OST_SGS));
649 
650 	/*
651 	 * Execute an alternate linker if the LD_ALTEXEC environment variable is
652 	 * set.  If a specified alternative could not be found, bail.
653 	 */
654 	if (ld_altexec(argv, envp))
655 		return (1);
656 
657 	/*
658 	 * Check the LD_OPTIONS environment variable, and if present prepend
659 	 * the arguments specified to the command line argument list.
660 	 */
661 	if (prepend_ldoptions(&argc, &argv))
662 		return (1);
663 
664 	/*
665 	 * Examine the command arguments to determine:
666 	 *	- object class
667 	 *	- link-editor class
668 	 *	- target machine
669 	 */
670 	if (process_args(argc, argv, &class, &mach))
671 		return (1);
672 
673 	/* Call the libld entry point for the specified ELFCLASS */
674 	if (class == ELFCLASS64)
675 		return (ld64_main(argc, argv, mach));
676 	else
677 		return (ld32_main(argc, argv, mach));
678 }
679 
680 /*
681  * We supply this function for the msg module
682  */
683 const char *
684 _ld_msg(Msg mid)
685 {
686 	return (gettext(MSG_ORIG(mid)));
687 }
688