xref: /illumos-gate/usr/src/cmd/sgs/ld/common/ld.c (revision e7cbe64f7a72dae5cb44f100db60ca88f3313c65)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 
29 #include	<stdio.h>
30 #include	<stdlib.h>
31 #include	<unistd.h>
32 #include	<stdarg.h>
33 #include	<string.h>
34 #include	<strings.h>
35 #include	<errno.h>
36 #include	<fcntl.h>
37 #include	<libintl.h>
38 #include	<locale.h>
39 #include	<fcntl.h>
40 #include	"conv.h"
41 #include	"libld.h"
42 #include	"machdep.h"
43 #include	"msg.h"
44 
45 /*
46  * The following prevent us from having to include ctype.h which defines these
47  * functions as macros which reference the __ctype[] array.  Go through .plt's
48  * to get to these functions in libc rather than have every invocation of ld
49  * have to suffer the R_SPARC_COPY overhead of the __ctype[] array.
50  */
51 extern int	isspace(int);
52 
53 /*
54  * Print a message to stdout
55  */
56 /* VARARGS3 */
57 void
58 eprintf(Lm_list *lml, Error error, const char *format, ...)
59 {
60 	va_list			args;
61 	static const char	*strings[ERR_NUM] = { MSG_ORIG(MSG_STR_EMPTY) };
62 
63 #if	defined(lint)
64 	/*
65 	 * The lml argument is only meaningful for diagnostics sent to ld.so.1.
66 	 * Supress the lint error by making a dummy assignment.
67 	 */
68 	lml = 0;
69 #endif
70 	if (error > ERR_NONE) {
71 		if (error == ERR_WARNING) {
72 			if (strings[ERR_WARNING] == 0)
73 				strings[ERR_WARNING] =
74 				    MSG_INTL(MSG_ERR_WARNING);
75 		} else if (error == ERR_FATAL) {
76 			if (strings[ERR_FATAL] == 0)
77 				strings[ERR_FATAL] = MSG_INTL(MSG_ERR_FATAL);
78 		} else if (error == ERR_ELF) {
79 			if (strings[ERR_ELF] == 0)
80 				strings[ERR_ELF] = MSG_INTL(MSG_ERR_ELF);
81 		}
82 		(void) fputs(MSG_ORIG(MSG_STR_LDDIAG), stderr);
83 	}
84 	(void) fputs(strings[error], stderr);
85 
86 	va_start(args, format);
87 	(void) vfprintf(stderr, format, args);
88 	if (error == ERR_ELF) {
89 		int	elferr;
90 
91 		if ((elferr = elf_errno()) != 0)
92 			(void) fprintf(stderr, MSG_ORIG(MSG_STR_ELFDIAG),
93 			    elf_errmsg(elferr));
94 	}
95 	(void) fprintf(stderr, MSG_ORIG(MSG_STR_NL));
96 	(void) fflush(stderr);
97 	va_end(args);
98 }
99 
100 
101 /*
102  * Determine:
103  *	- ELFCLASS of resulting object (aoutclass)
104  *	- Whether we need the 32 or 64-bit libld (ldclass)
105  *	- ELF machine type of resulting object (m_mach)
106  */
107 static int
108 process_args(int argc, char **argv, uchar_t *aoutclass, uchar_t *ldclass,
109     Half *mach)
110 {
111 #if	defined(_LP64)
112 	uchar_t lclass = ELFCLASS64;
113 #else
114 	uchar_t	lclass = ELFCLASSNONE;
115 #endif
116 	uchar_t	aclass = ELFCLASSNONE;
117 	Half	mach32 = EM_NONE, mach64 = EM_NONE;
118 	int	c;
119 
120 getmore:
121 	/*
122 	 * In general, libld.so is responsible for processing the
123 	 * command line options. The exception to this are those options
124 	 * that contain information about which linker to run and the
125 	 * class/machine of the output object. We examine the options
126 	 * here looking for the following:
127 	 *
128 	 *	-64
129 	 *		Produce an ELFCLASS64 object. Use the 64-bit linker.
130 	 *
131 	 *	-zaltexec64
132 	 *		Use the 64-bit linker regardless of the class
133 	 *		of the output object.
134 	 *
135 	 *	-z target=platform
136 	 *		Produce output object for the specified platform.
137 	 *
138 	 * The -64 and -ztarget options are used when the only input to
139 	 * ld() is a mapfile or archive, and a 64-bit or non-native output
140 	 * object is required.
141 	 *
142 	 * If we've already processed a 32-bit object and we find -64, we have
143 	 * an error condition, but let this fall through to libld to obtain the
144 	 * default error message.
145 	 */
146 	opterr = 0;
147 	while ((c = getopt(argc, argv, MSG_ORIG(MSG_STR_OPTIONS))) != -1) {
148 		switch (c) {
149 			case '6':
150 				if (strncmp(optarg, MSG_ORIG(MSG_ARG_FOUR),
151 				    MSG_ARG_FOUR_SIZE) == 0)
152 					aclass = ELFCLASS64;
153 				break;
154 
155 			case 'z':
156 #if	!defined(_LP64)
157 				/* -z altexec64 */
158 				if (strncmp(optarg, MSG_ORIG(MSG_ARG_ALTEXEC64),
159 				    MSG_ARG_ALTEXEC64_SIZE) == 0) {
160 					lclass = ELFCLASS64;
161 					break;
162 				}
163 #endif
164 
165 				/* -z target=platform */
166 				if (strncmp(optarg, MSG_ORIG(MSG_ARG_TARGET),
167 				    MSG_ARG_TARGET_SIZE) == 0) {
168 					char *pstr =
169 					    optarg + MSG_ARG_TARGET_SIZE;
170 
171 					if (strcasecmp(pstr,
172 					    MSG_ORIG(MSG_TARG_SPARC)) == 0) {
173 						mach32 = EM_SPARC;
174 						mach64 = EM_SPARCV9;
175 					} else if (strcasecmp(pstr,
176 					    MSG_ORIG(MSG_TARG_X86)) == 0) {
177 						mach32 = EM_386;
178 						mach64 = EM_AMD64;
179 					} else {
180 						eprintf(0, ERR_FATAL,
181 						    MSG_INTL(MSG_ERR_BADTARG),
182 						    pstr);
183 						return (1);
184 					}
185 				}
186 				break;
187 
188 			default:
189 				break;
190 		}
191 	}
192 
193 	/*
194 	 * Continue to look for the first ELF object to determine the class of
195 	 * objects to operate on.
196 	 */
197 	for (; optind < argc; optind++) {
198 		int		fd;
199 		Elf32_Ehdr	ehdr32;
200 
201 		/*
202 		 * If we detect some more options return to getopt().
203 		 * Checking argv[optind][1] against null prevents a forever
204 		 * loop if an unadorned `-' argument is passed to us.
205 		 */
206 		if (argv[optind][0] == '-') {
207 			if (argv[optind][1] == '\0')
208 				continue;
209 			else
210 				goto getmore;
211 		}
212 
213 		/*
214 		 * If we've already determined the object class and
215 		 * machine type, continue to the next argument. Only
216 		 * the first object contributes to this decision, and
217 		 * there's no value to opening or examing the subsequent
218 		 * ones. We do need to keep going though, because there
219 		 * may be additional options that might affect our
220 		 * class/machine decision.
221 		 */
222 		if ((aclass != ELFCLASSNONE) && (mach32 != EM_NONE))
223 			continue;
224 
225 		/*
226 		 * Open the file and determine the files ELF class.
227 		 */
228 		if ((fd = open(argv[optind], O_RDONLY)) == -1) {
229 			int err = errno;
230 
231 			eprintf(0, ERR_FATAL, MSG_INTL(MSG_SYS_OPEN),
232 			    argv[optind], strerror(err));
233 			return (1);
234 		}
235 
236 		/*
237 		 * Note that we read an entire 32-bit ELF header struct
238 		 * here, even though we have yet to determine that the
239 		 * file is an ELF object or that it is ELFCLASS32. We
240 		 * do this because:
241 		 *	- Any valid ELF object of any class must
242 		 *		have at least this number of bytes in it,
243 		 *		since an ELF header is manditory, and since
244 		 *		a 32-bit header is smaller than a 64-bit one.
245 		 *	- The 32 and 64-bit ELF headers are identical
246 		 *		up through the e_version field, so we can
247 		 *		obtain the e_machine value of a 64-bit
248 		 *		object via the e_machine value we read into
249 		 *		the 32-bit version. This cannot change, because
250 		 *		the layout of an ELF header is fixed by the ABI.
251 		 *
252 		 * Note however that we do have to worry about the byte
253 		 * order difference between the object and the system
254 		 * running this program when we read the e_machine value,
255 		 * since it is a multi-byte value;
256 		 */
257 		if ((read(fd, &ehdr32, sizeof (ehdr32)) == sizeof (ehdr32)) &&
258 		    (ehdr32.e_ident[EI_MAG0] == ELFMAG0) &&
259 		    (ehdr32.e_ident[EI_MAG1] == ELFMAG1) &&
260 		    (ehdr32.e_ident[EI_MAG2] == ELFMAG2) &&
261 		    (ehdr32.e_ident[EI_MAG3] == ELFMAG3)) {
262 			if (aclass == ELFCLASSNONE) {
263 				aclass = ehdr32.e_ident[EI_CLASS];
264 				if ((aclass != ELFCLASS32) &&
265 				    (aclass != ELFCLASS64))
266 					aclass = ELFCLASSNONE;
267 			}
268 
269 			if (mach32 == EM_NONE) {
270 				int	one = 1;
271 				uchar_t	*one_p = (uchar_t *)&one;
272 				int	ld_elfdata;
273 
274 				ld_elfdata = (one_p[0] == 1) ?
275 				    ELFDATA2LSB : ELFDATA2MSB;
276 				/*
277 				 * Both the 32 and 64-bit versions get the
278 				 * type from the object. If the user has
279 				 * asked for an inconsistant class/machine
280 				 * combination, libld will catch it.
281 				 */
282 				mach32 = mach64 =
283 				    (ld_elfdata == ehdr32.e_ident[EI_DATA]) ?
284 				    ehdr32.e_machine :
285 				    BSWAP_HALF(ehdr32.e_machine);
286 			}
287 		}
288 
289 		(void) close(fd);
290 	}
291 
292 	/*
293 	 * If we couldn't establish a class, default to 32-bit.
294 	 */
295 	if (aclass == ELFCLASSNONE)
296 		aclass = ELFCLASS32;
297 	*aoutclass = aclass;
298 
299 	if (lclass == ELFCLASSNONE)
300 		lclass = ELFCLASS32;
301 	*ldclass = lclass;
302 
303 	/*
304 	 * Use the machine type that goes with the class we've determined.
305 	 * If we didn't find a usable machine type, use the native
306 	 * machine.
307 	 */
308 	*mach = (aclass == ELFCLASS64) ? mach64 : mach32;
309 	if (*mach == EM_NONE)
310 		*mach = (aclass == ELFCLASS64) ? M_MACH_64 : M_MACH_32;
311 
312 	return (0);
313 }
314 
315 /*
316  * Prepend environment string as a series of options to the argv array.
317  */
318 static int
319 prepend_ldoptions(char *ld_options, int *argcp, char ***argvp)
320 {
321 	int	nargc;			/* new argc */
322 	char	**nargv;		/* new argv */
323 	char	*arg, *string;
324 	int	count;
325 
326 	/*
327 	 * Get rid of leading white space, and make sure the string has size.
328 	 */
329 	while (isspace(*ld_options))
330 		ld_options++;
331 	if (*ld_options == '\0')
332 		return (1);
333 
334 	nargc = 0;
335 	arg = string = ld_options;
336 
337 	/*
338 	 * Walk the environment string counting any arguments that are
339 	 * separated by white space.
340 	 */
341 	while (*string != '\0') {
342 		if (isspace(*string)) {
343 			nargc++;
344 			while (isspace(*string))
345 				string++;
346 			arg = string;
347 		} else
348 			string++;
349 	}
350 	if (arg != string)
351 		nargc++;
352 
353 	/*
354 	 * Allocate a new argv array big enough to hold the new options from
355 	 * the environment string and the old argv options.
356 	 */
357 	if ((nargv = calloc(nargc + *argcp, sizeof (char *))) == 0) {
358 		int	err = errno;
359 		eprintf(0, ERR_FATAL, MSG_INTL(MSG_SYS_ALLOC), strerror(err));
360 		return (0);
361 	}
362 
363 	/*
364 	 * Initialize first element of new argv array to be the first element
365 	 * of the old argv array (ie. calling programs name).  Then add the new
366 	 * args obtained from the environment.
367 	 */
368 	nargv[0] = (*argvp)[0];
369 	nargc = 0;
370 	arg = string = ld_options;
371 	while (*string != '\0') {
372 		if (isspace(*string)) {
373 			nargc++;
374 			*string++ = '\0';
375 			nargv[nargc] = arg;
376 			while (isspace(*string))
377 				string++;
378 			arg = string;
379 		} else
380 			string++;
381 	}
382 	if (arg != string) {
383 		nargc++;
384 		nargv[nargc] = arg;
385 	}
386 
387 	/*
388 	 * Now add the original argv array (skipping argv[0]) to the end of the
389 	 * new argv array, and overwrite the old argc and argv.
390 	 */
391 	for (count = 1; count < *argcp; count++) {
392 		nargc++;
393 		nargv[nargc] = (*argvp)[count];
394 	}
395 	*argcp = ++nargc;
396 	*argvp = nargv;
397 
398 	return (1);
399 }
400 
401 /*
402  * Check to see if there is a LD_ALTEXEC=<path to alternate ld> in the
403  * environment.  If so, first null the environment variable out, and then
404  * exec() the binary pointed to by the environment variable, passing the same
405  * arguments as the originating process.  This mechanism permits using
406  * alternate link-editors (debugging/developer copies) even in complex build
407  * environments.
408  */
409 static int
410 ld_altexec(char **argv, char **envp)
411 {
412 	char	*execstr;
413 	char	**str;
414 	int	err;
415 
416 	for (str = envp; *str; str++) {
417 		if (strncmp(*str, MSG_ORIG(MSG_LD_ALTEXEC),
418 		    MSG_LD_ALTEXEC_SIZE) == 0) {
419 			break;
420 		}
421 	}
422 
423 	/*
424 	 * If LD_ALTEXEC isn't set, return to continue executing the present
425 	 * link-editor.
426 	 */
427 	if (*str == 0)
428 		return (0);
429 
430 	/*
431 	 * Get a pointer to the actual string.  If it's a null entry, return.
432 	 */
433 	execstr = strdup(*str + MSG_LD_ALTEXEC_SIZE);
434 	if (*execstr == '\0')
435 		return (0);
436 
437 	/*
438 	 * Null out the LD_ALTEXEC= environment entry.
439 	 */
440 	(*str)[MSG_LD_ALTEXEC_SIZE] = '\0';
441 
442 	/*
443 	 * Set argv[0] to point to our new linker
444 	 */
445 	argv[0] = execstr;
446 
447 	/*
448 	 * And attempt to execute it.
449 	 */
450 	(void) execve(execstr, argv, envp);
451 
452 	/*
453 	 * If the exec() fails, return a failure indication.
454 	 */
455 	err = errno;
456 	eprintf(0, ERR_FATAL, MSG_INTL(MSG_SYS_EXEC), execstr,
457 	    strerror(err));
458 	return (1);
459 }
460 
461 int
462 main(int argc, char **argv, char **envp)
463 {
464 	char		*ld_options, **oargv = argv;
465 	uchar_t 	aoutclass, ldclass, checkclass;
466 	Half		mach;
467 
468 	/*
469 	 * XX64 -- Strip "-Wl," from the head of each argument.  This is to
470 	 * accommodate awkwardness in passing ld arguments to gcc while
471 	 * maintaining the structure of the OSNet build environment's Makefiles.
472 	 */
473 	{
474 		int i;
475 		char *p;
476 
477 		for (i = 0; i < argc; i++) {
478 			p = argv[i];
479 			while (*(p + 1) == 'W' && strncmp(p, "-Wl,-", 5) == 0)
480 				argv[i] = (p += 4);
481 		}
482 	}
483 
484 	/*
485 	 * Establish locale.
486 	 */
487 	(void) setlocale(LC_MESSAGES, MSG_ORIG(MSG_STR_EMPTY));
488 	(void) textdomain(MSG_ORIG(MSG_SUNW_OST_SGS));
489 
490 	/*
491 	 * Execute an alternate linker if the LD_ALTEXEC environment variable is
492 	 * set.  If a specified alternative could not be found, bail.
493 	 */
494 	if (ld_altexec(argv, envp))
495 		return (1);
496 
497 	/*
498 	 * Check the LD_OPTIONS environment variable, and if present prepend
499 	 * the arguments specified to the command line argument list.
500 	 */
501 	if ((ld_options = getenv(MSG_ORIG(MSG_LD_OPTIONS))) != NULL) {
502 		/*
503 		 * Prevent modification of actual environment strings.
504 		 */
505 		if (((ld_options = strdup(ld_options)) == NULL) ||
506 		    (prepend_ldoptions(ld_options, &argc, &argv) == 0))
507 			return (1);
508 	}
509 
510 	/*
511 	 * Examine the command arguments to determine:
512 	 *	- object class
513 	 *	- link-editor class
514 	 *	- target machine
515 	 */
516 	if (process_args(argc, argv, &aoutclass, &ldclass, &mach))
517 		return (1);
518 
519 	/*
520 	 * If we're processing 64-bit objects, or the user specifically asked
521 	 * for a 64-bit link-editor, determine if a 64-bit ld() can be executed.
522 	 * Bail if a 64-bit ld() was explicitly asked for, but one could not be
523 	 * found.
524 	 */
525 	if ((aoutclass == ELFCLASS64) || (ldclass == ELFCLASS64))
526 		checkclass = conv_check_native(oargv, envp);
527 
528 	if ((ldclass == ELFCLASS64) && (checkclass != ELFCLASS64)) {
529 		eprintf(0, ERR_FATAL, MSG_INTL(MSG_SYS_64));
530 		return (1);
531 	}
532 
533 	/*
534 	 * Reset the getopt(3c) error message flag, and call the generic entry
535 	 * point using the appropriate class.
536 	 */
537 	optind = opterr = 1;
538 	if (aoutclass == ELFCLASS64)
539 		return (ld64_main(argc, argv, mach));
540 	else
541 		return (ld32_main(argc, argv, mach));
542 }
543 
544 /*
545  * Exported interfaces required by our dependencies.  libld and friends bind to
546  * the different implementations of these provided by either ld or ld.so.1.
547  */
548 const char *
549 _ld_msg(Msg mid)
550 {
551 	return (gettext(MSG_ORIG(mid)));
552 }
553