1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright 2023 Oxide Computer Company 25 */ 26 27 #include <ctype.h> 28 #include <stdio.h> 29 #include <stdlib.h> 30 #include <unistd.h> 31 #include <stdarg.h> 32 #include <stdbool.h> 33 #include <string.h> 34 #include <strings.h> 35 #include <errno.h> 36 #include <fcntl.h> 37 #include <libintl.h> 38 #include <locale.h> 39 #include <fcntl.h> 40 #include <ar.h> 41 #include <gelf.h> 42 #include "conv.h" 43 #include "libld.h" 44 #include "machdep.h" 45 #include "msg.h" 46 47 typedef int (*ld_main_f)(int, char *[], Half); 48 49 static const char *errstr[ERR_NUM]; 50 51 static void 52 init_strings(void) 53 { 54 (void) setlocale(LC_MESSAGES, MSG_ORIG(MSG_STR_EMPTY)); 55 (void) textdomain(MSG_ORIG(MSG_SUNW_OST_SGS)); 56 57 /* 58 * For error types we issue a prefix for, make sure the necessary 59 * string has been internationalized and is ready. 60 */ 61 errstr[ERR_WARNING_NF] = MSG_INTL(MSG_ERR_WARNING); 62 errstr[ERR_WARNING] = MSG_INTL(MSG_ERR_WARNING); 63 errstr[ERR_GUIDANCE] = MSG_INTL(MSG_ERR_GUIDANCE); 64 errstr[ERR_FATAL] = MSG_INTL(MSG_ERR_FATAL); 65 errstr[ERR_ELF] = MSG_INTL(MSG_ERR_ELF); 66 } 67 68 /* 69 * Returns a duplicate of the given environment variable, with 70 * leading whitespace stripped off. Returns NULL if the variable 71 * is not in the environment, or if it is empty. Allocation 72 * failure terminates the program. 73 */ 74 static char * 75 getenv_nonempty(const char *name) 76 { 77 char *var; 78 79 var = getenv(name); 80 if (var == NULL) 81 return (NULL); 82 while (isspace(*var)) 83 var++; 84 if (*var == '\0') 85 return (NULL); 86 var = strdup(var); 87 if (var == NULL) { 88 eprintf(0, ERR_FATAL, MSG_INTL(MSG_SYS_ALLOC), strerror(errno)); 89 exit(EXIT_FAILURE); 90 } 91 92 return (var); 93 } 94 95 /* 96 * Like strsep(3), but using `isspace` instead of 97 * a separator string. 98 */ 99 static char * 100 strsep_ws(char **strp) 101 { 102 char *str, *s; 103 104 str = *strp; 105 if (*str == '\0') 106 return (NULL); 107 s = str; 108 while (*s != '\0' && !isspace(*s)) 109 s++; 110 if (*s != '\0') 111 *s++ = '\0'; 112 *strp = s; 113 114 return (str); 115 } 116 117 /* 118 * We examine ELF objects, and archives containing ELF objects, in order 119 * to determine the ELFCLASS of the resulting object and/or the linker to be 120 * used. We want to avoid the overhead of libelf for this, at least until 121 * we are certain that we need it, so we start by reading bytes from 122 * the beginning of the file. This type defines the buffer used to read 123 * these initial bytes. 124 * 125 * A plain ELF object will start with an ELF header, whereas an archive 126 * starts with a magic string (ARMAG) that is SARMAG bytes long. Any valid 127 * ELF file or archive will contain more bytes than this buffer, so any 128 * file shorter than this can be safely assummed not to be of interest. 129 * 130 * The ELF header for ELFCLASS32 and ELFCLASS64 are identical up through the 131 * the e_version field, and all the information we require is found in this 132 * common prefix. Furthermore, this cannot change, as the layout of an ELF 133 * header is fixed by the ELF ABI. Hence, the ehdr part of this union is 134 * not a full ELF header, but only the class-independent prefix that we need. 135 * 136 * As this is a raw (non-libelf) read, we are responsible for handling any 137 * byte order difference between the object and the system running this 138 * program when we read any datum larger than a byte (i.e. e_machine) from 139 * this header. 140 */ 141 typedef union { 142 struct { /* Must match start of ELFxx_Ehdr in <sys/elf.h> */ 143 uchar_t e_ident[EI_NIDENT]; /* ident bytes */ 144 Half e_type; /* file type */ 145 Half e_machine; /* target machine */ 146 } ehdr; 147 char armag[SARMAG]; 148 } FILE_HDR; 149 150 /* 151 * Print a message to stdout 152 * The lml argument is only meaningful for diagnostics sent to ld.so.1, 153 * and is ignored here. 154 */ 155 void 156 veprintf(Lm_list *lml __unused, Error error, const char *format, va_list args) 157 { 158 const char *err; 159 160 /* If strings[] element for our error type is non-NULL, issue prefix */ 161 err = errstr[error]; 162 if (err != NULL) 163 (void) fprintf(stderr, "%s%s", MSG_ORIG(MSG_STR_LDDIAG), err); 164 (void) vfprintf(stderr, format, args); 165 166 if (error == ERR_ELF) { 167 int elferr; 168 169 elferr = elf_errno(); 170 if (elferr != 0) { 171 err = elf_errmsg(elferr); 172 (void) fprintf(stderr, MSG_ORIG(MSG_STR_ELFDIAG), err); 173 } 174 } 175 (void) fprintf(stderr, MSG_ORIG(MSG_STR_NL)); 176 (void) fflush(stderr); 177 } 178 179 /* 180 * Print a message to stderr 181 */ 182 /* VARARGS3 */ 183 void 184 eprintf(Lm_list *lml, Error error, const char *format, ...) 185 { 186 va_list args; 187 188 va_start(args, format); 189 veprintf(lml, error, format, args); 190 va_end(args); 191 } 192 193 194 /* 195 * Examine the first object in an archive to determine its ELFCLASS 196 * and machine type. 197 * 198 * entry: 199 * fd - Open file descriptor for file 200 * elf - libelf ELF descriptor 201 * class_ret, mach_ret - Address of variables to receive ELFCLASS 202 * and machine type. 203 * 204 * exit: 205 * On success, *class_ret and *mach_ret are filled in, and True (1) 206 * is returned. On failure, False (0) is returned. 207 */ 208 static bool 209 archive(int fd, Elf *elf, uchar_t *class_ret, Half *mach_ret) 210 { 211 Elf_Cmd cmd; 212 Elf *nelf; 213 214 /* 215 * Process each item within the archive until we find the first 216 * ELF object, or alternatively another archive to recurse into. 217 * Stop after analyzing the first plain object found. 218 */ 219 for (cmd = ELF_C_READ, nelf = NULL; 220 (nelf = elf_begin(fd, cmd, elf)) != NULL; 221 cmd = elf_next(nelf), (void) elf_end(nelf)) { 222 Elf_Arhdr *arhdr = elf_getarhdr(nelf); 223 224 if (arhdr == NULL) 225 return (false); 226 if (*arhdr->ar_name == '/') 227 continue; 228 switch (elf_kind(nelf)) { 229 case ELF_K_AR: 230 if (archive(fd, nelf, class_ret, mach_ret)) 231 return (true); 232 break; 233 case ELF_K_ELF: 234 if (gelf_getclass(nelf) == ELFCLASS64) { 235 Elf64_Ehdr *ehdr = elf64_getehdr(nelf); 236 237 if (ehdr == NULL) 238 continue; 239 *class_ret = ehdr->e_ident[EI_CLASS]; 240 *mach_ret = ehdr->e_machine; 241 } else { 242 Elf32_Ehdr *ehdr = elf32_getehdr(nelf); 243 244 if (ehdr == NULL) 245 continue; 246 *class_ret = ehdr->e_ident[EI_CLASS]; 247 *mach_ret = ehdr->e_machine; 248 } 249 return (true); 250 } 251 } 252 253 return (false); 254 } 255 256 /* 257 * Determine: 258 * - ELFCLASS of resulting object (class) 259 * - ELF machine type of resulting object (m_mach) 260 * 261 * In order of priority, we determine this information as follows: 262 * 263 * - Command line options (-32, -64 -z target). 264 * - From the first plain object seen on the command line. (This is 265 * by far the most common case.) 266 * - From the first object contained within the first archive 267 * on the command line. 268 * - If all else fails, we assume a 32-bit object for the native machine. 269 * 270 * entry: 271 * argc, argv - Command line argument vector 272 * class_ret - Address of variable to receive ELFCLASS of output object 273 */ 274 static ld_main_f 275 process_args(int argc, char *argv[], uchar_t *class_ret, Half *mach) 276 { 277 Half mach32 = EM_NONE; 278 Half mach64 = EM_NONE; 279 bool ar_found = false; 280 uint8_t class = ELFCLASSNONE; 281 const char *targ_sparc = MSG_ORIG(MSG_TARG_SPARC); 282 const char *targ_x86 = MSG_ORIG(MSG_TARG_X86); 283 uint8_t ar_class; 284 Half ar_mach; 285 char *pstr; 286 const char *err; 287 int c; 288 289 /* 290 * In general, libld.so is responsible for processing the 291 * command line options. The exception to this are those options 292 * that contain information about which linker to run and the 293 * class/machine of the output object. We examine the options 294 * here looking for the following: 295 * 296 * -32 Produce an ELFCLASS32 object. This is the default, so 297 * -32 is only needed when linking entirely from archives, 298 * and the first archive contains a mix of 32 and 64-bit 299 * objects, and the first object in that archive is 64-bit. 300 * We do not expect this option to get much use, but it 301 * ensures that the user can handle any situation. 302 * 303 * -64 Produce an ELFCLASS64 object. (Note that this will 304 * indirectly cause the use of the 64-bit linker if 305 * the system is 64-bit capable). The most common need 306 * for this option is when linking a filter object entirely 307 * from a mapfile. The less common case is when linking 308 * entirely from archives, and the first archive contains 309 * a mix of 32 and 64-bit objects, and the first object 310 * in that archive is 32-bit. 311 * 312 * -z target=platform 313 * Produce output object for the specified platform. 314 * This option is needed when producing an object 315 * for a non-native target entirely from a mapfile, 316 * or when linking entirely from an archive containing 317 * objects for multiple targets, and the first object 318 * in the archive is not for the desired target. 319 * 320 * If we've already processed an object and we find -32/-64, and 321 * the object is of the wrong class, we have an error condition. 322 * We ignore it here, and let it fall through to libld, where the 323 * proper diagnosis and error message will occur. 324 * 325 * Note that these options can all be given more than once, even if 326 * doing so would be ambiguous: this is for backwards compatibility 327 * with Makefiles and shell scripts and so on that are themselves 328 * ambiguous. 329 */ 330 opterr = 0; 331 optind = 1; 332 333 getmore: 334 while ((c = ld_getopt(0, optind, argc, argv)) != -1) { 335 switch (c) { 336 case '3': 337 /* 338 * MSG_ORIG(MSG_ARG_TWO) is just the non-localized 339 * string literal "2", but...ok. 340 */ 341 if (strcmp(optarg, MSG_ORIG(MSG_ARG_TWO)) != 0) { 342 err = MSG_INTL(MSG_ERR_BADARG); 343 eprintf(0, ERR_FATAL, err, '3', optarg); 344 exit(EXIT_FAILURE); 345 } 346 class = ELFCLASS32; 347 break; 348 case '6': 349 if (strcmp(optarg, MSG_ORIG(MSG_ARG_FOUR)) != 0) { 350 err = MSG_INTL(MSG_ERR_BADARG); 351 eprintf(0, ERR_FATAL, err, '6', optarg); 352 exit(EXIT_FAILURE); 353 } 354 class = ELFCLASS64; 355 break; 356 case 'z': 357 /* -z target=platform; silently skip everything else */ 358 if (strncmp(optarg, MSG_ORIG(MSG_ARG_TARGET), 359 MSG_ARG_TARGET_SIZE) != 0) { 360 continue; 361 } 362 pstr = optarg + MSG_ARG_TARGET_SIZE; 363 if (strcasecmp(pstr, targ_sparc) == 0) { 364 mach32 = EM_SPARC; 365 mach64 = EM_SPARCV9; 366 } else if (strcasecmp(pstr, targ_x86) == 0) { 367 mach32 = EM_386; 368 mach64 = EM_AMD64; 369 } else { 370 err = MSG_INTL(MSG_ERR_BADTARG); 371 eprintf(0, ERR_FATAL, err, pstr); 372 exit(EXIT_FAILURE); 373 } 374 break; 375 } 376 } 377 378 /* 379 * Continue to look for the first ELF object to determine the class of 380 * objects to operate on. At the same time, look for the first archive 381 * of ELF objects --- if no plain ELF object is specified, the type 382 * of the first ELF object in the first archive will be used. If 383 * there is no object, and no archive, then we fall back to a 32-bit 384 * object for the native machine. 385 */ 386 for (; optind < argc; optind++) { 387 int fd; 388 FILE_HDR hdr; 389 390 /* 391 * If we detect some more options return to getopt(). 392 * Checking argv[optind][1] against null prevents a forever 393 * loop if an unadorned `-' argument is passed to us. 394 */ 395 if (argv[optind][0] == '-') { 396 if (argv[optind][1] != '\0') 397 goto getmore; 398 continue; 399 } 400 401 /* 402 * If we've already determined the object class and 403 * machine type, continue to the next argument. Only 404 * the first object contributes to this decision, and 405 * there's no value to opening or examing the subsequent 406 * ones. We do need to keep going though, because there 407 * may be additional options that might affect our 408 * class/machine decision. 409 */ 410 if (class != ELFCLASSNONE && mach32 != EM_NONE) 411 continue; 412 413 /* 414 * Open the file and determine if it is an object. We are 415 * looking for ELF objects, or archives of ELF objects. 416 * 417 * Plain objects are simple, and are the common case, so 418 * we examine them directly and avoid the map-unmap-map 419 * that would occur if we used libelf. Archives are too 420 * complex to be worth accessing directly, so if we identify 421 * an archive, we use libelf on it and accept the cost. 422 */ 423 if ((fd = open(argv[optind], O_RDONLY)) == -1) { 424 int err = errno; 425 426 eprintf(0, ERR_FATAL, MSG_INTL(MSG_SYS_OPEN), 427 argv[optind], strerror(err)); 428 exit(EXIT_FAILURE); 429 } 430 431 if (pread(fd, &hdr, sizeof (hdr), 0) != sizeof (hdr)) { 432 (void) close(fd); 433 continue; 434 } 435 436 if ((hdr.ehdr.e_ident[EI_MAG0] == ELFMAG0) && 437 (hdr.ehdr.e_ident[EI_MAG1] == ELFMAG1) && 438 (hdr.ehdr.e_ident[EI_MAG2] == ELFMAG2) && 439 (hdr.ehdr.e_ident[EI_MAG3] == ELFMAG3)) { 440 if (class == ELFCLASSNONE) { 441 class = hdr.ehdr.e_ident[EI_CLASS]; 442 if ((class != ELFCLASS32) && 443 (class != ELFCLASS64)) 444 class = ELFCLASSNONE; 445 } 446 447 if (mach32 == EM_NONE) { 448 int one = 1; 449 uchar_t *one_p = (uchar_t *)&one; 450 int ld_elfdata; 451 452 ld_elfdata = (one_p[0] == 1) ? 453 ELFDATA2LSB : ELFDATA2MSB; 454 /* 455 * Both the 32 and 64-bit versions get the 456 * type from the object. If the user has 457 * asked for an inconsistant class/machine 458 * combination, libld will catch it. 459 */ 460 mach32 = mach64 = 461 (ld_elfdata == hdr.ehdr.e_ident[EI_DATA]) ? 462 hdr.ehdr.e_machine : 463 BSWAP_HALF(hdr.ehdr.e_machine); 464 } 465 } else if (!ar_found && 466 (memcmp(&hdr.armag, ARMAG, SARMAG) == 0)) { 467 Elf *elf; 468 469 (void) elf_version(EV_CURRENT); 470 if ((elf = elf_begin(fd, ELF_C_READ, NULL)) == NULL) { 471 (void) close(fd); 472 continue; 473 } 474 if (elf_kind(elf) == ELF_K_AR) 475 ar_found = 476 archive(fd, elf, &ar_class, &ar_mach); 477 (void) elf_end(elf); 478 } 479 480 (void) close(fd); 481 } 482 483 /* 484 * ELFCLASS of output object: If we did not establish a class from a 485 * command option, or from the first plain object, then use the class 486 * from the first archive, and failing that, default to 32-bit. 487 */ 488 if (class == ELFCLASSNONE) 489 class = ar_found ? ar_class : ELFCLASS32; 490 *class_ret = class; 491 492 /* 493 * Machine type of output object: If we did not establish a machine 494 * type from the command line, or from the first plain object, then 495 * use the machine established by the first archive, and failing that, 496 * use the native machine. 497 */ 498 *mach = (class == ELFCLASS64) ? mach64 : mach32; 499 if (*mach == EM_NONE) 500 if (ar_found) 501 *mach = ar_mach; 502 else 503 *mach = (class == ELFCLASS64) ? M_MACH_64 : M_MACH_32; 504 505 if (class == ELFCLASS32) 506 return (ld32_main); 507 508 return (ld64_main); 509 } 510 511 struct strlist { 512 struct strlist *sl_next; 513 char *sl_str; 514 }; 515 516 /* 517 * Parse an LD_OPTIONS environment string. Returns a linked list of strings 518 * parsed from the original list, or NULL if the list is empty. 519 */ 520 static struct strlist * 521 split_options(char *str) 522 { 523 struct strlist *strs = NULL; 524 struct strlist **nextp = &strs; 525 struct strlist *next; 526 char *arg; 527 528 while ((arg = strsep_ws(&str)) != NULL) { 529 if (*arg == '\0') 530 continue; 531 next = calloc(1, sizeof (struct strlist)); 532 if (next == NULL) { 533 eprintf(0, ERR_FATAL, 534 MSG_INTL(MSG_SYS_ALLOC), strerror(errno)); 535 exit(EXIT_FAILURE); 536 } 537 next->sl_str = arg; 538 *nextp = next; 539 nextp = &next->sl_next; 540 } 541 542 return (strs); 543 } 544 545 /* 546 * Determine whether an LD_OPTIONS environment variable is set, and if so, 547 * prepend environment string as a series of options to the argv array. 548 */ 549 static void 550 prepend_ldoptions(int *argcp, char **argvp[]) 551 { 552 int argc, nargc; 553 char **argv, **nargv, *ld_options; 554 struct strlist *opts, *p, *t; 555 556 ld_options = getenv_nonempty(MSG_ORIG(MSG_LD_OPTIONS)); 557 if (ld_options == NULL) 558 return; 559 560 /* 561 * Parse and count options. 562 */ 563 opts = split_options(ld_options); 564 for (nargc = 0, p = opts; p != NULL; p = p->sl_next) 565 nargc++; 566 567 /* 568 * Allocate a new argument vector big enough to hold both the old 569 * and new arguments. 570 */ 571 argc = *argcp; 572 argv = *argvp; 573 nargv = calloc(nargc + argc + 1, sizeof (char *)); 574 if (nargv == NULL) { 575 eprintf(0, ERR_FATAL, MSG_INTL(MSG_SYS_ALLOC), strerror(errno)); 576 exit(EXIT_FAILURE); 577 } 578 579 /* 580 * Initialize first element of new argv array to be the first element 581 * of the old argv array (ie. calling programs name). Then add the new 582 * args obtained from the environment. 583 */ 584 nargv[0] = argv[0]; 585 for (nargc = 1, p = opts; p != NULL; nargc++, p = p->sl_next) 586 nargv[nargc] = p->sl_str; 587 588 /* 589 * Now add the original argv array (skipping argv[0]) to the end of the 590 * new argv array, and re-vector argc and argv to reference this new 591 * array 592 */ 593 for (int i = 1; i < argc; i++, nargc++) 594 nargv[nargc] = argv[i]; 595 nargv[nargc] = NULL; 596 597 /* 598 * Clean up the strlist. 599 */ 600 for (t = NULL, p = opts; p != NULL; p = t) { 601 t = p->sl_next; 602 free(p); 603 } 604 605 *argcp = nargc; 606 *argvp = nargv; 607 } 608 609 /* 610 * Check to see if there is a LD_ALTEXEC=<path to alternate ld> in the 611 * environment. If so, first null the environment variable out, and then 612 * exec() the binary pointed to by the environment variable, passing the same 613 * arguments as the originating process. This mechanism permits using 614 * alternate link-editors (debugging/developer copies) even in complex build 615 * environments. 616 */ 617 static void 618 ld_altexec(int argc, char *argv[], char *envp[]) 619 { 620 char *bin; 621 struct strlist *opts, *p, *t; 622 char **nargv; 623 int i; 624 625 /* 626 * If LD_ALTEXEC isn't set, or is empty, return to continue executing 627 * the present link-editor. Note that we unconditionally unset it. 628 */ 629 bin = getenv_nonempty(MSG_ORIG(MSG_LD_ALTEXEC)); 630 (void) unsetenv(MSG_ORIG(MSG_LD_ALTEXEC)); 631 if (bin == NULL) 632 return; 633 634 /* Parse and count options, including argv[0]. */ 635 opts = split_options(bin); 636 if (opts == NULL) 637 return; 638 639 640 for (p = opts; p != NULL; p = p->sl_next) 641 argc++; 642 643 nargv = calloc(argc, sizeof (char *)); 644 if (nargv == NULL) { 645 eprintf(0, ERR_FATAL, MSG_INTL(MSG_SYS_ALLOC), strerror(errno)); 646 exit(EXIT_FAILURE); 647 } 648 for (i = 0, p = opts; p != NULL; p = p->sl_next, i++) 649 nargv[i] = p->sl_str; 650 /* Note that `argc` now counts the NULL at the end of `nargv`. */ 651 for (; i < argc; i++) 652 nargv[i] = *++argv; 653 654 /* 655 * Clean up the strlist. 656 */ 657 for (t = NULL, p = opts; p != NULL; p = t) { 658 t = p->sl_next; 659 free(p); 660 } 661 662 /* 663 * Set argv[0] to point to our new linker And attempt to execute it. 664 */ 665 (void) execve(bin, nargv, envp); 666 667 /* 668 * If the exec() fails, exit with failure. 669 */ 670 eprintf(0, ERR_FATAL, MSG_INTL(MSG_SYS_EXEC), bin, strerror(errno)); 671 exit(EXIT_FAILURE); 672 } 673 674 int 675 main(int argc, char *argv[], char *envp[]) 676 { 677 uint8_t class; 678 Half mach; 679 ld_main_f ld_main; 680 681 /* 682 * Establish locale and initialize error strings. 683 */ 684 init_strings(); 685 686 /* 687 * Maybe execute an alternate linker. If the LD_ALTEXEC 688 * environment variable is set, we will try and run what it 689 * points to or fail. If it is not set, we simply continue. 690 */ 691 ld_altexec(argc, argv, envp); 692 693 /* 694 * Maybe process additional arguments. If the LD_OPTIONS 695 * environment variable is set, and if present prepend 696 * the arguments specified to the command line argument list. 697 */ 698 prepend_ldoptions(&argc, &argv); 699 700 /* 701 * Examine the command arguments to determine: 702 * - object class 703 * - link-editor class 704 * - target machine 705 */ 706 ld_main = process_args(argc, argv, &class, &mach); 707 708 /* Call the libld entry point for the specified ELFCLASS */ 709 return (ld_main(argc, argv, mach)); 710 } 711 712 /* 713 * We supply this function for the msg module 714 */ 715 const char * 716 _ld_msg(Msg mid) 717 { 718 return (gettext(MSG_ORIG(mid))); 719 } 720