1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 #include <sys/kmem.h> 26 #include <sys/errno.h> 27 #include <sys/systm.h> 28 #include <sys/cmn_err.h> 29 #include <sys/brand.h> 30 #include <sys/machbrand.h> 31 #include <sys/modctl.h> 32 #include <sys/rwlock.h> 33 #include <sys/zone.h> 34 #include <sys/pathname.h> 35 36 #define SUPPORTED_BRAND_VERSION BRAND_VER_1 37 38 #if defined(__sparcv9) 39 /* sparcv9 uses system wide brand interposition hooks */ 40 static void brand_plat_interposition_enable(void); 41 static void brand_plat_interposition_disable(void); 42 43 struct brand_mach_ops native_mach_ops = { 44 NULL, NULL 45 }; 46 #else /* !__sparcv9 */ 47 struct brand_mach_ops native_mach_ops = { 48 NULL, NULL, NULL, NULL 49 }; 50 #endif /* !__sparcv9 */ 51 52 brand_t native_brand = { 53 BRAND_VER_1, 54 "native", 55 NULL, 56 &native_mach_ops 57 }; 58 59 /* 60 * Used to maintain a list of all the brands currently loaded into the 61 * kernel. 62 */ 63 struct brand_list { 64 int bl_refcnt; 65 struct brand_list *bl_next; 66 brand_t *bl_brand; 67 }; 68 69 static struct brand_list *brand_list = NULL; 70 71 /* 72 * This lock protects the integrity of the brand list. 73 */ 74 static kmutex_t brand_list_lock; 75 76 void 77 brand_init() 78 { 79 mutex_init(&brand_list_lock, NULL, MUTEX_DEFAULT, NULL); 80 p0.p_brand = &native_brand; 81 } 82 83 int 84 brand_register(brand_t *brand) 85 { 86 struct brand_list *list, *scan; 87 88 if (brand == NULL) 89 return (EINVAL); 90 91 if (brand->b_version != SUPPORTED_BRAND_VERSION) { 92 if (brand->b_version < SUPPORTED_BRAND_VERSION) { 93 cmn_err(CE_WARN, 94 "brand '%s' was built to run on older versions " 95 "of Solaris.", 96 brand->b_name); 97 } else { 98 cmn_err(CE_WARN, 99 "brand '%s' was built to run on a newer version " 100 "of Solaris.", 101 brand->b_name); 102 } 103 return (EINVAL); 104 } 105 106 /* Sanity checks */ 107 if (brand->b_name == NULL || brand->b_ops == NULL || 108 brand->b_ops->b_brandsys == NULL) { 109 cmn_err(CE_WARN, "Malformed brand"); 110 return (EINVAL); 111 } 112 113 list = kmem_alloc(sizeof (struct brand_list), KM_SLEEP); 114 115 /* Add the brand to the list of loaded brands. */ 116 mutex_enter(&brand_list_lock); 117 118 /* 119 * Check to be sure we haven't already registered this brand. 120 */ 121 for (scan = brand_list; scan != NULL; scan = scan->bl_next) { 122 if (strcmp(brand->b_name, scan->bl_brand->b_name) == 0) { 123 cmn_err(CE_WARN, 124 "Invalid attempt to load a second instance of " 125 "brand %s", brand->b_name); 126 mutex_exit(&brand_list_lock); 127 kmem_free(list, sizeof (struct brand_list)); 128 return (EINVAL); 129 } 130 } 131 132 #if defined(__sparcv9) 133 /* sparcv9 uses system wide brand interposition hooks */ 134 if (brand_list == NULL) 135 brand_plat_interposition_enable(); 136 #endif /* __sparcv9 */ 137 138 list->bl_brand = brand; 139 list->bl_refcnt = 0; 140 list->bl_next = brand_list; 141 brand_list = list; 142 143 mutex_exit(&brand_list_lock); 144 145 return (0); 146 } 147 148 /* 149 * The kernel module implementing this brand is being unloaded, so remove 150 * it from the list of active brands. 151 */ 152 int 153 brand_unregister(brand_t *brand) 154 { 155 struct brand_list *list, *prev; 156 157 /* Sanity checks */ 158 if (brand == NULL || brand->b_name == NULL) { 159 cmn_err(CE_WARN, "Malformed brand"); 160 return (EINVAL); 161 } 162 163 prev = NULL; 164 mutex_enter(&brand_list_lock); 165 166 for (list = brand_list; list != NULL; list = list->bl_next) { 167 if (list->bl_brand == brand) 168 break; 169 prev = list; 170 } 171 172 if (list == NULL) { 173 cmn_err(CE_WARN, "Brand %s wasn't registered", brand->b_name); 174 mutex_exit(&brand_list_lock); 175 return (EINVAL); 176 } 177 178 if (list->bl_refcnt > 0) { 179 cmn_err(CE_WARN, "Unregistering brand %s which is still in use", 180 brand->b_name); 181 mutex_exit(&brand_list_lock); 182 return (EBUSY); 183 } 184 185 /* Remove brand from the list */ 186 if (prev != NULL) 187 prev->bl_next = list->bl_next; 188 else 189 brand_list = list->bl_next; 190 191 #if defined(__sparcv9) 192 /* sparcv9 uses system wide brand interposition hooks */ 193 if (brand_list == NULL) 194 brand_plat_interposition_disable(); 195 #endif /* __sparcv9 */ 196 197 mutex_exit(&brand_list_lock); 198 199 kmem_free(list, sizeof (struct brand_list)); 200 201 return (0); 202 } 203 204 /* 205 * Record that a zone of this brand has been instantiated. If the kernel 206 * module implementing this brand's functionality is not present, this 207 * routine attempts to load the module as a side effect. 208 */ 209 brand_t * 210 brand_register_zone(struct brand_attr *attr) 211 { 212 struct brand_list *l = NULL; 213 ddi_modhandle_t hdl = NULL; 214 char *modname; 215 int err = 0; 216 217 if (is_system_labeled()) { 218 cmn_err(CE_WARN, 219 "Branded zones are not allowed on labeled systems."); 220 return (NULL); 221 } 222 223 /* 224 * We make at most two passes through this loop. The first time 225 * through, we're looking to see if this is a new user of an 226 * already loaded brand. If the brand hasn't been loaded, we 227 * call ddi_modopen() to force it to be loaded and then make a 228 * second pass through the list of brands. If we don't find the 229 * brand the second time through it means that the modname 230 * specified in the brand_attr structure doesn't provide the brand 231 * specified in the brandname field. This would suggest a bug in 232 * the brand's config.xml file. We close the module and return 233 * 'NULL' to the caller. 234 */ 235 for (;;) { 236 /* 237 * Search list of loaded brands 238 */ 239 mutex_enter(&brand_list_lock); 240 for (l = brand_list; l != NULL; l = l->bl_next) 241 if (strcmp(attr->ba_brandname, 242 l->bl_brand->b_name) == 0) 243 break; 244 if ((l != NULL) || (hdl != NULL)) 245 break; 246 mutex_exit(&brand_list_lock); 247 248 /* 249 * We didn't find that the requested brand has been loaded 250 * yet, so we trigger the load of the appropriate kernel 251 * module and search the list again. 252 */ 253 modname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 254 (void) strcpy(modname, "brand/"); 255 (void) strcat(modname, attr->ba_modname); 256 hdl = ddi_modopen(modname, KRTLD_MODE_FIRST, &err); 257 kmem_free(modname, MAXPATHLEN); 258 259 if (err != 0) 260 return (NULL); 261 } 262 263 /* 264 * If we found the matching brand, bump its reference count. 265 */ 266 if (l != NULL) 267 l->bl_refcnt++; 268 269 mutex_exit(&brand_list_lock); 270 271 if (hdl != NULL) 272 (void) ddi_modclose(hdl); 273 274 return ((l != NULL) ? l->bl_brand : NULL); 275 } 276 277 /* 278 * Return the number of zones currently using this brand. 279 */ 280 int 281 brand_zone_count(struct brand *bp) 282 { 283 struct brand_list *l; 284 int cnt = 0; 285 286 mutex_enter(&brand_list_lock); 287 for (l = brand_list; l != NULL; l = l->bl_next) 288 if (l->bl_brand == bp) { 289 cnt = l->bl_refcnt; 290 break; 291 } 292 mutex_exit(&brand_list_lock); 293 294 return (cnt); 295 } 296 297 void 298 brand_unregister_zone(struct brand *bp) 299 { 300 struct brand_list *list; 301 302 mutex_enter(&brand_list_lock); 303 for (list = brand_list; list != NULL; list = list->bl_next) { 304 if (list->bl_brand == bp) { 305 ASSERT(list->bl_refcnt > 0); 306 list->bl_refcnt--; 307 break; 308 } 309 } 310 mutex_exit(&brand_list_lock); 311 } 312 313 void 314 brand_setbrand(proc_t *p) 315 { 316 brand_t *bp = p->p_zone->zone_brand; 317 318 ASSERT(bp != NULL); 319 ASSERT(p->p_brand == &native_brand); 320 321 /* 322 * We should only be called from exec(), when we know the process 323 * is single-threaded. 324 */ 325 ASSERT(p->p_tlist == p->p_tlist->t_forw); 326 327 p->p_brand = bp; 328 ASSERT(PROC_IS_BRANDED(p)); 329 BROP(p)->b_setbrand(p); 330 } 331 332 void 333 brand_clearbrand(proc_t *p, boolean_t no_lwps) 334 { 335 brand_t *bp = p->p_zone->zone_brand; 336 klwp_t *lwp = NULL; 337 ASSERT(bp != NULL); 338 ASSERT(!no_lwps || (p->p_tlist == NULL)); 339 340 /* 341 * If called from exec_common() or proc_exit(), 342 * we know the process is single-threaded. 343 * If called from fork_fail, p_tlist is NULL. 344 */ 345 if (!no_lwps) { 346 ASSERT(p->p_tlist == p->p_tlist->t_forw); 347 lwp = p->p_tlist->t_lwp; 348 } 349 350 ASSERT(PROC_IS_BRANDED(p)); 351 BROP(p)->b_proc_exit(p, lwp); 352 p->p_brand = &native_brand; 353 } 354 355 #if defined(__sparcv9) 356 /* 357 * Currently, only sparc has system level brand syscall interposition. 358 * On x86 we're able to enable syscall interposition on a per-cpu basis 359 * when a branded thread is scheduled to run on a cpu. 360 */ 361 362 /* Local variables needed for dynamic syscall interposition support */ 363 static uint32_t syscall_trap_patch_instr_orig; 364 static uint32_t syscall_trap32_patch_instr_orig; 365 366 /* Trap Table syscall entry hot patch points */ 367 extern void syscall_trap_patch_point(void); 368 extern void syscall_trap32_patch_point(void); 369 370 /* Alternate syscall entry handlers used when branded zones are running */ 371 extern void syscall_wrapper(void); 372 extern void syscall_wrapper32(void); 373 374 /* Macros used to facilitate sparcv9 instruction generation */ 375 #define BA_A_INSTR 0x30800000 /* ba,a addr */ 376 #define DISP22(from, to) \ 377 ((((uintptr_t)(to) - (uintptr_t)(from)) >> 2) & 0x3fffff) 378 379 /*ARGSUSED*/ 380 static void 381 brand_plat_interposition_enable(void) 382 { 383 ASSERT(MUTEX_HELD(&brand_list_lock)); 384 385 /* 386 * Before we hot patch the kernel save the current instructions 387 * so that we can restore them later. 388 */ 389 syscall_trap_patch_instr_orig = 390 *(uint32_t *)syscall_trap_patch_point; 391 syscall_trap32_patch_instr_orig = 392 *(uint32_t *)syscall_trap32_patch_point; 393 394 /* 395 * Modify the trap table at the patch points. 396 * 397 * We basically replace the first instruction at the patch 398 * point with a ba,a instruction that will transfer control 399 * to syscall_wrapper or syscall_wrapper32 for 64-bit and 400 * 32-bit syscalls respectively. It's important to note that 401 * the annul bit is set in the branch so we don't execute 402 * the instruction directly following the one we're patching 403 * during the branch's delay slot. 404 * 405 * It also doesn't matter that we're not atomically updating both 406 * the 64 and 32 bit syscall paths at the same time since there's 407 * no actual branded processes running on the system yet. 408 */ 409 hot_patch_kernel_text((caddr_t)syscall_trap_patch_point, 410 BA_A_INSTR | DISP22(syscall_trap_patch_point, syscall_wrapper), 411 4); 412 hot_patch_kernel_text((caddr_t)syscall_trap32_patch_point, 413 BA_A_INSTR | DISP22(syscall_trap32_patch_point, syscall_wrapper32), 414 4); 415 } 416 417 /*ARGSUSED*/ 418 static void 419 brand_plat_interposition_disable(void) 420 { 421 ASSERT(MUTEX_HELD(&brand_list_lock)); 422 423 /* 424 * Restore the original instructions at the trap table syscall 425 * patch points to disable the brand syscall interposition 426 * mechanism. 427 */ 428 hot_patch_kernel_text((caddr_t)syscall_trap_patch_point, 429 syscall_trap_patch_instr_orig, 4); 430 hot_patch_kernel_text((caddr_t)syscall_trap32_patch_point, 431 syscall_trap32_patch_instr_orig, 4); 432 } 433 #endif /* __sparcv9 */ 434 435 /* 436 * The following functions can be shared among kernel brand modules which 437 * implement Solaris-derived brands, all of which need to do similar tasks 438 * to manage the brand. 439 */ 440 441 #if defined(_LP64) 442 static void 443 Ehdr32to64(Elf32_Ehdr *src, Ehdr *dst) 444 { 445 bcopy(src->e_ident, dst->e_ident, sizeof (src->e_ident)); 446 dst->e_type = src->e_type; 447 dst->e_machine = src->e_machine; 448 dst->e_version = src->e_version; 449 dst->e_entry = src->e_entry; 450 dst->e_phoff = src->e_phoff; 451 dst->e_shoff = src->e_shoff; 452 dst->e_flags = src->e_flags; 453 dst->e_ehsize = src->e_ehsize; 454 dst->e_phentsize = src->e_phentsize; 455 dst->e_phnum = src->e_phnum; 456 dst->e_shentsize = src->e_shentsize; 457 dst->e_shnum = src->e_shnum; 458 dst->e_shstrndx = src->e_shstrndx; 459 } 460 #endif /* _LP64 */ 461 462 /* 463 * Return -1 if the cmd was not handled by this function. 464 */ 465 /*ARGSUSED*/ 466 int 467 brand_solaris_cmd(int cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3, 468 struct brand *pbrand, int brandvers) 469 { 470 brand_proc_data_t *spd; 471 brand_proc_reg_t reg; 472 proc_t *p = curproc; 473 int err; 474 475 /* 476 * There is one operation that is supported for a native 477 * process; B_EXEC_BRAND. This brand operaion is redundant 478 * since the kernel assumes a native process doing an exec 479 * in a branded zone is going to run a branded processes. 480 * hence we don't support this operation. 481 */ 482 if (cmd == B_EXEC_BRAND) 483 return (ENOSYS); 484 485 /* For all other operations this must be a branded process. */ 486 if (p->p_brand == &native_brand) 487 return (ENOSYS); 488 489 ASSERT(p->p_brand == pbrand); 490 ASSERT(p->p_brand_data != NULL); 491 492 spd = (brand_proc_data_t *)p->p_brand_data; 493 494 switch ((cmd)) { 495 case B_EXEC_NATIVE: 496 err = exec_common((char *)arg1, (const char **)arg2, 497 (const char **)arg3, EBA_NATIVE); 498 return (err); 499 500 /* 501 * Get the address of the user-space system call handler from 502 * the user process and attach it to the proc structure. 503 */ 504 case B_REGISTER: 505 if (p->p_model == DATAMODEL_NATIVE) { 506 if (copyin((void *)arg1, ®, sizeof (reg)) != 0) 507 return (EFAULT); 508 } 509 #if defined(_LP64) 510 else { 511 brand_common_reg32_t reg32; 512 513 if (copyin((void *)arg1, ®32, sizeof (reg32)) != 0) 514 return (EFAULT); 515 reg.sbr_version = reg32.sbr_version; 516 reg.sbr_handler = (caddr_t)(uintptr_t)reg32.sbr_handler; 517 } 518 #endif /* _LP64 */ 519 520 if (reg.sbr_version != brandvers) 521 return (ENOTSUP); 522 spd->spd_handler = reg.sbr_handler; 523 return (0); 524 525 case B_ELFDATA: 526 if (p->p_model == DATAMODEL_NATIVE) { 527 if (copyout(&spd->spd_elf_data, (void *)arg1, 528 sizeof (brand_elf_data_t)) != 0) 529 return (EFAULT); 530 } 531 #if defined(_LP64) 532 else { 533 brand_elf_data32_t sed32; 534 535 sed32.sed_phdr = spd->spd_elf_data.sed_phdr; 536 sed32.sed_phent = spd->spd_elf_data.sed_phent; 537 sed32.sed_phnum = spd->spd_elf_data.sed_phnum; 538 sed32.sed_entry = spd->spd_elf_data.sed_entry; 539 sed32.sed_base = spd->spd_elf_data.sed_base; 540 sed32.sed_ldentry = spd->spd_elf_data.sed_ldentry; 541 sed32.sed_lddata = spd->spd_elf_data.sed_lddata; 542 if (copyout(&sed32, (void *)arg1, sizeof (sed32)) 543 != 0) 544 return (EFAULT); 545 } 546 #endif /* _LP64 */ 547 return (0); 548 549 /* 550 * The B_TRUSS_POINT subcommand exists so that we can see 551 * truss output from interposed system calls that return 552 * without first calling any other system call, meaning they 553 * would be invisible to truss(1). 554 * If the second argument is set non-zero, set errno to that 555 * value as well. 556 * 557 * Common arguments seen with truss are: 558 * 559 * arg1: syscall number 560 * arg2: errno 561 */ 562 case B_TRUSS_POINT: 563 return ((arg2 == 0) ? 0 : set_errno((uint_t)arg2)); 564 } 565 566 return (-1); 567 } 568 569 /*ARGSUSED*/ 570 void 571 brand_solaris_copy_procdata(proc_t *child, proc_t *parent, struct brand *pbrand) 572 { 573 brand_proc_data_t *spd; 574 575 ASSERT(parent->p_brand == pbrand); 576 ASSERT(child->p_brand == pbrand); 577 ASSERT(parent->p_brand_data != NULL); 578 ASSERT(child->p_brand_data == NULL); 579 580 /* 581 * Just duplicate all the proc data of the parent for the 582 * child 583 */ 584 spd = kmem_alloc(sizeof (brand_proc_data_t), KM_SLEEP); 585 bcopy(parent->p_brand_data, spd, sizeof (brand_proc_data_t)); 586 child->p_brand_data = spd; 587 } 588 589 static void 590 restoreexecenv(struct execenv *ep, stack_t *sp) 591 { 592 klwp_t *lwp = ttolwp(curthread); 593 594 setexecenv(ep); 595 lwp->lwp_sigaltstack.ss_sp = sp->ss_sp; 596 lwp->lwp_sigaltstack.ss_size = sp->ss_size; 597 lwp->lwp_sigaltstack.ss_flags = sp->ss_flags; 598 } 599 600 /*ARGSUSED*/ 601 int 602 brand_solaris_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, 603 intpdata_t *idatap, int level, long *execsz, int setid, caddr_t exec_file, 604 cred_t *cred, int brand_action, struct brand *pbrand, char *bname, 605 char *brandlib, char *brandlib32, char *brandlinker, char *brandlinker32) 606 { 607 608 vnode_t *nvp; 609 Ehdr ehdr; 610 Addr uphdr_vaddr; 611 intptr_t voffset; 612 int interp; 613 int i, err; 614 struct execenv env; 615 struct execenv origenv; 616 stack_t orig_sigaltstack; 617 struct user *up = PTOU(curproc); 618 proc_t *p = ttoproc(curthread); 619 klwp_t *lwp = ttolwp(curthread); 620 brand_proc_data_t *spd; 621 brand_elf_data_t sed, *sedp; 622 char *linker; 623 uintptr_t lddata; /* lddata of executable's linker */ 624 625 ASSERT(curproc->p_brand == pbrand); 626 ASSERT(curproc->p_brand_data != NULL); 627 628 spd = (brand_proc_data_t *)curproc->p_brand_data; 629 sedp = &spd->spd_elf_data; 630 631 args->brandname = bname; 632 633 /* 634 * We will exec the brand library and then map in the target 635 * application and (optionally) the brand's default linker. 636 */ 637 if (args->to_model == DATAMODEL_NATIVE) { 638 args->emulator = brandlib; 639 linker = brandlinker; 640 } 641 #if defined(_LP64) 642 else { 643 args->emulator = brandlib32; 644 linker = brandlinker32; 645 } 646 #endif /* _LP64 */ 647 648 if ((err = lookupname(args->emulator, UIO_SYSSPACE, FOLLOW, 649 NULLVPP, &nvp)) != 0) { 650 uprintf("%s: not found.", args->emulator); 651 return (err); 652 } 653 654 /* 655 * The following elf{32}exec call changes the execenv in the proc 656 * struct which includes changing the p_exec member to be the vnode 657 * for the brand library (e.g. /.SUNWnative/usr/lib/s10_brand.so.1). 658 * We will eventually set the p_exec member to be the vnode for the new 659 * executable when we call setexecenv(). However, if we get an error 660 * before that call we need to restore the execenv to its original 661 * values so that when we return to the caller fop_close() works 662 * properly while cleaning up from the failed exec(). Restoring the 663 * original value will also properly decrement the 2nd VN_RELE that we 664 * took on the brand library. 665 */ 666 origenv.ex_bssbase = p->p_bssbase; 667 origenv.ex_brkbase = p->p_brkbase; 668 origenv.ex_brksize = p->p_brksize; 669 origenv.ex_vp = p->p_exec; 670 orig_sigaltstack.ss_sp = lwp->lwp_sigaltstack.ss_sp; 671 orig_sigaltstack.ss_size = lwp->lwp_sigaltstack.ss_size; 672 orig_sigaltstack.ss_flags = lwp->lwp_sigaltstack.ss_flags; 673 674 if (args->to_model == DATAMODEL_NATIVE) { 675 err = elfexec(nvp, uap, args, idatap, level + 1, execsz, 676 setid, exec_file, cred, brand_action); 677 } 678 #if defined(_LP64) 679 else { 680 err = elf32exec(nvp, uap, args, idatap, level + 1, execsz, 681 setid, exec_file, cred, brand_action); 682 } 683 #endif /* _LP64 */ 684 VN_RELE(nvp); 685 if (err != 0) { 686 restoreexecenv(&origenv, &orig_sigaltstack); 687 return (err); 688 } 689 690 /* 691 * The u_auxv veCTors are set up by elfexec to point to the 692 * brand emulation library and linker. Save these so they can 693 * be copied to the specific brand aux vectors. 694 */ 695 bzero(&sed, sizeof (sed)); 696 for (i = 0; i < __KERN_NAUXV_IMPL; i++) { 697 switch (up->u_auxv[i].a_type) { 698 case AT_SUN_LDDATA: 699 sed.sed_lddata = up->u_auxv[i].a_un.a_val; 700 break; 701 case AT_BASE: 702 sed.sed_base = up->u_auxv[i].a_un.a_val; 703 break; 704 case AT_ENTRY: 705 sed.sed_entry = up->u_auxv[i].a_un.a_val; 706 break; 707 case AT_PHDR: 708 sed.sed_phdr = up->u_auxv[i].a_un.a_val; 709 break; 710 case AT_PHENT: 711 sed.sed_phent = up->u_auxv[i].a_un.a_val; 712 break; 713 case AT_PHNUM: 714 sed.sed_phnum = up->u_auxv[i].a_un.a_val; 715 break; 716 default: 717 break; 718 } 719 } 720 /* Make sure the emulator has an entry point */ 721 ASSERT(sed.sed_entry != NULL); 722 ASSERT(sed.sed_phdr != NULL); 723 724 bzero(&env, sizeof (env)); 725 if (args->to_model == DATAMODEL_NATIVE) { 726 err = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr, 727 &voffset, exec_file, &interp, &env.ex_bssbase, 728 &env.ex_brkbase, &env.ex_brksize, NULL); 729 } 730 #if defined(_LP64) 731 else { 732 Elf32_Ehdr ehdr32; 733 Elf32_Addr uphdr_vaddr32; 734 err = mapexec32_brand(vp, args, &ehdr32, &uphdr_vaddr32, 735 &voffset, exec_file, &interp, &env.ex_bssbase, 736 &env.ex_brkbase, &env.ex_brksize, NULL); 737 Ehdr32to64(&ehdr32, &ehdr); 738 739 if (uphdr_vaddr32 == (Elf32_Addr)-1) 740 uphdr_vaddr = (Addr)-1; 741 else 742 uphdr_vaddr = uphdr_vaddr32; 743 } 744 #endif /* _LP64 */ 745 if (err != 0) { 746 restoreexecenv(&origenv, &orig_sigaltstack); 747 return (err); 748 } 749 750 /* 751 * Save off the important properties of the executable. The 752 * brand library will ask us for this data later, when it is 753 * initializing and getting ready to transfer control to the 754 * brand application. 755 */ 756 if (uphdr_vaddr == (Addr)-1) 757 sedp->sed_phdr = voffset + ehdr.e_phoff; 758 else 759 sedp->sed_phdr = voffset + uphdr_vaddr; 760 sedp->sed_entry = voffset + ehdr.e_entry; 761 sedp->sed_phent = ehdr.e_phentsize; 762 sedp->sed_phnum = ehdr.e_phnum; 763 764 if (interp) { 765 if (ehdr.e_type == ET_DYN) { 766 /* 767 * This is a shared object executable, so we 768 * need to pick a reasonable place to put the 769 * heap. Just don't use the first page. 770 */ 771 env.ex_brkbase = (caddr_t)PAGESIZE; 772 env.ex_bssbase = (caddr_t)PAGESIZE; 773 } 774 775 /* 776 * If the program needs an interpreter (most do), map 777 * it in and store relevant information about it in the 778 * aux vector, where the brand library can find it. 779 */ 780 if ((err = lookupname(linker, UIO_SYSSPACE, 781 FOLLOW, NULLVPP, &nvp)) != 0) { 782 uprintf("%s: not found.", brandlinker); 783 restoreexecenv(&origenv, &orig_sigaltstack); 784 return (err); 785 } 786 if (args->to_model == DATAMODEL_NATIVE) { 787 err = mapexec_brand(nvp, args, &ehdr, 788 &uphdr_vaddr, &voffset, exec_file, &interp, 789 NULL, NULL, NULL, &lddata); 790 } 791 #if defined(_LP64) 792 else { 793 Elf32_Ehdr ehdr32; 794 Elf32_Addr uphdr_vaddr32; 795 err = mapexec32_brand(nvp, args, &ehdr32, 796 &uphdr_vaddr32, &voffset, exec_file, &interp, 797 NULL, NULL, NULL, &lddata); 798 Ehdr32to64(&ehdr32, &ehdr); 799 800 if (uphdr_vaddr32 == (Elf32_Addr)-1) 801 uphdr_vaddr = (Addr)-1; 802 else 803 uphdr_vaddr = uphdr_vaddr32; 804 } 805 #endif /* _LP64 */ 806 VN_RELE(nvp); 807 if (err != 0) { 808 restoreexecenv(&origenv, &orig_sigaltstack); 809 return (err); 810 } 811 812 /* 813 * Now that we know the base address of the brand's 814 * linker, place it in the aux vector. 815 */ 816 sedp->sed_base = voffset; 817 sedp->sed_ldentry = voffset + ehdr.e_entry; 818 sedp->sed_lddata = voffset + lddata; 819 } else { 820 /* 821 * This program has no interpreter. The brand library 822 * will jump to the address in the AT_SUN_BRAND_LDENTRY 823 * aux vector, so in this case, put the entry point of 824 * the main executable there. 825 */ 826 if (ehdr.e_type == ET_EXEC) { 827 /* 828 * An executable with no interpreter, this must 829 * be a statically linked executable, which 830 * means we loaded it at the address specified 831 * in the elf header, in which case the e_entry 832 * field of the elf header is an absolute 833 * address. 834 */ 835 sedp->sed_ldentry = ehdr.e_entry; 836 sedp->sed_entry = ehdr.e_entry; 837 sedp->sed_lddata = NULL; 838 sedp->sed_base = NULL; 839 } else { 840 /* 841 * A shared object with no interpreter, we use 842 * the calculated address from above. 843 */ 844 sedp->sed_ldentry = sedp->sed_entry; 845 sedp->sed_entry = NULL; 846 sedp->sed_phdr = NULL; 847 sedp->sed_phent = NULL; 848 sedp->sed_phnum = NULL; 849 sedp->sed_lddata = NULL; 850 sedp->sed_base = voffset; 851 852 if (ehdr.e_type == ET_DYN) { 853 /* 854 * Delay setting the brkbase until the 855 * first call to brk(); see elfexec() 856 * for details. 857 */ 858 env.ex_bssbase = (caddr_t)0; 859 env.ex_brkbase = (caddr_t)0; 860 env.ex_brksize = 0; 861 } 862 } 863 } 864 865 env.ex_magic = elfmagic; 866 env.ex_vp = vp; 867 setexecenv(&env); 868 869 /* 870 * It's time to manipulate the process aux vectors. First 871 * we need to update the AT_SUN_AUXFLAGS aux vector to set 872 * the AF_SUN_NOPLM flag. 873 */ 874 if (args->to_model == DATAMODEL_NATIVE) { 875 auxv_t auxflags_auxv; 876 877 if (copyin(args->auxp_auxflags, &auxflags_auxv, 878 sizeof (auxflags_auxv)) != 0) 879 return (EFAULT); 880 881 ASSERT(auxflags_auxv.a_type == AT_SUN_AUXFLAGS); 882 auxflags_auxv.a_un.a_val |= AF_SUN_NOPLM; 883 if (copyout(&auxflags_auxv, args->auxp_auxflags, 884 sizeof (auxflags_auxv)) != 0) 885 return (EFAULT); 886 } 887 #if defined(_LP64) 888 else { 889 auxv32_t auxflags_auxv32; 890 891 if (copyin(args->auxp_auxflags, &auxflags_auxv32, 892 sizeof (auxflags_auxv32)) != 0) 893 return (EFAULT); 894 895 ASSERT(auxflags_auxv32.a_type == AT_SUN_AUXFLAGS); 896 auxflags_auxv32.a_un.a_val |= AF_SUN_NOPLM; 897 if (copyout(&auxflags_auxv32, args->auxp_auxflags, 898 sizeof (auxflags_auxv32)) != 0) 899 return (EFAULT); 900 } 901 #endif /* _LP64 */ 902 903 /* Second, copy out the brand specific aux vectors. */ 904 if (args->to_model == DATAMODEL_NATIVE) { 905 auxv_t brand_auxv[] = { 906 { AT_SUN_BRAND_AUX1, 0 }, 907 { AT_SUN_BRAND_AUX2, 0 }, 908 { AT_SUN_BRAND_AUX3, 0 } 909 }; 910 911 ASSERT(brand_auxv[0].a_type == 912 AT_SUN_BRAND_COMMON_LDDATA); 913 brand_auxv[0].a_un.a_val = sed.sed_lddata; 914 915 if (copyout(&brand_auxv, args->auxp_brand, 916 sizeof (brand_auxv)) != 0) 917 return (EFAULT); 918 } 919 #if defined(_LP64) 920 else { 921 auxv32_t brand_auxv32[] = { 922 { AT_SUN_BRAND_AUX1, 0 }, 923 { AT_SUN_BRAND_AUX2, 0 }, 924 { AT_SUN_BRAND_AUX3, 0 } 925 }; 926 927 ASSERT(brand_auxv32[0].a_type == AT_SUN_BRAND_COMMON_LDDATA); 928 brand_auxv32[0].a_un.a_val = (uint32_t)sed.sed_lddata; 929 if (copyout(&brand_auxv32, args->auxp_brand, 930 sizeof (brand_auxv32)) != 0) 931 return (EFAULT); 932 } 933 #endif /* _LP64 */ 934 935 /* 936 * Third, the /proc aux vectors set up by elfexec() point to 937 * brand emulation library and it's linker. Copy these to the 938 * /proc brand specific aux vector, and update the regular 939 * /proc aux vectors to point to the executable (and it's 940 * linker). This will enable debuggers to access the 941 * executable via the usual /proc or elf notes aux vectors. 942 * 943 * The brand emulation library's linker will get it's aux 944 * vectors off the stack, and then update the stack with the 945 * executable's aux vectors before jumping to the executable's 946 * linker. 947 * 948 * Debugging the brand emulation library must be done from 949 * the global zone, where the librtld_db module knows how to 950 * fetch the brand specific aux vectors to access the brand 951 * emulation libraries linker. 952 */ 953 for (i = 0; i < __KERN_NAUXV_IMPL; i++) { 954 ulong_t val; 955 956 switch (up->u_auxv[i].a_type) { 957 case AT_SUN_BRAND_COMMON_LDDATA: 958 up->u_auxv[i].a_un.a_val = sed.sed_lddata; 959 continue; 960 case AT_BASE: 961 val = sedp->sed_base; 962 break; 963 case AT_ENTRY: 964 val = sedp->sed_entry; 965 break; 966 case AT_PHDR: 967 val = sedp->sed_phdr; 968 break; 969 case AT_PHENT: 970 val = sedp->sed_phent; 971 break; 972 case AT_PHNUM: 973 val = sedp->sed_phnum; 974 break; 975 case AT_SUN_LDDATA: 976 val = sedp->sed_lddata; 977 break; 978 default: 979 continue; 980 } 981 982 up->u_auxv[i].a_un.a_val = val; 983 if (val == NULL) { 984 /* Hide the entry for static binaries */ 985 up->u_auxv[i].a_type = AT_IGNORE; 986 } 987 } 988 989 /* 990 * The last thing we do here is clear spd->spd_handler. This 991 * is important because if we're already a branded process and 992 * if this exec succeeds, there is a window between when the 993 * exec() first returns to the userland of the new process and 994 * when our brand library get's initialized, during which we 995 * don't want system calls to be re-directed to our brand 996 * library since it hasn't been initialized yet. 997 */ 998 spd->spd_handler = NULL; 999 1000 return (0); 1001 } 1002 1003 void 1004 brand_solaris_exec(struct brand *pbrand) 1005 { 1006 brand_proc_data_t *spd = curproc->p_brand_data; 1007 1008 ASSERT(curproc->p_brand == pbrand); 1009 ASSERT(curproc->p_brand_data != NULL); 1010 ASSERT(ttolwp(curthread)->lwp_brand != NULL); 1011 1012 /* 1013 * We should only be called from exec(), when we know the process 1014 * is single-threaded. 1015 */ 1016 ASSERT(curproc->p_tlist == curproc->p_tlist->t_forw); 1017 1018 /* Upon exec, reset our lwp brand data. */ 1019 (void) brand_solaris_freelwp(ttolwp(curthread), pbrand); 1020 (void) brand_solaris_initlwp(ttolwp(curthread), pbrand); 1021 1022 /* 1023 * Upon exec, reset all the proc brand data, except for the elf 1024 * data associated with the executable we are exec'ing. 1025 */ 1026 spd->spd_handler = NULL; 1027 } 1028 1029 int 1030 brand_solaris_fini(char **emul_table, struct modlinkage *modlinkage, 1031 struct brand *pbrand) 1032 { 1033 int err; 1034 1035 /* 1036 * If there are any zones using this brand, we can't allow it 1037 * to be unloaded. 1038 */ 1039 if (brand_zone_count(pbrand)) 1040 return (EBUSY); 1041 1042 kmem_free(*emul_table, NSYSCALL); 1043 *emul_table = NULL; 1044 1045 err = mod_remove(modlinkage); 1046 if (err) 1047 cmn_err(CE_WARN, "Couldn't unload brand module"); 1048 1049 return (err); 1050 } 1051 1052 /*ARGSUSED*/ 1053 void 1054 brand_solaris_forklwp(klwp_t *p, klwp_t *c, struct brand *pbrand) 1055 { 1056 ASSERT(p->lwp_procp->p_brand == pbrand); 1057 ASSERT(c->lwp_procp->p_brand == pbrand); 1058 1059 ASSERT(p->lwp_procp->p_brand_data != NULL); 1060 ASSERT(c->lwp_procp->p_brand_data != NULL); 1061 1062 /* 1063 * Both LWPs have already had been initialized via 1064 * brand_solaris_initlwp(). 1065 */ 1066 ASSERT(p->lwp_brand != NULL); 1067 ASSERT(c->lwp_brand != NULL); 1068 } 1069 1070 /*ARGSUSED*/ 1071 void 1072 brand_solaris_freelwp(klwp_t *l, struct brand *pbrand) 1073 { 1074 ASSERT(l->lwp_procp->p_brand == pbrand); 1075 ASSERT(l->lwp_procp->p_brand_data != NULL); 1076 ASSERT(l->lwp_brand != NULL); 1077 l->lwp_brand = NULL; 1078 } 1079 1080 /*ARGSUSED*/ 1081 int 1082 brand_solaris_initlwp(klwp_t *l, struct brand *pbrand) 1083 { 1084 ASSERT(l->lwp_procp->p_brand == pbrand); 1085 ASSERT(l->lwp_procp->p_brand_data != NULL); 1086 ASSERT(l->lwp_brand == NULL); 1087 l->lwp_brand = (void *)-1; 1088 return (0); 1089 } 1090 1091 /*ARGSUSED*/ 1092 void 1093 brand_solaris_lwpexit(klwp_t *l, struct brand *pbrand) 1094 { 1095 proc_t *p = l->lwp_procp; 1096 1097 ASSERT(l->lwp_procp->p_brand == pbrand); 1098 ASSERT(l->lwp_procp->p_brand_data != NULL); 1099 ASSERT(l->lwp_brand != NULL); 1100 1101 /* 1102 * We should never be called for the last thread in a process. 1103 * (That case is handled by brand_solaris_proc_exit().) 1104 * Therefore this lwp must be exiting from a multi-threaded 1105 * process. 1106 */ 1107 ASSERT(p->p_tlist != p->p_tlist->t_forw); 1108 1109 l->lwp_brand = NULL; 1110 } 1111 1112 /*ARGSUSED*/ 1113 void 1114 brand_solaris_proc_exit(struct proc *p, klwp_t *l, struct brand *pbrand) 1115 { 1116 ASSERT(p->p_brand == pbrand); 1117 ASSERT(p->p_brand_data != NULL); 1118 1119 /* 1120 * When called from proc_exit(), we know that process is 1121 * single-threaded and free our lwp brand data. 1122 * otherwise just free p_brand_data and return. 1123 */ 1124 if (l != NULL) { 1125 ASSERT(p->p_tlist == p->p_tlist->t_forw); 1126 ASSERT(p->p_tlist->t_lwp == l); 1127 (void) brand_solaris_freelwp(l, pbrand); 1128 } 1129 1130 /* upon exit, free our proc brand data */ 1131 kmem_free(p->p_brand_data, sizeof (brand_proc_data_t)); 1132 p->p_brand_data = NULL; 1133 } 1134 1135 void 1136 brand_solaris_setbrand(proc_t *p, struct brand *pbrand) 1137 { 1138 ASSERT(p->p_brand == pbrand); 1139 ASSERT(p->p_brand_data == NULL); 1140 1141 /* 1142 * We should only be called from exec(), when we know the process 1143 * is single-threaded. 1144 */ 1145 ASSERT(p->p_tlist == p->p_tlist->t_forw); 1146 1147 p->p_brand_data = kmem_zalloc(sizeof (brand_proc_data_t), KM_SLEEP); 1148 (void) brand_solaris_initlwp(p->p_tlist->t_lwp, pbrand); 1149 } 1150