1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/errno.h> 27 #include <sys/exec.h> 28 #include <sys/kmem.h> 29 #include <sys/modctl.h> 30 #include <sys/model.h> 31 #include <sys/proc.h> 32 #include <sys/syscall.h> 33 #include <sys/systm.h> 34 #include <sys/thread.h> 35 #include <sys/cmn_err.h> 36 #include <sys/archsystm.h> 37 #include <sys/pathname.h> 38 #include <sys/sunddi.h> 39 40 #include <sys/machbrand.h> 41 #include <sys/brand.h> 42 #include "s10_brand.h" 43 44 char *s10_emulation_table = NULL; 45 46 void s10_init_brand_data(zone_t *); 47 void s10_free_brand_data(zone_t *); 48 void s10_setbrand(proc_t *); 49 int s10_getattr(zone_t *, int, void *, size_t *); 50 int s10_setattr(zone_t *, int, void *, size_t); 51 int s10_brandsys(int, int64_t *, uintptr_t, uintptr_t, uintptr_t, 52 uintptr_t, uintptr_t, uintptr_t); 53 void s10_copy_procdata(proc_t *, proc_t *); 54 void s10_proc_exit(struct proc *, klwp_t *); 55 void s10_exec(); 56 int s10_initlwp(klwp_t *); 57 void s10_forklwp(klwp_t *, klwp_t *); 58 void s10_freelwp(klwp_t *); 59 void s10_lwpexit(klwp_t *); 60 int s10_elfexec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int, 61 long *, int, caddr_t, cred_t *, int); 62 63 /* s10 brand */ 64 struct brand_ops s10_brops = { 65 s10_init_brand_data, 66 s10_free_brand_data, 67 s10_brandsys, 68 s10_setbrand, 69 s10_getattr, 70 s10_setattr, 71 s10_copy_procdata, 72 s10_proc_exit, 73 s10_exec, 74 lwp_setrval, 75 s10_initlwp, 76 s10_forklwp, 77 s10_freelwp, 78 s10_lwpexit, 79 s10_elfexec 80 }; 81 82 #ifdef sparc 83 84 struct brand_mach_ops s10_mops = { 85 s10_brand_syscall_callback, 86 s10_brand_syscall32_callback 87 }; 88 89 #else /* sparc */ 90 91 #ifdef __amd64 92 93 struct brand_mach_ops s10_mops = { 94 s10_brand_sysenter_callback, 95 NULL, 96 s10_brand_int91_callback, 97 s10_brand_syscall_callback, 98 s10_brand_syscall32_callback, 99 NULL 100 }; 101 102 #else /* ! __amd64 */ 103 104 struct brand_mach_ops s10_mops = { 105 s10_brand_sysenter_callback, 106 NULL, 107 NULL, 108 s10_brand_syscall_callback, 109 NULL, 110 NULL 111 }; 112 #endif /* __amd64 */ 113 114 #endif /* _sparc */ 115 116 struct brand s10_brand = { 117 BRAND_VER_1, 118 "solaris10", 119 &s10_brops, 120 &s10_mops 121 }; 122 123 static struct modlbrand modlbrand = { 124 &mod_brandops, /* type of module */ 125 "Solaris 10 Brand", /* description of module */ 126 &s10_brand /* driver ops */ 127 }; 128 129 static struct modlinkage modlinkage = { 130 MODREV_1, (void *)&modlbrand, NULL 131 }; 132 133 void 134 s10_setbrand(proc_t *p) 135 { 136 ASSERT(p->p_brand == &s10_brand); 137 ASSERT(p->p_brand_data == NULL); 138 139 /* 140 * We should only be called from exec(), when we know the process 141 * is single-threaded. 142 */ 143 ASSERT(p->p_tlist == p->p_tlist->t_forw); 144 145 p->p_brand_data = kmem_zalloc(sizeof (s10_proc_data_t), KM_SLEEP); 146 (void) s10_initlwp(p->p_tlist->t_lwp); 147 } 148 149 /*ARGSUSED*/ 150 int 151 s10_getattr(zone_t *zone, int attr, void *buf, size_t *bufsize) 152 { 153 ASSERT(zone->zone_brand == &s10_brand); 154 if (attr == S10_EMUL_BITMAP) { 155 if (buf == NULL || *bufsize != sizeof (s10_emul_bitmap_t)) 156 return (EINVAL); 157 if (copyout(((s10_zone_data_t *)zone->zone_brand_data)-> 158 emul_bitmap, buf, sizeof (s10_emul_bitmap_t)) != 0) 159 return (EFAULT); 160 return (0); 161 } 162 163 return (EINVAL); 164 } 165 166 int 167 s10_setattr(zone_t *zone, int attr, void *buf, size_t bufsize) 168 { 169 ASSERT(zone->zone_brand == &s10_brand); 170 if (attr == S10_EMUL_BITMAP) { 171 if (buf == NULL || bufsize != sizeof (s10_emul_bitmap_t)) 172 return (EINVAL); 173 if (copyin(buf, ((s10_zone_data_t *)zone->zone_brand_data)-> 174 emul_bitmap, sizeof (s10_emul_bitmap_t)) != 0) 175 return (EFAULT); 176 return (0); 177 } 178 179 return (EINVAL); 180 } 181 182 #ifdef __amd64 183 /* 184 * The Nevada kernel clears %fs for threads in 64-bit x86 processes but S10's 185 * libc expects %fs to be nonzero. This causes some committed 186 * libc/libthread interfaces (e.g., thr_main()) to fail, which impacts several 187 * libraries, including libdoor. This function sets the specified LWP's %fs 188 * register to the legacy S10 selector value (LWPFS_SEL). 189 * 190 * The best solution to the aforementioned problem is backporting CRs 191 * 6467491 to Solaris 10 so that 64-bit x86 Solaris 10 processes 192 * would accept zero for %fs. Backporting the CRs is a requirement for running 193 * S10 Containers in PV domUs because 64-bit Xen clears %fsbase when %fs is 194 * nonzero. Such behavior breaks 64-bit processes because Xen has to fetch the 195 * FS segments' base addresses from the LWPs' GDTs, which are only capable of 196 * 32-bit addressing. 197 */ 198 /*ARGSUSED*/ 199 static void 200 s10_amd64_correct_fsreg(klwp_t *l) 201 { 202 if (lwp_getdatamodel(l) == DATAMODEL_NATIVE) { 203 kpreempt_disable(); 204 l->lwp_pcb.pcb_fs = LWPFS_SEL; 205 l->lwp_pcb.pcb_rupdate = 1; 206 lwptot(l)->t_post_sys = 1; /* Guarantee update_sregs() */ 207 kpreempt_enable(); 208 } 209 } 210 #endif /* __amd64 */ 211 212 int 213 s10_native() 214 { 215 struct user *up = PTOU(curproc); 216 char *args_new, *comm_new, *p; 217 int len; 218 219 len = sizeof (S10_NATIVE_LINKER32 " ") - 1; 220 221 /* 222 * Make sure that the process' interpreter is the native dynamic linker. 223 * Convention dictates that native processes executing within solaris10- 224 * branded zones are interpreted by the native dynamic linker (the 225 * process and its arguments are specified as arguments to the dynamic 226 * linker). If this convention is violated (i.e., 227 * brandsys(B_S10_NATIVE, ...) is invoked by a process that shouldn't be 228 * native), then do nothing and silently indicate success. 229 */ 230 if (strcmp(up->u_comm, S10_LINKER_NAME) != 0) 231 return (0); 232 if (strncmp(up->u_psargs, S10_NATIVE_LINKER64 " /", len + 4) == 0) 233 len += 3; /* to account for "/64" in the path */ 234 else if (strncmp(up->u_psargs, S10_NATIVE_LINKER32 " /", len + 1) != 0) 235 return (0); 236 237 args_new = strdup(&up->u_psargs[len]); 238 if ((p = strchr(args_new, ' ')) != NULL) 239 *p = '\0'; 240 if ((comm_new = strrchr(args_new, '/')) != NULL) 241 comm_new = strdup(comm_new + 1); 242 else 243 comm_new = strdup(args_new); 244 if (p != NULL) 245 *p = ' '; 246 247 if ((strlen(args_new) != 0) && (strlen(comm_new) != 0)) { 248 mutex_enter(&curproc->p_lock); 249 (void) strlcpy(up->u_comm, comm_new, MAXCOMLEN+1); 250 (void) strlcpy(up->u_psargs, args_new, PSARGSZ); 251 mutex_exit(&curproc->p_lock); 252 } 253 254 strfree(args_new); 255 strfree(comm_new); 256 return (0); 257 } 258 259 /* 260 * Get the address of the user-space system call handler from the user 261 * process and attach it to the proc structure. 262 */ 263 /*ARGSUSED*/ 264 int 265 s10_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2, 266 uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6) 267 { 268 s10_proc_data_t *spd; 269 s10_brand_reg_t reg; 270 proc_t *p = curproc; 271 int err; 272 273 *rval = 0; 274 275 /* 276 * B_EXEC_BRAND is redundant 277 * since the kernel assumes a native process doing an exec 278 * in a branded zone is going to run a branded processes. 279 * hence we don't support this operation. 280 */ 281 if (cmd == B_EXEC_BRAND) 282 return (ENOSYS); 283 284 if (cmd == B_S10_NATIVE) 285 return (s10_native()); 286 287 /* For all other operations this must be a branded process. */ 288 if (p->p_brand == &native_brand) 289 return (ENOSYS); 290 291 ASSERT(p->p_brand == &s10_brand); 292 ASSERT(p->p_brand_data != NULL); 293 294 spd = (s10_proc_data_t *)p->p_brand_data; 295 296 switch (cmd) { 297 case B_EXEC_NATIVE: 298 err = exec_common( 299 (char *)arg1, (const char **)arg2, (const char **)arg3, 300 EBA_NATIVE); 301 return (err); 302 303 case B_REGISTER: 304 if (p->p_model == DATAMODEL_NATIVE) { 305 if (copyin((void *)arg1, ®, sizeof (reg)) != 0) 306 return (EFAULT); 307 #if defined(_LP64) 308 } else { 309 s10_brand_reg32_t reg32; 310 311 if (copyin((void *)arg1, ®32, sizeof (reg32)) != 0) 312 return (EFAULT); 313 reg.sbr_version = reg32.sbr_version; 314 reg.sbr_handler = (caddr_t)(uintptr_t)reg32.sbr_handler; 315 #endif /* _LP64 */ 316 } 317 318 if (reg.sbr_version != S10_VERSION) 319 return (ENOTSUP); 320 spd->spd_handler = reg.sbr_handler; 321 return (0); 322 323 case B_ELFDATA: 324 if (p->p_model == DATAMODEL_NATIVE) { 325 if (copyout(&spd->spd_elf_data, (void *)arg1, 326 sizeof (s10_elf_data_t)) != 0) 327 return (EFAULT); 328 #if defined(_LP64) 329 } else { 330 s10_elf_data32_t sed32; 331 332 sed32.sed_phdr = spd->spd_elf_data.sed_phdr; 333 sed32.sed_phent = spd->spd_elf_data.sed_phent; 334 sed32.sed_phnum = spd->spd_elf_data.sed_phnum; 335 sed32.sed_entry = spd->spd_elf_data.sed_entry; 336 sed32.sed_base = spd->spd_elf_data.sed_base; 337 sed32.sed_ldentry = spd->spd_elf_data.sed_ldentry; 338 sed32.sed_lddata = spd->spd_elf_data.sed_lddata; 339 if (copyout(&sed32, (void *)arg1, sizeof (sed32)) != 0) 340 return (EFAULT); 341 #endif /* _LP64 */ 342 } 343 return (0); 344 345 case B_S10_PIDINFO: 346 /* 347 * The s10 brand needs to be able to get the pid of the 348 * current process and the pid of the zone's init, and it 349 * needs to do this on every process startup. Early in 350 * brand startup, we can't call getpid() because calls to 351 * getpid() represent a magical signal to some old-skool 352 * debuggers. By merging all of this into one call, we 353 * make this quite a bit cheaper and easier to handle in 354 * the brand module. 355 */ 356 if (copyout(&p->p_pid, (void *)arg1, sizeof (pid_t)) != 0) 357 return (EFAULT); 358 if (copyout(&p->p_zone->zone_proc_initpid, (void *)arg2, 359 sizeof (pid_t)) != 0) 360 return (EFAULT); 361 return (0); 362 363 case B_S10_TRUSS_POINT: 364 /* 365 * This subcommand exists so that we can see truss output 366 * from interposed system calls that return without first 367 * calling any other system call, meaning they would be 368 * invisible to truss(1). 369 * 370 * If the second argument is set non-zero, set errno to that 371 * value as well. 372 * 373 * Arguments are: 374 * 375 * arg1: syscall number 376 * arg2: errno 377 */ 378 return ((arg2 == 0) ? 0 : set_errno((uint_t)arg2)); 379 380 #ifdef __amd64 381 case B_S10_FSREGCORRECTION: 382 /* 383 * This subcommand exists so that the SYS_lwp_private and 384 * SYS_lwp_create syscalls can manually set the current thread's 385 * %fs register to the legacy S10 selector value for 64-bit x86 386 * processes. 387 */ 388 s10_amd64_correct_fsreg(ttolwp(curthread)); 389 return (0); 390 #endif /* __amd64 */ 391 } 392 393 return (EINVAL); 394 } 395 396 /* 397 * Copy the per-process brand data from a parent proc to a child. 398 */ 399 void 400 s10_copy_procdata(proc_t *child, proc_t *parent) 401 { 402 s10_proc_data_t *spd; 403 404 ASSERT(parent->p_brand == &s10_brand); 405 ASSERT(child->p_brand == &s10_brand); 406 ASSERT(parent->p_brand_data != NULL); 407 ASSERT(child->p_brand_data == NULL); 408 409 /* Just duplicate all the proc data of the parent for the child */ 410 spd = kmem_alloc(sizeof (s10_proc_data_t), KM_SLEEP); 411 bcopy(parent->p_brand_data, spd, sizeof (s10_proc_data_t)); 412 child->p_brand_data = spd; 413 } 414 415 /*ARGSUSED*/ 416 void 417 s10_proc_exit(struct proc *p, klwp_t *l) 418 { 419 ASSERT(p->p_brand == &s10_brand); 420 ASSERT(p->p_brand_data != NULL); 421 422 /* 423 * We should only be called from proc_exit(), when we know that 424 * process is single-threaded. 425 */ 426 ASSERT(p->p_tlist == p->p_tlist->t_forw); 427 428 /* upon exit, free our lwp brand data */ 429 (void) s10_freelwp(ttolwp(curthread)); 430 431 /* upon exit, free our proc brand data */ 432 kmem_free(p->p_brand_data, sizeof (s10_proc_data_t)); 433 p->p_brand_data = NULL; 434 } 435 436 void 437 s10_exec() 438 { 439 s10_proc_data_t *spd = curproc->p_brand_data; 440 441 ASSERT(curproc->p_brand == &s10_brand); 442 ASSERT(curproc->p_brand_data != NULL); 443 ASSERT(ttolwp(curthread)->lwp_brand != NULL); 444 445 /* 446 * We should only be called from exec(), when we know the process 447 * is single-threaded. 448 */ 449 ASSERT(curproc->p_tlist == curproc->p_tlist->t_forw); 450 451 /* Upon exec, reset our lwp brand data. */ 452 (void) s10_freelwp(ttolwp(curthread)); 453 (void) s10_initlwp(ttolwp(curthread)); 454 455 /* 456 * Upon exec, reset all the proc brand data, except for the elf 457 * data associated with the executable we are exec'ing. 458 */ 459 spd->spd_handler = NULL; 460 } 461 462 /*ARGSUSED*/ 463 int 464 s10_initlwp(klwp_t *l) 465 { 466 ASSERT(l->lwp_procp->p_brand == &s10_brand); 467 ASSERT(l->lwp_procp->p_brand_data != NULL); 468 ASSERT(l->lwp_brand == NULL); 469 l->lwp_brand = (void *)-1; 470 return (0); 471 } 472 473 /*ARGSUSED*/ 474 void 475 s10_forklwp(klwp_t *p, klwp_t *c) 476 { 477 ASSERT(p->lwp_procp->p_brand == &s10_brand); 478 ASSERT(c->lwp_procp->p_brand == &s10_brand); 479 480 ASSERT(p->lwp_procp->p_brand_data != NULL); 481 ASSERT(c->lwp_procp->p_brand_data != NULL); 482 483 /* Both LWPs have already had been initialized via s10_initlwp() */ 484 ASSERT(p->lwp_brand != NULL); 485 ASSERT(c->lwp_brand != NULL); 486 487 #ifdef __amd64 488 /* 489 * Only correct the child's %fs register if the parent's %fs register 490 * is LWPFS_SEL. If the parent's %fs register is zero, then the Solaris 491 * 10 environment that we're emulating uses a version of libc that 492 * works when %fs is zero (i.e., it contains backports of CRs 6467491 493 * and 6501650). 494 */ 495 if (p->lwp_pcb.pcb_fs == LWPFS_SEL) 496 s10_amd64_correct_fsreg(c); 497 #endif /* __amd64 */ 498 } 499 500 /*ARGSUSED*/ 501 void 502 s10_freelwp(klwp_t *l) 503 { 504 ASSERT(l->lwp_procp->p_brand == &s10_brand); 505 ASSERT(l->lwp_procp->p_brand_data != NULL); 506 ASSERT(l->lwp_brand != NULL); 507 l->lwp_brand = NULL; 508 } 509 510 /*ARGSUSED*/ 511 void 512 s10_lwpexit(klwp_t *l) 513 { 514 ASSERT(l->lwp_procp->p_brand == &s10_brand); 515 ASSERT(l->lwp_procp->p_brand_data != NULL); 516 ASSERT(l->lwp_brand != NULL); 517 518 /* 519 * We should never be called for the last thread in a process. 520 * (That case is handled by s10_proc_exit().) There for this lwp 521 * must be exiting from a multi-threaded process. 522 */ 523 ASSERT(l->lwp_procp->p_tlist != l->lwp_procp->p_tlist->t_forw); 524 525 l->lwp_brand = NULL; 526 } 527 528 void 529 s10_free_brand_data(zone_t *zone) 530 { 531 kmem_free(zone->zone_brand_data, sizeof (s10_zone_data_t)); 532 } 533 534 void 535 s10_init_brand_data(zone_t *zone) 536 { 537 ASSERT(zone->zone_brand == &s10_brand); 538 ASSERT(zone->zone_brand_data == NULL); 539 zone->zone_brand_data = kmem_zalloc(sizeof (s10_zone_data_t), KM_SLEEP); 540 } 541 542 #if defined(_LP64) 543 static void 544 Ehdr32to64(Elf32_Ehdr *src, Ehdr *dst) 545 { 546 bcopy(src->e_ident, dst->e_ident, sizeof (src->e_ident)); 547 dst->e_type = src->e_type; 548 dst->e_machine = src->e_machine; 549 dst->e_version = src->e_version; 550 dst->e_entry = src->e_entry; 551 dst->e_phoff = src->e_phoff; 552 dst->e_shoff = src->e_shoff; 553 dst->e_flags = src->e_flags; 554 dst->e_ehsize = src->e_ehsize; 555 dst->e_phentsize = src->e_phentsize; 556 dst->e_phnum = src->e_phnum; 557 dst->e_shentsize = src->e_shentsize; 558 dst->e_shnum = src->e_shnum; 559 dst->e_shstrndx = src->e_shstrndx; 560 } 561 #endif /* _LP64 */ 562 563 int 564 s10_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap, 565 int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred, 566 int brand_action) 567 { 568 vnode_t *nvp; 569 Ehdr ehdr; 570 Addr uphdr_vaddr; 571 intptr_t voffset; 572 int interp; 573 int i, err; 574 struct execenv env; 575 struct user *up = PTOU(curproc); 576 s10_proc_data_t *spd; 577 s10_elf_data_t sed, *sedp; 578 char *linker; 579 uintptr_t lddata; /* lddata of executable's linker */ 580 581 ASSERT(curproc->p_brand == &s10_brand); 582 ASSERT(curproc->p_brand_data != NULL); 583 584 spd = (s10_proc_data_t *)curproc->p_brand_data; 585 sedp = &spd->spd_elf_data; 586 587 args->brandname = S10_BRANDNAME; 588 589 /* 590 * We will exec the brand library and then map in the target 591 * application and (optionally) the brand's default linker. 592 */ 593 if (args->to_model == DATAMODEL_NATIVE) { 594 args->emulator = S10_LIB; 595 linker = S10_LINKER; 596 #if defined(_LP64) 597 } else { 598 args->emulator = S10_LIB32; 599 linker = S10_LINKER32; 600 #endif /* _LP64 */ 601 } 602 603 if ((err = lookupname(args->emulator, UIO_SYSSPACE, FOLLOW, NULLVPP, 604 &nvp)) != 0) { 605 uprintf("%s: not found.", args->emulator); 606 return (err); 607 } 608 609 if (args->to_model == DATAMODEL_NATIVE) { 610 err = elfexec(nvp, uap, args, idatap, level + 1, execsz, 611 setid, exec_file, cred, brand_action); 612 #if defined(_LP64) 613 } else { 614 err = elf32exec(nvp, uap, args, idatap, level + 1, execsz, 615 setid, exec_file, cred, brand_action); 616 #endif /* _LP64 */ 617 } 618 VN_RELE(nvp); 619 if (err != 0) 620 return (err); 621 622 /* 623 * The u_auxv vectors are set up by elfexec to point to the brand 624 * emulation library and linker. Save these so they can be copied to 625 * the specific brand aux vectors. 626 */ 627 bzero(&sed, sizeof (sed)); 628 for (i = 0; i < __KERN_NAUXV_IMPL; i++) { 629 switch (up->u_auxv[i].a_type) { 630 case AT_SUN_LDDATA: 631 sed.sed_lddata = up->u_auxv[i].a_un.a_val; 632 break; 633 case AT_BASE: 634 sed.sed_base = up->u_auxv[i].a_un.a_val; 635 break; 636 case AT_ENTRY: 637 sed.sed_entry = up->u_auxv[i].a_un.a_val; 638 break; 639 case AT_PHDR: 640 sed.sed_phdr = up->u_auxv[i].a_un.a_val; 641 break; 642 case AT_PHENT: 643 sed.sed_phent = up->u_auxv[i].a_un.a_val; 644 break; 645 case AT_PHNUM: 646 sed.sed_phnum = up->u_auxv[i].a_un.a_val; 647 break; 648 default: 649 break; 650 } 651 } 652 /* Make sure the emulator has an entry point */ 653 ASSERT(sed.sed_entry != NULL); 654 ASSERT(sed.sed_phdr != NULL); 655 656 bzero(&env, sizeof (env)); 657 if (args->to_model == DATAMODEL_NATIVE) { 658 err = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr, &voffset, 659 exec_file, &interp, &env.ex_bssbase, &env.ex_brkbase, 660 &env.ex_brksize, NULL); 661 #if defined(_LP64) 662 } else { 663 Elf32_Ehdr ehdr32; 664 Elf32_Addr uphdr_vaddr32; 665 err = mapexec32_brand(vp, args, &ehdr32, &uphdr_vaddr32, 666 &voffset, exec_file, &interp, &env.ex_bssbase, 667 &env.ex_brkbase, &env.ex_brksize, NULL); 668 Ehdr32to64(&ehdr32, &ehdr); 669 if (uphdr_vaddr32 == (Elf32_Addr)-1) 670 uphdr_vaddr = (Addr)-1; 671 else 672 uphdr_vaddr = uphdr_vaddr32; 673 #endif /* _LP64 */ 674 } 675 if (err != 0) 676 return (err); 677 678 /* 679 * Save off the important properties of the executable. The brand 680 * library will ask us for this data later, when it is initializing 681 * and getting ready to transfer control to the brand application. 682 */ 683 if (uphdr_vaddr == (Addr)-1) 684 sedp->sed_phdr = voffset + ehdr.e_phoff; 685 else 686 sedp->sed_phdr = voffset + uphdr_vaddr; 687 sedp->sed_entry = voffset + ehdr.e_entry; 688 sedp->sed_phent = ehdr.e_phentsize; 689 sedp->sed_phnum = ehdr.e_phnum; 690 691 if (interp) { 692 if (ehdr.e_type == ET_DYN) { 693 /* 694 * This is a shared object executable, so we need to 695 * pick a reasonable place to put the heap. Just don't 696 * use the first page. 697 */ 698 env.ex_brkbase = (caddr_t)PAGESIZE; 699 env.ex_bssbase = (caddr_t)PAGESIZE; 700 } 701 702 /* 703 * If the program needs an interpreter (most do), map it in and 704 * store relevant information about it in the aux vector, where 705 * the brand library can find it. 706 */ 707 if ((err = lookupname(linker, UIO_SYSSPACE, 708 FOLLOW, NULLVPP, &nvp)) != 0) { 709 uprintf("%s: not found.", S10_LINKER); 710 return (err); 711 } 712 if (args->to_model == DATAMODEL_NATIVE) { 713 err = mapexec_brand(nvp, args, &ehdr, 714 &uphdr_vaddr, &voffset, exec_file, &interp, 715 NULL, NULL, NULL, &lddata); 716 #if defined(_LP64) 717 } else { 718 Elf32_Ehdr ehdr32; 719 Elf32_Addr uphdr_vaddr32; 720 err = mapexec32_brand(nvp, args, &ehdr32, 721 &uphdr_vaddr32, &voffset, exec_file, &interp, 722 NULL, NULL, NULL, &lddata); 723 Ehdr32to64(&ehdr32, &ehdr); 724 if (uphdr_vaddr32 == (Elf32_Addr)-1) 725 uphdr_vaddr = (Addr)-1; 726 else 727 uphdr_vaddr = uphdr_vaddr32; 728 #endif /* _LP64 */ 729 } 730 VN_RELE(nvp); 731 if (err != 0) 732 return (err); 733 734 /* 735 * Now that we know the base address of the brand's linker, 736 * place it in the aux vector. 737 */ 738 sedp->sed_base = voffset; 739 sedp->sed_ldentry = voffset + ehdr.e_entry; 740 sedp->sed_lddata = voffset + lddata; 741 } else { 742 /* 743 * This program has no interpreter. The brand library will 744 * jump to the address in the AT_SUN_BRAND_LDENTRY aux vector, 745 * so in this case, put the entry point of the main executable 746 * there. 747 */ 748 if (ehdr.e_type == ET_EXEC) { 749 /* 750 * An executable with no interpreter, this must be a 751 * statically linked executable, which means we loaded 752 * it at the address specified in the elf header, in 753 * which case the e_entry field of the elf header is an 754 * absolute address. 755 */ 756 sedp->sed_ldentry = ehdr.e_entry; 757 sedp->sed_entry = ehdr.e_entry; 758 sedp->sed_lddata = NULL; 759 sedp->sed_base = NULL; 760 } else { 761 /* 762 * A shared object with no interpreter, we use the 763 * calculated address from above. 764 */ 765 sedp->sed_ldentry = sedp->sed_entry; 766 sedp->sed_entry = NULL; 767 sedp->sed_phdr = NULL; 768 sedp->sed_phent = NULL; 769 sedp->sed_phnum = NULL; 770 sedp->sed_lddata = NULL; 771 sedp->sed_base = voffset; 772 773 if (ehdr.e_type == ET_DYN) { 774 /* 775 * Delay setting the brkbase until the first 776 * call to brk(); see elfexec() for details. 777 */ 778 env.ex_bssbase = (caddr_t)0; 779 env.ex_brkbase = (caddr_t)0; 780 env.ex_brksize = 0; 781 } 782 } 783 } 784 785 env.ex_magic = elfmagic; 786 env.ex_vp = vp; 787 setexecenv(&env); 788 789 /* 790 * It's time to manipulate the process aux vectors. First 791 * we need to update the AT_SUN_AUXFLAGS aux vector to set 792 * the AF_SUN_NOPLM flag. 793 */ 794 if (args->to_model == DATAMODEL_NATIVE) { 795 auxv_t auxflags_auxv; 796 797 if (copyin(args->auxp_auxflags, &auxflags_auxv, 798 sizeof (auxflags_auxv)) != 0) 799 return (EFAULT); 800 801 ASSERT(auxflags_auxv.a_type == AT_SUN_AUXFLAGS); 802 auxflags_auxv.a_un.a_val |= AF_SUN_NOPLM; 803 if (copyout(&auxflags_auxv, args->auxp_auxflags, 804 sizeof (auxflags_auxv)) != 0) 805 return (EFAULT); 806 #if defined(_LP64) 807 } else { 808 auxv32_t auxflags_auxv32; 809 810 if (copyin(args->auxp_auxflags, &auxflags_auxv32, 811 sizeof (auxflags_auxv32)) != 0) 812 return (EFAULT); 813 814 ASSERT(auxflags_auxv32.a_type == AT_SUN_AUXFLAGS); 815 auxflags_auxv32.a_un.a_val |= AF_SUN_NOPLM; 816 if (copyout(&auxflags_auxv32, args->auxp_auxflags, 817 sizeof (auxflags_auxv32)) != 0) 818 return (EFAULT); 819 #endif /* _LP64 */ 820 } 821 822 /* Second, copy out the brand specific aux vectors. */ 823 if (args->to_model == DATAMODEL_NATIVE) { 824 auxv_t s10_auxv[] = { 825 { AT_SUN_BRAND_AUX1, 0 }, 826 { AT_SUN_BRAND_AUX2, 0 }, 827 { AT_SUN_BRAND_AUX3, 0 } 828 }; 829 830 ASSERT(s10_auxv[0].a_type == AT_SUN_BRAND_S10_LDDATA); 831 s10_auxv[0].a_un.a_val = sed.sed_lddata; 832 833 if (copyout(&s10_auxv, args->auxp_brand, 834 sizeof (s10_auxv)) != 0) 835 return (EFAULT); 836 #if defined(_LP64) 837 } else { 838 auxv32_t s10_auxv32[] = { 839 { AT_SUN_BRAND_AUX1, 0 }, 840 { AT_SUN_BRAND_AUX2, 0 }, 841 { AT_SUN_BRAND_AUX3, 0 } 842 }; 843 844 ASSERT(s10_auxv32[0].a_type == AT_SUN_BRAND_S10_LDDATA); 845 s10_auxv32[0].a_un.a_val = (uint32_t)sed.sed_lddata; 846 if (copyout(&s10_auxv32, args->auxp_brand, 847 sizeof (s10_auxv32)) != 0) 848 return (EFAULT); 849 #endif /* _LP64 */ 850 } 851 852 /* 853 * Third, the the /proc aux vectors set up by elfexec() point to brand 854 * emulation library and it's linker. Copy these to the /proc brand 855 * specific aux vector, and update the regular /proc aux vectors to 856 * point to the executable (and it's linker). This will enable 857 * debuggers to access the executable via the usual /proc or elf notes 858 * aux vectors. 859 * 860 * The brand emulation library's linker will get it's aux vectors off 861 * the stack, and then update the stack with the executable's aux 862 * vectors before jumping to the executable's linker. 863 * 864 * Debugging the brand emulation library must be done from 865 * the global zone, where the librtld_db module knows how to fetch the 866 * brand specific aux vectors to access the brand emulation libraries 867 * linker. 868 */ 869 for (i = 0; i < __KERN_NAUXV_IMPL; i++) { 870 ulong_t val; 871 872 switch (up->u_auxv[i].a_type) { 873 case AT_SUN_BRAND_S10_LDDATA: 874 up->u_auxv[i].a_un.a_val = sed.sed_lddata; 875 continue; 876 case AT_BASE: 877 val = sedp->sed_base; 878 break; 879 case AT_ENTRY: 880 val = sedp->sed_entry; 881 break; 882 case AT_PHDR: 883 val = sedp->sed_phdr; 884 break; 885 case AT_PHENT: 886 val = sedp->sed_phent; 887 break; 888 case AT_PHNUM: 889 val = sedp->sed_phnum; 890 break; 891 case AT_SUN_LDDATA: 892 val = sedp->sed_lddata; 893 break; 894 default: 895 continue; 896 } 897 898 up->u_auxv[i].a_un.a_val = val; 899 if (val == NULL) { 900 /* Hide the entry for static binaries */ 901 up->u_auxv[i].a_type = AT_IGNORE; 902 } 903 } 904 905 /* 906 * The last thing we do here is clear spd->spd_handler. This is 907 * important because if we're already a branded process and if this 908 * exec succeeds, there is a window between when the exec() first 909 * returns to the userland of the new process and when our brand 910 * library get's initialized, during which we don't want system 911 * calls to be re-directed to our brand library since it hasn't 912 * been initialized yet. 913 */ 914 spd->spd_handler = NULL; 915 916 return (0); 917 } 918 919 920 int 921 _init(void) 922 { 923 int err; 924 925 /* 926 * Set up the table indicating which system calls we want to 927 * interpose on. We should probably build this automatically from 928 * a list of system calls that is shared with the user-space 929 * library. 930 */ 931 s10_emulation_table = kmem_zalloc(NSYSCALL, KM_SLEEP); 932 s10_emulation_table[SYS_exec] = 1; /* 11 */ 933 s10_emulation_table[SYS_ioctl] = 1; /* 54 */ 934 s10_emulation_table[SYS_execve] = 1; /* 59 */ 935 s10_emulation_table[SYS_acctctl] = 1; /* 71 */ 936 s10_emulation_table[S10_SYS_issetugid] = 1; /* 75 */ 937 s10_emulation_table[SYS_uname] = 1; /* 135 */ 938 s10_emulation_table[SYS_systeminfo] = 1; /* 139 */ 939 #ifdef __amd64 940 s10_emulation_table[SYS_lwp_create] = 1; /* 159 */ 941 s10_emulation_table[SYS_lwp_private] = 1; /* 166 */ 942 #endif /* __amd64 */ 943 s10_emulation_table[SYS_pwrite] = 1; /* 174 */ 944 s10_emulation_table[SYS_auditsys] = 1; /* 186 */ 945 s10_emulation_table[SYS_sigqueue] = 1; /* 190 */ 946 s10_emulation_table[SYS_lwp_mutex_timedlock] = 1; /* 210 */ 947 s10_emulation_table[SYS_pwrite64] = 1; /* 223 */ 948 s10_emulation_table[SYS_zone] = 1; /* 227 */ 949 s10_emulation_table[SYS_lwp_mutex_trylock] = 1; /* 251 */ 950 951 err = mod_install(&modlinkage); 952 if (err) { 953 cmn_err(CE_WARN, "Couldn't install brand module"); 954 kmem_free(s10_emulation_table, NSYSCALL); 955 } 956 957 return (err); 958 } 959 960 int 961 _info(struct modinfo *modinfop) 962 { 963 return (mod_info(&modlinkage, modinfop)); 964 } 965 966 int 967 _fini(void) 968 { 969 int err; 970 971 /* 972 * If there are any zones using this brand, we can't allow it to be 973 * unloaded. 974 */ 975 if (brand_zone_count(&s10_brand)) 976 return (EBUSY); 977 978 kmem_free(s10_emulation_table, NSYSCALL); 979 s10_emulation_table = NULL; 980 981 err = mod_remove(&modlinkage); 982 if (err) 983 cmn_err(CE_WARN, "Couldn't unload s10 brand module"); 984 985 return (err); 986 } 987