1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* Copyright (c) 1988 AT&T */ 30 /* All Rights Reserved */ 31 32 33 #include <sys/types.h> 34 #include <sys/param.h> 35 #include <sys/sysmacros.h> 36 #include <sys/systm.h> 37 #include <sys/signal.h> 38 #include <sys/cred_impl.h> 39 #include <sys/policy.h> 40 #include <sys/user.h> 41 #include <sys/errno.h> 42 #include <sys/file.h> 43 #include <sys/vfs.h> 44 #include <sys/vnode.h> 45 #include <sys/mman.h> 46 #include <sys/acct.h> 47 #include <sys/cpuvar.h> 48 #include <sys/proc.h> 49 #include <sys/cmn_err.h> 50 #include <sys/debug.h> 51 #include <sys/pathname.h> 52 #include <sys/vm.h> 53 #include <sys/vtrace.h> 54 #include <sys/exec.h> 55 #include <sys/exechdr.h> 56 #include <sys/kmem.h> 57 #include <sys/prsystm.h> 58 #include <sys/modctl.h> 59 #include <sys/vmparam.h> 60 #include <sys/schedctl.h> 61 #include <sys/utrap.h> 62 #include <sys/systeminfo.h> 63 #include <sys/stack.h> 64 #include <sys/rctl.h> 65 #include <sys/dtrace.h> 66 #include 
<sys/lwpchan_impl.h>
#include <sys/pool.h>
#include <sys/sdt.h>

#include <c2/audit.h>

#include <vm/hat.h>
#include <vm/anon.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_vn.h>

/*
 * Flag bits returned by execsetid() and interpreted by gexec().
 */
#define	PRIV_RESET	0x01	/* needs to reset privs */
#define	PRIV_SETID	0x02	/* needs to change uids */
#define	PRIV_SETUGID	0x04	/* is setuid/setgid/forced privs */
#define	PRIV_INCREASE	0x08	/* child runs with more privs */

static int execsetid(struct vnode *, struct vattr *, uid_t *, uid_t *);
static int hold_execsw(struct execsw *);

uint_t auxv_hwcap = 0;	/* auxv AT_SUN_HWCAP value; determined on the fly */
#if defined(_SYSCALL32_IMPL)
uint_t auxv_hwcap32 = 0;	/* 32-bit version of auxv_hwcap */
#endif

#if defined(__i386) || defined(__amd64)
extern void ldt_free(proc_t *p);
extern void ldt_load(void);
#endif

int exec_lpg_disable = 0;

/*
 * Process flags that gexec() clears on entry at level 0 and sets again when
 * the new image runs with mismatched ids or increased privileges.
 */
#define	PSUIDFLAGS		(SNOCD|SUGID)

/*
 * exec() - wrapper around exece providing NULL environment pointer
 */
int
exec(const char *fname, const char **argp)
{
	return (exece(fname, argp, NULL));
}

/*
 * exece() - system call wrapper around exec_common()
 *
 * Returns 0 on success; on failure the errno value from exec_common() is
 * handed to set_errno() and that call's result is returned.
 */
int
exece(const char *fname, const char **argp, const char **envp)
{
	int error;

	error = exec_common(fname, argp, envp);
	return (error ? (set_errno(error)) : 0);
}

/*
 * exec_common() - common implementation behind exec()/exece().
 *
 * Looks up the user-space path fname, calls gexec() on the resulting vnode
 * and, once gexec() has succeeded, discards the state left over from the
 * old program: floating point and device context, contract templates,
 * signal handling state and alternate stack, profiling, private LDT or
 * utraps, close-on-exec files, and the lwp directory / tid hash.
 *
 * Returns 0 on success or an errno value on failure.  On every path that
 * gets past the initial checks the signal mask saved on entry is restored
 * and prexecend() is called before returning.
 */
int
exec_common(const char *fname, const char **argp, const char **envp)
{
	vnode_t *vp = NULL, *dir = NULL, *tmpvp = NULL;
	proc_t *p = ttoproc(curthread);
	klwp_t *lwp = ttolwp(curthread);
	struct user *up = PTOU(p);
	long execsz;		/* temporary count of exec size */
	int i;
	int error;
	char exec_file[MAXCOMLEN+1];
	struct pathname pn;
	struct pathname resolvepn;
	struct uarg args;
	struct execa ua;
	k_sigset_t savedmask;
	lwpdir_t *lwpdir = NULL;
	lwpdir_t **tidhash;
	lwpdir_t *old_lwpdir = NULL;
	uint_t old_lwpdir_sz;
	lwpdir_t **old_tidhash;
	uint_t old_tidhash_sz;
	lwpent_t *lep;

	/*
	 * exec() is not supported for the /proc agent lwp.
	 */
	if (curthread == p->p_agenttp)
		return (ENOTSUP);

	if ((error = secpolicy_basic_exec(CRED())) != 0)
		return (error);

	/*
	 * Inform /proc that an exec() has started.
	 * Hold signals that are ignored by default so that we will
	 * not be interrupted by a signal that will be ignored after
	 * successful completion of gexec().
	 */
	mutex_enter(&p->p_lock);
	prexecstart();
	schedctl_finish_sigblock(curthread);
	savedmask = curthread->t_hold;
	sigorset(&curthread->t_hold, &ignoredefault);
	mutex_exit(&p->p_lock);

	/*
	 * Look up path name and remember last component for later.
	 * To help coreadm expand its %d token, we attempt to save
	 * the directory containing the executable in p_execdir. The
	 * first call to lookuppn() may fail and return EINVAL because
	 * dirvpp is non-NULL. In that case, we make a second call to
	 * lookuppn() with dirvpp set to NULL; p_execdir will be NULL,
	 * but coreadm is allowed to expand %d to the empty string and
	 * there are other cases in which that failure may occur.
	 */
	if ((error = pn_get((char *)fname, UIO_USERSPACE, &pn)) != 0)
		goto out;
	pn_alloc(&resolvepn);
	if ((error = lookuppn(&pn, &resolvepn, FOLLOW, &dir, &vp)) != 0) {
		pn_free(&resolvepn);
		pn_free(&pn);
		if (error != EINVAL)
			goto out;

		/* Retry without asking for the containing directory. */
		dir = NULL;
		if ((error = pn_get((char *)fname, UIO_USERSPACE, &pn)) != 0)
			goto out;
		pn_alloc(&resolvepn);
		if ((error = lookuppn(&pn, &resolvepn, FOLLOW, NULLVPP,
		    &vp)) != 0) {
			pn_free(&resolvepn);
			pn_free(&pn);
			goto out;
		}
	}
	if (vp == NULL) {
		if (dir != NULL)
			VN_RELE(dir);
		error = ENOENT;
		pn_free(&resolvepn);
		pn_free(&pn);
		goto out;
	}
	/*
	 * exec_file is zeroed first and at most MAXCOMLEN bytes are copied,
	 * so it is always NUL-terminated.
	 */
	bzero(exec_file, MAXCOMLEN+1);
	(void) strncpy(exec_file, pn.pn_path, MAXCOMLEN);
	bzero(&args, sizeof (args));
	args.pathname = resolvepn.pn_path;
	/* don't free resolvepn until we are done with args */
	pn_free(&pn);

	/*
	 * Specific exec handlers, or policies determined via
	 * /etc/system may override the historical default.
	 */
	args.stk_prot = PROT_ZFOD;
	args.dat_prot = PROT_ZFOD;

	CPU_STATS_ADD_K(sys, sysexec, 1);
	DTRACE_PROC1(exec, char *, args.pathname);

	ua.fname = fname;
	ua.argp = argp;
	ua.envp = envp;

	/*
	 * From here on a failure goes to "fail" rather than "out" so that
	 * the exec__failure probe pairs with the exec probe fired above.
	 */
	if ((error = gexec(&vp, &ua, &args, NULL, 0, &execsz,
	    exec_file, p->p_cred)) != 0) {
		VN_RELE(vp);
		if (dir != NULL)
			VN_RELE(dir);
		pn_free(&resolvepn);
		goto fail;
	}

	/*
	 * Free floating point registers (sun4u only)
	 */
	ASSERT(lwp != NULL);
	lwp_freeregs(lwp, 1);

	/*
	 * Free device context
	 */
	if (curthread->t_ctx)
		freectx(curthread, 1);

	/*
	 * Remember file name for accounting; clear any cached DTrace predicate.
	 */
	up->u_acflag &= ~AFORK;
	bcopy(exec_file, up->u_comm, MAXCOMLEN+1);
	curthread->t_predcache = NULL;

	/*
	 * Clear contract template state
	 */
	lwp_ctmpl_clear(lwp);

	/*
	 * Save the directory in which we found the executable for expanding
	 * the %d token used in core file patterns.
	 */
	mutex_enter(&p->p_lock);
	tmpvp = p->p_execdir;
	p->p_execdir = dir;
	if (p->p_execdir != NULL)
		VN_HOLD(p->p_execdir);
	mutex_exit(&p->p_lock);

	/* Drop the previous p_execdir reference outside of p_lock. */
	if (tmpvp != NULL)
		VN_RELE(tmpvp);

	/*
	 * Reset stack state to the user stack, clear set of signals
	 * caught on the signal stack, and reset list of signals that
	 * restart system calls; the new program's environment should
	 * not be affected by detritus from the old program.  Any
	 * pending held signals remain held, so don't clear t_hold.
	 */
	mutex_enter(&p->p_lock);
	lwp->lwp_oldcontext = 0;
	lwp->lwp_ustack = 0;
	lwp->lwp_old_stk_ctl = 0;
	sigemptyset(&up->u_signodefer);
	sigemptyset(&up->u_sigonstack);
	sigemptyset(&up->u_sigresethand);
	lwp->lwp_sigaltstack.ss_sp = 0;
	lwp->lwp_sigaltstack.ss_size = 0;
	lwp->lwp_sigaltstack.ss_flags = SS_DISABLE;

	/*
	 * Make saved resource limit == current resource limit.
	 */
	for (i = 0; i < RLIM_NLIMITS; i++) {
		/*CONSTCOND*/
		if (RLIM_SAVED(i)) {
			(void) rctl_rlimit_get(rctlproc_legacy[i], p,
			    &up->u_saved_rlimit[i]);
		}
	}

	/*
	 * If the action was to catch the signal, then the action
	 * must be reset to SIG_DFL.
	 */
	sigdefault(p);
	p->p_flag &= ~(SNOWAIT|SJCTL);
	p->p_flag |= (SEXECED|SMSACCT|SMSFORK);
	up->u_signal[SIGCLD - 1] = SIG_DFL;

	/*
	 * Delete the dot4 sigqueues/signotifies.
	 */
	sigqfree(p);

	mutex_exit(&p->p_lock);

	/* Reset the profiling state under its own lock. */
	mutex_enter(&p->p_pflock);
	p->p_prof.pr_base = NULL;
	p->p_prof.pr_size = 0;
	p->p_prof.pr_off = 0;
	p->p_prof.pr_scale = 0;
	p->p_prof.pr_samples = 0;
	mutex_exit(&p->p_pflock);

	ASSERT(curthread->t_schedctl == NULL);

#if defined(__i386) || defined(__amd64)
	/* If the process uses a private LDT then change it to default */
	if (p->p_ldt)
		ldt_free(p);
#endif	/* __i386 || __amd64 */

#if defined(__amd64)
	/*
	 * Make sure the process has the correct LDT descriptor for its data
	 * model.
	 */
	if (p->p_model == DATAMODEL_LP64)
		p->p_ldt_desc = ldt0_default64_desc;
	else
		p->p_ldt_desc = ldt0_default_desc;

	/*
	 * Ensure the change of LDT is propagated into the LDTR.
	 */
	kpreempt_disable();
	ldt_load();
	kpreempt_enable();
#endif /* __amd64 */

#if defined(__sparc)
	if (p->p_utraps != NULL)
		utrap_free(p);
#endif	/* __sparc */

	/*
	 * Close all close-on-exec files.
	 */
	close_exec(P_FINFO(p));
	TRACE_2(TR_FAC_PROC, TR_PROC_EXEC, "proc_exec:p %p up %p", p, up);
	setregs(&args);

	/* Mark this as an executable vnode */
	mutex_enter(&vp->v_lock);
	vp->v_flag |= VVMEXEC;
	mutex_exit(&vp->v_lock);

	/* Done with the lookup results; args.pathname dies with resolvepn. */
	VN_RELE(vp);
	if (dir != NULL)
		VN_RELE(dir);
	pn_free(&resolvepn);

	/*
	 * Allocate a new lwp directory and lwpid hash table if necessary.
	 * Nothing is allocated when this thread already has tid 1 and the
	 * directory already has two slots.  The allocations are done
	 * before p_lock is taken below.
	 */
	if (curthread->t_tid != 1 || p->p_lwpdir_sz != 2) {
		lwpdir = kmem_zalloc(2 * sizeof (lwpdir_t), KM_SLEEP);
		lwpdir->ld_next = lwpdir + 1;
		tidhash = kmem_zalloc(2 * sizeof (lwpdir_t *), KM_SLEEP);
		/* Reuse the current thread's lwpent_t if there is one. */
		if (p->p_lwpdir != NULL)
			lep = p->p_lwpdir[curthread->t_dslot].ld_entry;
		else
			lep = kmem_zalloc(sizeof (*lep), KM_SLEEP);
	}

	mutex_enter(&p->p_lock);
	prbarrier(p);

	/*
	 * Reset lwp id to the default value of 1.
	 * This is a single-threaded process now
	 * and lwp #1 is lwp_wait()able by default.
	 * The t_unpark flag should not be inherited.
	 */
	ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
	curthread->t_tid = 1;
	curthread->t_unpark = 0;
	curthread->t_proc_flag |= TP_TWAIT;
	curthread->t_proc_flag &= ~TP_DAEMON;	/* daemons shouldn't exec */
	p->p_lwpdaemon = 0;			/* but oh well ... */
	p->p_lwpid = 1;

	/*
	 * Install the newly-allocated lwp directory and lwpid hash table
	 * and insert the current thread into the new hash table.
	 * tidhash and lep are only valid when lwpdir was allocated above.
	 */
	if (lwpdir != NULL) {
		old_lwpdir = p->p_lwpdir;
		old_lwpdir_sz = p->p_lwpdir_sz;
		old_tidhash = p->p_tidhash;
		old_tidhash_sz = p->p_tidhash_sz;
		p->p_lwpdir = p->p_lwpfree = lwpdir;
		p->p_lwpdir_sz = 2;
		p->p_tidhash = tidhash;
		p->p_tidhash_sz = 2;
		lep->le_thread = curthread;
		lep->le_lwpid = curthread->t_tid;
		lep->le_start = curthread->t_start;
		lwp_hash_in(p, lep);
	}
	/*
	 * Restore the saved signal mask and
	 * inform /proc that the exec() has finished.
	 */
	curthread->t_hold = savedmask;
	prexecend();
	mutex_exit(&p->p_lock);
	/* Free the replaced tables only after p_lock has been dropped. */
	if (old_lwpdir) {
		kmem_free(old_lwpdir, old_lwpdir_sz * sizeof (lwpdir_t));
		kmem_free(old_tidhash, old_tidhash_sz * sizeof (lwpdir_t *));
	}
	ASSERT(error == 0);
	DTRACE_PROC(exec__success);
	return (0);

fail:
	DTRACE_PROC1(exec__failure, int, error);
out:		/* error return */
	mutex_enter(&p->p_lock);
	curthread->t_hold = savedmask;
	prexecend();
	mutex_exit(&p->p_lock);
	ASSERT(error != 0);
	return (error);
}


/*
 * Perform generic exec duties and switchout to object-file specific
 * handler.
452 */ 453 int 454 gexec( 455 struct vnode **vpp, 456 struct execa *uap, 457 struct uarg *args, 458 struct intpdata *idatap, 459 int level, 460 long *execsz, 461 caddr_t exec_file, 462 struct cred *cred) 463 { 464 struct vnode *vp; 465 proc_t *pp = ttoproc(curthread); 466 struct execsw *eswp; 467 int error = 0; 468 int suidflags = 0; 469 ssize_t resid; 470 uid_t uid, gid; 471 struct vattr vattr; 472 char magbuf[MAGIC_BYTES]; 473 int setid; 474 cred_t *oldcred, *newcred = NULL; 475 int privflags = 0; 476 477 /* 478 * If the SNOCD or SUGID flag is set, turn it off and remember the 479 * previous setting so we can restore it if we encounter an error. 480 */ 481 if (level == 0 && (pp->p_flag & PSUIDFLAGS)) { 482 mutex_enter(&pp->p_lock); 483 suidflags = pp->p_flag & PSUIDFLAGS; 484 pp->p_flag &= ~PSUIDFLAGS; 485 mutex_exit(&pp->p_lock); 486 } 487 488 if ((error = execpermissions(*vpp, &vattr, args)) != 0) 489 goto bad; 490 491 /* need to open vnode for stateful file systems like rfs */ 492 if ((error = VOP_OPEN(vpp, FREAD, CRED())) != 0) 493 goto bad; 494 vp = *vpp; 495 496 /* 497 * Note: to support binary compatibility with SunOS a.out 498 * executables, we read in the first four bytes, as the 499 * magic number is in bytes 2-3. 
500 */ 501 if (error = vn_rdwr(UIO_READ, vp, magbuf, sizeof (magbuf), 502 (offset_t)0, UIO_SYSSPACE, 0, (rlim64_t)0, CRED(), &resid)) 503 goto bad; 504 if (resid != 0) 505 goto bad; 506 507 if ((eswp = findexec_by_hdr(magbuf)) == NULL) 508 goto bad; 509 510 if (level == 0 && 511 (privflags = execsetid(vp, &vattr, &uid, &gid)) != 0) { 512 513 newcred = cred = crdup(cred); 514 515 /* If we can, drop the PA bit */ 516 if ((privflags & PRIV_RESET) != 0) 517 priv_adjust_PA(cred); 518 519 if (privflags & PRIV_SETID) { 520 cred->cr_uid = uid; 521 cred->cr_gid = gid; 522 cred->cr_suid = uid; 523 cred->cr_sgid = gid; 524 } 525 526 /* 527 * Implement the privilege updates: 528 * 529 * Restrict with L: 530 * 531 * I' = I & L 532 * 533 * E' = P' = (I' + F) & A 534 * 535 * But if running under ptrace, we cap I with P. 536 */ 537 if ((privflags & PRIV_RESET) != 0) { 538 if ((privflags & PRIV_INCREASE) != 0 && 539 (pp->p_proc_flag & P_PR_PTRACE) != 0) 540 priv_intersect(&CR_OPPRIV(cred), 541 &CR_IPRIV(cred)); 542 priv_intersect(&CR_LPRIV(cred), &CR_IPRIV(cred)); 543 CR_EPRIV(cred) = CR_PPRIV(cred) = CR_IPRIV(cred); 544 priv_adjust_PA(cred); 545 } 546 } 547 548 /* SunOS 4.x buy-back */ 549 if ((vp->v_vfsp->vfs_flag & VFS_NOSETUID) && 550 (vattr.va_mode & (VSUID|VSGID))) { 551 cmn_err(CE_NOTE, 552 "!%s, uid %d: setuid execution not allowed, dev=%lx", 553 exec_file, cred->cr_uid, vp->v_vfsp->vfs_dev); 554 } 555 556 /* 557 * execsetid() told us whether or not we had to change the 558 * credentials of the process. In privflags, it told us 559 * whether we gained any privileges or executed a set-uid executable. 560 */ 561 setid = (privflags & (PRIV_SETUGID|PRIV_INCREASE)); 562 563 /* 564 * Use /etc/system variable to determine if the stack 565 * should be marked as executable by default. 
566 */ 567 if (noexec_user_stack) 568 args->stk_prot &= ~PROT_EXEC; 569 570 args->execswp = eswp; /* Save execsw pointer in uarg for exec_func */ 571 572 /* 573 * Traditionally, the setid flags told the sub processes whether 574 * the file just executed was set-uid or set-gid; this caused 575 * some confusion as the 'setid' flag did not match the SUGID 576 * process flag which is only set when the uids/gids do not match. 577 * A script set-gid/set-uid to the real uid/gid would start with 578 * /dev/fd/X but an executable would happily trust LD_LIBRARY_PATH. 579 * Now we flag those cases where the calling process cannot 580 * be trusted to influence the newly exec'ed process, either 581 * because it runs with more privileges or when the uids/gids 582 * do in fact not match. 583 * This also makes the runtime linker agree with the on exec 584 * values of SNOCD and SUGID. 585 */ 586 error = (*eswp->exec_func)(vp, uap, args, idatap, level, execsz, 587 (setid & PRIV_INCREASE) != 0 || 588 cred->cr_uid != cred->cr_ruid || 589 (cred->cr_rgid != cred->cr_gid && 590 !supgroupmember(cred->cr_gid, cred)), exec_file, cred); 591 rw_exit(eswp->exec_lock); 592 if (error != 0) { 593 if (newcred != NULL) 594 crfree(newcred); 595 goto bad; 596 } 597 598 if (level == 0) { 599 mutex_enter(&pp->p_crlock); 600 if (newcred != NULL) { 601 /* 602 * Free the old credentials, and set the new ones. 603 * Do this for both the process and the (single) thread. 604 */ 605 crfree(pp->p_cred); 606 pp->p_cred = cred; /* cred already held for proc */ 607 crhold(cred); /* hold new cred for thread */ 608 /* 609 * DTrace accesses t_cred in probe context. t_cred 610 * must always be either NULL, or point to a valid, 611 * allocated cred structure. 612 */ 613 oldcred = curthread->t_cred; 614 curthread->t_cred = cred; 615 crfree(oldcred); 616 } 617 /* 618 * On emerging from a successful exec(), the saved 619 * uid and gid equal the effective uid and gid. 
620 */ 621 cred->cr_suid = cred->cr_uid; 622 cred->cr_sgid = cred->cr_gid; 623 624 /* 625 * If the real and effective ids do not match, this 626 * is a setuid process that should not dump core. 627 * The group comparison is tricky; we prevent the code 628 * from flagging SNOCD when executing with an effective gid 629 * which is a supplementary group. 630 */ 631 if (cred->cr_ruid != cred->cr_uid || 632 (cred->cr_rgid != cred->cr_gid && 633 !supgroupmember(cred->cr_gid, cred)) || 634 (privflags & PRIV_INCREASE) != 0) 635 suidflags = PSUIDFLAGS; 636 else 637 suidflags = 0; 638 639 mutex_exit(&pp->p_crlock); 640 if (suidflags) { 641 mutex_enter(&pp->p_lock); 642 pp->p_flag |= suidflags; 643 mutex_exit(&pp->p_lock); 644 } 645 if (setid && (pp->p_proc_flag & P_PR_PTRACE) == 0) { 646 /* 647 * If process is traced via /proc, arrange to 648 * invalidate the associated /proc vnode. 649 */ 650 if (pp->p_plist || (pp->p_proc_flag & P_PR_TRACE)) 651 args->traceinval = 1; 652 } 653 if (pp->p_proc_flag & P_PR_PTRACE) 654 psignal(pp, SIGTRAP); 655 if (args->traceinval) 656 prinvalidate(&pp->p_user); 657 } 658 659 return (0); 660 bad: 661 if (error == 0) 662 error = ENOEXEC; 663 664 if (suidflags) { 665 mutex_enter(&pp->p_lock); 666 pp->p_flag |= suidflags; 667 mutex_exit(&pp->p_lock); 668 } 669 return (error); 670 } 671 672 extern char *execswnames[]; 673 674 struct execsw * 675 allocate_execsw(char *name, char *magic, size_t magic_size) 676 { 677 int i, j; 678 char *ename; 679 char *magicp; 680 681 mutex_enter(&execsw_lock); 682 for (i = 0; i < nexectype; i++) { 683 if (execswnames[i] == NULL) { 684 ename = kmem_alloc(strlen(name) + 1, KM_SLEEP); 685 (void) strcpy(ename, name); 686 execswnames[i] = ename; 687 /* 688 * Set the magic number last so that we 689 * don't need to hold the execsw_lock in 690 * findexectype(). 
691 */ 692 magicp = kmem_alloc(magic_size, KM_SLEEP); 693 for (j = 0; j < magic_size; j++) 694 magicp[j] = magic[j]; 695 execsw[i].exec_magic = magicp; 696 mutex_exit(&execsw_lock); 697 return (&execsw[i]); 698 } 699 } 700 mutex_exit(&execsw_lock); 701 return (NULL); 702 } 703 704 /* 705 * Find the exec switch table entry with the corresponding magic string. 706 */ 707 struct execsw * 708 findexecsw(char *magic) 709 { 710 struct execsw *eswp; 711 712 for (eswp = execsw; eswp < &execsw[nexectype]; eswp++) { 713 ASSERT(eswp->exec_maglen <= MAGIC_BYTES); 714 if (magic && eswp->exec_maglen != 0 && 715 bcmp(magic, eswp->exec_magic, eswp->exec_maglen) == 0) 716 return (eswp); 717 } 718 return (NULL); 719 } 720 721 /* 722 * Find the execsw[] index for the given exec header string by looking for the 723 * magic string at a specified offset and length for each kind of executable 724 * file format until one matches. If no execsw[] entry is found, try to 725 * autoload a module for this magic string. 726 */ 727 struct execsw * 728 findexec_by_hdr(char *header) 729 { 730 struct execsw *eswp; 731 732 for (eswp = execsw; eswp < &execsw[nexectype]; eswp++) { 733 ASSERT(eswp->exec_maglen <= MAGIC_BYTES); 734 if (header && eswp->exec_maglen != 0 && 735 bcmp(&header[eswp->exec_magoff], eswp->exec_magic, 736 eswp->exec_maglen) == 0) { 737 if (hold_execsw(eswp) != 0) 738 return (NULL); 739 return (eswp); 740 } 741 } 742 return (NULL); /* couldn't find the type */ 743 } 744 745 /* 746 * Find the execsw[] index for the given magic string. If no execsw[] entry 747 * is found, try to autoload a module for this magic string. 
748 */ 749 struct execsw * 750 findexec_by_magic(char *magic) 751 { 752 struct execsw *eswp; 753 754 for (eswp = execsw; eswp < &execsw[nexectype]; eswp++) { 755 ASSERT(eswp->exec_maglen <= MAGIC_BYTES); 756 if (magic && eswp->exec_maglen != 0 && 757 bcmp(magic, eswp->exec_magic, eswp->exec_maglen) == 0) { 758 if (hold_execsw(eswp) != 0) 759 return (NULL); 760 return (eswp); 761 } 762 } 763 return (NULL); /* couldn't find the type */ 764 } 765 766 static int 767 hold_execsw(struct execsw *eswp) 768 { 769 char *name; 770 771 rw_enter(eswp->exec_lock, RW_READER); 772 while (!LOADED_EXEC(eswp)) { 773 rw_exit(eswp->exec_lock); 774 name = execswnames[eswp-execsw]; 775 ASSERT(name); 776 if (modload("exec", name) == -1) 777 return (-1); 778 rw_enter(eswp->exec_lock, RW_READER); 779 } 780 return (0); 781 } 782 783 static int 784 execsetid(struct vnode *vp, struct vattr *vattrp, uid_t *uidp, uid_t *gidp) 785 { 786 proc_t *pp = ttoproc(curthread); 787 uid_t uid, gid; 788 cred_t *cr = pp->p_cred; 789 int privflags = 0; 790 791 /* 792 * Remember credentials. 793 */ 794 uid = cr->cr_uid; 795 gid = cr->cr_gid; 796 797 /* Will try to reset the PRIV_AWARE bit later. */ 798 if ((CR_FLAGS(cr) & (PRIV_AWARE|PRIV_AWARE_INHERIT)) == PRIV_AWARE) 799 privflags |= PRIV_RESET; 800 801 if ((vp->v_vfsp->vfs_flag & VFS_NOSETUID) == 0) { 802 /* 803 * Set-uid root execution only allowed if the limit set 804 * holds all unsafe privileges. 805 */ 806 if ((vattrp->va_mode & VSUID) && (vattrp->va_uid != 0 || 807 priv_issubset(&priv_unsafe, &CR_LPRIV(cr)))) { 808 uid = vattrp->va_uid; 809 privflags |= PRIV_SETUGID; 810 } 811 if (vattrp->va_mode & VSGID) { 812 gid = vattrp->va_gid; 813 privflags |= PRIV_SETUGID; 814 } 815 } 816 817 /* 818 * Do we need to change our credential anyway? 819 * This is the case when E != I or P != I, as 820 * we need to do the assignments (with F empty and A full) 821 * Or when I is not a subset of L; in that case we need to 822 * enforce L. 
823 * 824 * I' = L & I 825 * 826 * E' = P' = (I' + F) & A 827 * or 828 * E' = P' = I' 829 */ 830 if (!priv_isequalset(&CR_EPRIV(cr), &CR_IPRIV(cr)) || 831 !priv_issubset(&CR_IPRIV(cr), &CR_LPRIV(cr)) || 832 !priv_isequalset(&CR_PPRIV(cr), &CR_IPRIV(cr))) 833 privflags |= PRIV_RESET; 834 835 /* 836 * When we introduce the "forced" set then we will need 837 * to set PRIV_INCREASE here if I not a subset of P. 838 * If the "allowed" set is introduced we will need to do 839 * a similar thing; however, it seems more reasonable to 840 * have the allowed set reduce "L": script language interpreters 841 * would typically have an allowed set of "all". 842 */ 843 844 /* 845 * Set setuid/setgid protections if no ptrace() compatibility. 846 * For privileged processes, honor setuid/setgid even in 847 * the presence of ptrace() compatibility. 848 */ 849 if (((pp->p_proc_flag & P_PR_PTRACE) == 0 || 850 PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, (uid == 0))) && 851 (cr->cr_uid != uid || 852 cr->cr_gid != gid || 853 cr->cr_suid != uid || 854 cr->cr_sgid != gid)) { 855 *uidp = uid; 856 *gidp = gid; 857 privflags |= PRIV_SETID; 858 } 859 return (privflags); 860 } 861 862 int 863 execpermissions(struct vnode *vp, struct vattr *vattrp, struct uarg *args) 864 { 865 int error; 866 proc_t *p = ttoproc(curthread); 867 868 vattrp->va_mask = AT_MODE | AT_UID | AT_GID | AT_SIZE; 869 if (error = VOP_GETATTR(vp, vattrp, ATTR_EXEC, p->p_cred)) 870 return (error); 871 /* 872 * Check the access mode. 873 * If VPROC, ask /proc if the file is an object file. 
874 */ 875 if ((error = VOP_ACCESS(vp, VEXEC, 0, p->p_cred)) != 0 || 876 !(vp->v_type == VREG || (vp->v_type == VPROC && pr_isobject(vp))) || 877 (vp->v_vfsp->vfs_flag & VFS_NOEXEC) != 0 || 878 (vattrp->va_mode & (VEXEC|(VEXEC>>3)|(VEXEC>>6))) == 0) { 879 if (error == 0) 880 error = EACCES; 881 return (error); 882 } 883 884 if ((p->p_plist || (p->p_proc_flag & (P_PR_PTRACE|P_PR_TRACE))) && 885 (error = VOP_ACCESS(vp, VREAD, 0, p->p_cred))) { 886 /* 887 * If process is under ptrace(2) compatibility, 888 * fail the exec(2). 889 */ 890 if (p->p_proc_flag & P_PR_PTRACE) 891 goto bad; 892 /* 893 * Process is traced via /proc. 894 * Arrange to invalidate the /proc vnode. 895 */ 896 args->traceinval = 1; 897 } 898 return (0); 899 bad: 900 if (error == 0) 901 error = ENOEXEC; 902 return (error); 903 } 904 905 /* 906 * Map a section of an executable file into the user's 907 * address space. 908 */ 909 int 910 execmap(struct vnode *vp, caddr_t addr, size_t len, size_t zfodlen, 911 off_t offset, int prot, int page, uint_t szc) 912 { 913 int error = 0; 914 off_t oldoffset; 915 caddr_t zfodbase, oldaddr; 916 size_t end, oldlen; 917 size_t zfoddiff; 918 label_t ljb; 919 proc_t *p = ttoproc(curthread); 920 921 oldaddr = addr; 922 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 923 if (len) { 924 oldlen = len; 925 len += ((size_t)oldaddr - (size_t)addr); 926 oldoffset = offset; 927 offset = (off_t)((uintptr_t)offset & PAGEMASK); 928 if (page) { 929 spgcnt_t prefltmem, availm, npages; 930 int preread; 931 uint_t mflag = MAP_PRIVATE | MAP_FIXED; 932 933 if ((prot & (PROT_WRITE | PROT_EXEC)) == PROT_EXEC) { 934 mflag |= MAP_TEXT; 935 } else { 936 mflag |= MAP_INITDATA; 937 } 938 939 if (valid_usr_range(addr, len, prot, p->p_as, 940 p->p_as->a_userlimit) != RANGE_OKAY) { 941 error = ENOMEM; 942 goto bad; 943 } 944 if (error = VOP_MAP(vp, (offset_t)offset, 945 p->p_as, &addr, len, prot, PROT_ALL, 946 mflag, CRED())) 947 goto bad; 948 949 /* 950 * If the segment can fit, then 
we prefault 951 * the entire segment in. This is based on the 952 * model that says the best working set of a 953 * small program is all of its pages. 954 */ 955 npages = (spgcnt_t)btopr(len); 956 prefltmem = freemem - desfree; 957 preread = 958 (npages < prefltmem && len < PGTHRESH) ? 1 : 0; 959 960 /* 961 * If we aren't prefaulting the segment, 962 * increment "deficit", if necessary to ensure 963 * that pages will become available when this 964 * process starts executing. 965 */ 966 availm = freemem - lotsfree; 967 if (preread == 0 && npages > availm && 968 deficit < lotsfree) { 969 deficit += MIN((pgcnt_t)(npages - availm), 970 lotsfree - deficit); 971 } 972 973 if (preread) { 974 TRACE_2(TR_FAC_PROC, TR_EXECMAP_PREREAD, 975 "execmap preread:freemem %d size %lu", 976 freemem, len); 977 (void) as_fault(p->p_as->a_hat, p->p_as, 978 (caddr_t)addr, len, F_INVAL, S_READ); 979 } 980 } else { 981 if (valid_usr_range(addr, len, prot, p->p_as, 982 p->p_as->a_userlimit) != RANGE_OKAY) { 983 error = ENOMEM; 984 goto bad; 985 } 986 987 if (error = as_map(p->p_as, addr, len, 988 segvn_create, zfod_argsp)) 989 goto bad; 990 /* 991 * Read in the segment in one big chunk. 992 */ 993 if (error = vn_rdwr(UIO_READ, vp, (caddr_t)oldaddr, 994 oldlen, (offset_t)oldoffset, UIO_USERSPACE, 0, 995 (rlim64_t)0, CRED(), (ssize_t *)0)) 996 goto bad; 997 /* 998 * Now set protections. 
999 */ 1000 if (prot != PROT_ZFOD) { 1001 (void) as_setprot(p->p_as, (caddr_t)addr, 1002 len, prot); 1003 } 1004 } 1005 } 1006 1007 if (zfodlen) { 1008 end = (size_t)addr + len; 1009 zfodbase = (caddr_t)roundup(end, PAGESIZE); 1010 zfoddiff = (uintptr_t)zfodbase - end; 1011 if (zfoddiff) { 1012 if (on_fault(&ljb)) { 1013 no_fault(); 1014 error = EFAULT; 1015 goto bad; 1016 } 1017 uzero((void *)end, zfoddiff); 1018 no_fault(); 1019 } 1020 if (zfodlen > zfoddiff) { 1021 struct segvn_crargs crargs = 1022 SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL); 1023 1024 zfodlen -= zfoddiff; 1025 if (valid_usr_range(zfodbase, zfodlen, prot, p->p_as, 1026 p->p_as->a_userlimit) != RANGE_OKAY) { 1027 error = ENOMEM; 1028 goto bad; 1029 } 1030 crargs.szc = szc; 1031 if (error = as_map(p->p_as, (caddr_t)zfodbase, 1032 zfodlen, segvn_create, &crargs)) 1033 goto bad; 1034 if (prot != PROT_ZFOD) { 1035 (void) as_setprot(p->p_as, (caddr_t)zfodbase, 1036 zfodlen, prot); 1037 } 1038 } 1039 } 1040 return (0); 1041 bad: 1042 return (error); 1043 } 1044 1045 void 1046 setexecenv(struct execenv *ep) 1047 { 1048 proc_t *p = ttoproc(curthread); 1049 klwp_t *lwp = ttolwp(curthread); 1050 struct vnode *vp; 1051 1052 p->p_bssbase = ep->ex_bssbase; 1053 p->p_brkbase = ep->ex_brkbase; 1054 p->p_brksize = ep->ex_brksize; 1055 if (p->p_exec) 1056 VN_RELE(p->p_exec); /* out with the old */ 1057 vp = p->p_exec = ep->ex_vp; 1058 if (vp != NULL) 1059 VN_HOLD(vp); /* in with the new */ 1060 1061 lwp->lwp_sigaltstack.ss_sp = 0; 1062 lwp->lwp_sigaltstack.ss_size = 0; 1063 lwp->lwp_sigaltstack.ss_flags = SS_DISABLE; 1064 } 1065 1066 int 1067 execopen(struct vnode **vpp, int *fdp) 1068 { 1069 struct vnode *vp = *vpp; 1070 file_t *fp; 1071 int error = 0; 1072 int filemode = FREAD; 1073 1074 VN_HOLD(vp); /* open reference */ 1075 if (error = falloc(NULL, filemode, &fp, fdp)) { 1076 VN_RELE(vp); 1077 *fdp = -1; /* just in case falloc changed value */ 1078 return (error); 1079 } 1080 if (error = VOP_OPEN(&vp, filemode, 
CRED())) { 1081 VN_RELE(vp); 1082 setf(*fdp, NULL); 1083 unfalloc(fp); 1084 *fdp = -1; 1085 return (error); 1086 } 1087 *vpp = vp; /* vnode should not have changed */ 1088 fp->f_vnode = vp; 1089 mutex_exit(&fp->f_tlock); 1090 setf(*fdp, fp); 1091 return (0); 1092 } 1093 1094 int 1095 execclose(int fd) 1096 { 1097 return (closeandsetf(fd, NULL)); 1098 } 1099 1100 1101 /* 1102 * noexec stub function. 1103 */ 1104 /*ARGSUSED*/ 1105 int 1106 noexec( 1107 struct vnode *vp, 1108 struct execa *uap, 1109 struct uarg *args, 1110 struct intpdata *idatap, 1111 int level, 1112 long *execsz, 1113 int setid, 1114 caddr_t exec_file, 1115 struct cred *cred) 1116 { 1117 cmn_err(CE_WARN, "missing exec capability for %s", uap->fname); 1118 return (ENOEXEC); 1119 } 1120 1121 /* 1122 * Support routines for building a user stack. 1123 * 1124 * execve(path, argv, envp) must construct a new stack with the specified 1125 * arguments and environment variables (see exec_args() for a description 1126 * of the user stack layout). To do this, we copy the arguments and 1127 * environment variables from the old user address space into the kernel, 1128 * free the old as, create the new as, and copy our buffered information 1129 * to the new stack. Our kernel buffer has the following structure: 1130 * 1131 * +-----------------------+ <--- stk_base + stk_size 1132 * | string offsets | 1133 * +-----------------------+ <--- stk_offp 1134 * | | 1135 * | STK_AVAIL() space | 1136 * | | 1137 * +-----------------------+ <--- stk_strp 1138 * | strings | 1139 * +-----------------------+ <--- stk_base 1140 * 1141 * When we add a string, we store the string's contents (including the null 1142 * terminator) at stk_strp, and we store the offset of the string relative to 1143 * stk_base at --stk_offp. At strings are added, stk_strp increases and 1144 * stk_offp decreases. The amount of space remaining, STK_AVAIL(), is just 1145 * the difference between these pointers. 
 * If we run out of space, we return
 * an error and exec_args() starts all over again with a buffer twice as large.
 * When we're all done, the kernel buffer looks like this:
 *
 *	+-----------------------+ <--- stk_base + stk_size
 *	| argv[0] offset        |
 *	+-----------------------+
 *	| ...                   |
 *	+-----------------------+
 *	| argv[argc-1] offset   |
 *	+-----------------------+
 *	| envp[0] offset        |
 *	+-----------------------+
 *	| ...                   |
 *	+-----------------------+
 *	| envp[envc-1] offset   |
 *	+-----------------------+
 *	| AT_SUN_PLATFORM offset|
 *	+-----------------------+
 *	| AT_SUN_EXECNAME offset|
 *	+-----------------------+ <--- stk_offp
 *	|                       |
 *	| STK_AVAIL() space     |
 *	|                       |
 *	+-----------------------+ <--- stk_strp
 *	| AT_SUN_EXECNAME string|
 *	+-----------------------+
 *	| AT_SUN_PLATFORM string|
 *	+-----------------------+
 *	| envp[envc-1] string   |
 *	+-----------------------+
 *	| ...                   |
 *	+-----------------------+
 *	| envp[0] string        |
 *	+-----------------------+
 *	| argv[argc-1] string   |
 *	+-----------------------+
 *	| ...                   |
 *	+-----------------------+
 *	| argv[0] string        |
 *	+-----------------------+ <--- stk_base
 */

#define	STK_AVAIL(args)		((char *)(args)->stk_offp - (args)->stk_strp)

/*
 * Add a string to the stack.
1192 */ 1193 static int 1194 stk_add(uarg_t *args, const char *sp, enum uio_seg segflg) 1195 { 1196 int error; 1197 size_t len; 1198 1199 if (STK_AVAIL(args) < sizeof (int)) 1200 return (E2BIG); 1201 *--args->stk_offp = args->stk_strp - args->stk_base; 1202 1203 if (segflg == UIO_USERSPACE) { 1204 error = copyinstr(sp, args->stk_strp, STK_AVAIL(args), &len); 1205 if (error != 0) 1206 return (error); 1207 } else { 1208 len = strlen(sp) + 1; 1209 if (len > STK_AVAIL(args)) 1210 return (E2BIG); 1211 bcopy(sp, args->stk_strp, len); 1212 } 1213 1214 args->stk_strp += len; 1215 1216 return (0); 1217 } 1218 1219 static int 1220 stk_getptr(uarg_t *args, char *src, char **dst) 1221 { 1222 int error; 1223 1224 if (args->from_model == DATAMODEL_NATIVE) { 1225 ulong_t ptr; 1226 error = fulword(src, &ptr); 1227 *dst = (caddr_t)ptr; 1228 } else { 1229 uint32_t ptr; 1230 error = fuword32(src, &ptr); 1231 *dst = (caddr_t)(uintptr_t)ptr; 1232 } 1233 return (error); 1234 } 1235 1236 static int 1237 stk_putptr(uarg_t *args, char *addr, char *value) 1238 { 1239 if (args->to_model == DATAMODEL_NATIVE) 1240 return (sulword(addr, (ulong_t)value)); 1241 else 1242 return (suword32(addr, (uint32_t)(uintptr_t)value)); 1243 } 1244 1245 static int 1246 stk_copyin(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp) 1247 { 1248 char *sp; 1249 int argc, error; 1250 int argv_empty = 0; 1251 size_t ptrsize = args->from_ptrsize; 1252 size_t size, pad; 1253 char *argv = (char *)uap->argp; 1254 char *envp = (char *)uap->envp; 1255 1256 /* 1257 * Copy interpreter's name and argument to argv[0] and argv[1]. 
1258 */ 1259 if (intp != NULL && intp->intp_name != NULL) { 1260 if ((error = stk_add(args, intp->intp_name, UIO_SYSSPACE)) != 0) 1261 return (error); 1262 if (intp->intp_arg != NULL && 1263 (error = stk_add(args, intp->intp_arg, UIO_SYSSPACE)) != 0) 1264 return (error); 1265 if (args->fname != NULL) 1266 error = stk_add(args, args->fname, UIO_SYSSPACE); 1267 else 1268 error = stk_add(args, uap->fname, UIO_USERSPACE); 1269 if (error) 1270 return (error); 1271 1272 /* 1273 * Check for an empty argv[]. 1274 */ 1275 if (stk_getptr(args, argv, &sp)) 1276 return (EFAULT); 1277 if (sp == NULL) 1278 argv_empty = 1; 1279 1280 argv += ptrsize; /* ignore original argv[0] */ 1281 } 1282 1283 if (argv_empty == 0) { 1284 /* 1285 * Add argv[] strings to the stack. 1286 */ 1287 for (;;) { 1288 if (stk_getptr(args, argv, &sp)) 1289 return (EFAULT); 1290 if (sp == NULL) 1291 break; 1292 if ((error = stk_add(args, sp, UIO_USERSPACE)) != 0) 1293 return (error); 1294 argv += ptrsize; 1295 } 1296 } 1297 argc = (int *)(args->stk_base + args->stk_size) - args->stk_offp; 1298 args->arglen = args->stk_strp - args->stk_base; 1299 1300 /* 1301 * Add environ[] strings to the stack. 1302 */ 1303 if (envp != NULL) { 1304 for (;;) { 1305 if (stk_getptr(args, envp, &sp)) 1306 return (EFAULT); 1307 if (sp == NULL) 1308 break; 1309 if ((error = stk_add(args, sp, UIO_USERSPACE)) != 0) 1310 return (error); 1311 envp += ptrsize; 1312 } 1313 } 1314 args->na = (int *)(args->stk_base + args->stk_size) - args->stk_offp; 1315 args->ne = args->na - argc; 1316 1317 /* 1318 * Add AT_SUN_PLATFORM and AT_SUN_EXECNAME strings to the stack. 1319 */ 1320 if (auxvpp != NULL && *auxvpp != NULL) { 1321 if ((error = stk_add(args, platform, UIO_SYSSPACE)) != 0) 1322 return (error); 1323 if ((error = stk_add(args, args->pathname, UIO_SYSSPACE)) != 0) 1324 return (error); 1325 } 1326 1327 /* 1328 * Compute the size of the stack. 
This includes all the pointers, 1329 * the space reserved for the aux vector, and all the strings. 1330 * The total number of pointers is args->na (which is argc + envc) 1331 * plus 4 more: (1) a pointer's worth of space for argc; (2) the NULL 1332 * after the last argument (i.e. argv[argc]); (3) the NULL after the 1333 * last environment variable (i.e. envp[envc]); and (4) the NULL after 1334 * all the strings, at the very top of the stack. 1335 */ 1336 size = (args->na + 4) * args->to_ptrsize + args->auxsize + 1337 (args->stk_strp - args->stk_base); 1338 1339 /* 1340 * Pad the string section with zeroes to align the stack size. 1341 */ 1342 pad = P2NPHASE(size, args->stk_align); 1343 1344 if (STK_AVAIL(args) < pad) 1345 return (E2BIG); 1346 1347 args->usrstack_size = size + pad; 1348 1349 while (pad-- != 0) 1350 *args->stk_strp++ = 0; 1351 1352 args->nc = args->stk_strp - args->stk_base; 1353 1354 return (0); 1355 } 1356 1357 static int 1358 stk_copyout(uarg_t *args, char *usrstack, void **auxvpp, user_t *up) 1359 { 1360 size_t ptrsize = args->to_ptrsize; 1361 ssize_t pslen; 1362 char *kstrp = args->stk_base; 1363 char *ustrp = usrstack - args->nc - ptrsize; 1364 char *usp = usrstack - args->usrstack_size; 1365 int *offp = (int *)(args->stk_base + args->stk_size); 1366 int envc = args->ne; 1367 int argc = args->na - envc; 1368 int i; 1369 1370 /* 1371 * Record argc for /proc. 1372 */ 1373 up->u_argc = argc; 1374 1375 /* 1376 * Put argc on the stack. Note that even though it's an int, 1377 * it always consumes ptrsize bytes (for alignment). 1378 */ 1379 if (stk_putptr(args, usp, (char *)(uintptr_t)argc)) 1380 return (-1); 1381 1382 /* 1383 * Add argc space (ptrsize) to usp and record argv for /proc. 1384 */ 1385 up->u_argv = (uintptr_t)(usp += ptrsize); 1386 1387 /* 1388 * Put the argv[] pointers on the stack. 
1389 */ 1390 for (i = 0; i < argc; i++, usp += ptrsize) 1391 if (stk_putptr(args, usp, &ustrp[*--offp])) 1392 return (-1); 1393 1394 /* 1395 * Copy arguments to u_psargs. 1396 */ 1397 pslen = MIN(args->arglen, PSARGSZ) - 1; 1398 for (i = 0; i < pslen; i++) 1399 up->u_psargs[i] = (kstrp[i] == '\0' ? ' ' : kstrp[i]); 1400 while (i < PSARGSZ) 1401 up->u_psargs[i++] = '\0'; 1402 1403 /* 1404 * Add space for argv[]'s NULL terminator (ptrsize) to usp and 1405 * record envp for /proc. 1406 */ 1407 up->u_envp = (uintptr_t)(usp += ptrsize); 1408 1409 /* 1410 * Put the envp[] pointers on the stack. 1411 */ 1412 for (i = 0; i < envc; i++, usp += ptrsize) 1413 if (stk_putptr(args, usp, &ustrp[*--offp])) 1414 return (-1); 1415 1416 /* 1417 * Add space for envp[]'s NULL terminator (ptrsize) to usp and 1418 * remember where the stack ends, which is also where auxv begins. 1419 */ 1420 args->stackend = usp += ptrsize; 1421 1422 /* 1423 * Put all the argv[], envp[], and auxv strings on the stack. 1424 */ 1425 if (copyout(args->stk_base, ustrp, args->nc)) 1426 return (-1); 1427 1428 /* 1429 * Fill in the aux vector now that we know the user stack addresses 1430 * for the AT_SUN_PLATFORM and AT_SUN_EXECNAME strings. 1431 */ 1432 if (auxvpp != NULL && *auxvpp != NULL) { 1433 if (args->to_model == DATAMODEL_NATIVE) { 1434 auxv_t **a = (auxv_t **)auxvpp; 1435 ADDAUX(*a, AT_SUN_PLATFORM, (long)&ustrp[*--offp]) 1436 ADDAUX(*a, AT_SUN_EXECNAME, (long)&ustrp[*--offp]) 1437 } else { 1438 auxv32_t **a = (auxv32_t **)auxvpp; 1439 ADDAUX(*a, 1440 AT_SUN_PLATFORM, (int)(uintptr_t)&ustrp[*--offp]) 1441 ADDAUX(*a, 1442 AT_SUN_EXECNAME, (int)(uintptr_t)&ustrp[*--offp]); 1443 } 1444 } 1445 1446 return (0); 1447 } 1448 1449 #ifdef DEBUG 1450 int mpss_brkpgszsel = 0; 1451 int mpss_stkpgszsel = 0; 1452 #endif 1453 1454 /* 1455 * Initialize a new user stack with the specified arguments and environment. 
1456 * The initial user stack layout is as follows: 1457 * 1458 * User Stack 1459 * +---------------+ <--- curproc->p_usrstack 1460 * | NULL | 1461 * +---------------+ 1462 * | | 1463 * | auxv strings | 1464 * | | 1465 * +---------------+ 1466 * | | 1467 * | envp strings | 1468 * | | 1469 * +---------------+ 1470 * | | 1471 * | argv strings | 1472 * | | 1473 * +---------------+ <--- ustrp 1474 * | | 1475 * | aux vector | 1476 * | | 1477 * +---------------+ <--- auxv 1478 * | NULL | 1479 * +---------------+ 1480 * | envp[envc-1] | 1481 * +---------------+ 1482 * | ... | 1483 * +---------------+ 1484 * | envp[0] | 1485 * +---------------+ <--- envp[] 1486 * | NULL | 1487 * +---------------+ 1488 * | argv[argc-1] | 1489 * +---------------+ 1490 * | ... | 1491 * +---------------+ 1492 * | argv[0] | 1493 * +---------------+ <--- argv[] 1494 * | argc | 1495 * +---------------+ <--- stack base 1496 */ 1497 int 1498 exec_args(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp) 1499 { 1500 size_t size; 1501 int error; 1502 proc_t *p = ttoproc(curthread); 1503 user_t *up = PTOU(p); 1504 char *usrstack; 1505 rctl_entity_p_t e; 1506 1507 struct as *as; 1508 1509 args->from_model = p->p_model; 1510 if (p->p_model == DATAMODEL_NATIVE) { 1511 args->from_ptrsize = sizeof (long); 1512 } else { 1513 args->from_ptrsize = sizeof (int32_t); 1514 } 1515 1516 if (args->to_model == DATAMODEL_NATIVE) { 1517 args->to_ptrsize = sizeof (long); 1518 args->ncargs = NCARGS; 1519 args->stk_align = STACK_ALIGN; 1520 usrstack = (char *)USRSTACK; 1521 } else { 1522 args->to_ptrsize = sizeof (int32_t); 1523 args->ncargs = NCARGS32; 1524 args->stk_align = STACK_ALIGN32; 1525 usrstack = (char *)USRSTACK32; 1526 } 1527 1528 ASSERT(P2PHASE((uintptr_t)usrstack, args->stk_align) == 0); 1529 1530 #if defined(__sparc) 1531 /* 1532 * Make sure user register windows are empty before 1533 * attempting to make a new stack. 
1534 */ 1535 (void) flush_user_windows_to_stack(NULL); 1536 #endif 1537 1538 for (size = PAGESIZE; ; size *= 2) { 1539 args->stk_size = size; 1540 args->stk_base = kmem_alloc(size, KM_SLEEP); 1541 args->stk_strp = args->stk_base; 1542 args->stk_offp = (int *)(args->stk_base + size); 1543 error = stk_copyin(uap, args, intp, auxvpp); 1544 if (error == 0) 1545 break; 1546 kmem_free(args->stk_base, size); 1547 if (error != E2BIG && error != ENAMETOOLONG) 1548 return (error); 1549 if (size >= args->ncargs) 1550 return (E2BIG); 1551 } 1552 1553 size = args->usrstack_size; 1554 1555 ASSERT(error == 0); 1556 ASSERT(P2PHASE(size, args->stk_align) == 0); 1557 ASSERT((ssize_t)STK_AVAIL(args) >= 0); 1558 1559 if (size > args->ncargs) { 1560 kmem_free(args->stk_base, args->stk_size); 1561 return (E2BIG); 1562 } 1563 1564 /* 1565 * Leave only the current lwp and force the other lwps to exit. 1566 * If another lwp beat us to the punch by calling exit(), bail out. 1567 */ 1568 if ((error = exitlwps(0)) != 0) { 1569 kmem_free(args->stk_base, args->stk_size); 1570 return (error); 1571 } 1572 1573 /* 1574 * Revoke any doors created by the process. 1575 */ 1576 if (p->p_door_list) 1577 door_exit(); 1578 1579 /* 1580 * Release schedctl data structures. 1581 */ 1582 if (p->p_pagep) 1583 schedctl_proc_cleanup(); 1584 1585 /* 1586 * Clean up any DTrace helpers for the process. 1587 */ 1588 if (p->p_dtrace_helpers != NULL) { 1589 ASSERT(dtrace_helpers_cleanup != NULL); 1590 (*dtrace_helpers_cleanup)(); 1591 } 1592 1593 mutex_enter(&p->p_lock); 1594 /* 1595 * Cleanup the DTrace provider associated with this process. 1596 */ 1597 if (p->p_dtrace_probes) { 1598 ASSERT(dtrace_fasttrap_exec_ptr != NULL); 1599 dtrace_fasttrap_exec_ptr(p); 1600 } 1601 mutex_exit(&p->p_lock); 1602 1603 /* 1604 * discard the lwpchan cache. 1605 */ 1606 if (p->p_lcp != NULL) 1607 lwpchan_destroy_cache(1); 1608 1609 /* 1610 * Delete the POSIX timers. 
1611 */ 1612 if (p->p_itimer != NULL) 1613 timer_exit(); 1614 1615 #ifdef C2_AUDIT 1616 if (audit_active) 1617 audit_exec(args->stk_base, args->stk_base + args->arglen, 1618 args->na - args->ne, args->ne); 1619 #endif 1620 1621 /* 1622 * Ensure that we don't change resource associations while we 1623 * change address spaces. 1624 */ 1625 mutex_enter(&p->p_lock); 1626 pool_barrier_enter(); 1627 mutex_exit(&p->p_lock); 1628 1629 /* 1630 * Destroy the old address space and create a new one. 1631 * From here on, any errors are fatal to the exec()ing process. 1632 * On error we return -1, which means the caller must SIGKILL 1633 * the process. 1634 */ 1635 relvm(); 1636 1637 mutex_enter(&p->p_lock); 1638 pool_barrier_exit(); 1639 mutex_exit(&p->p_lock); 1640 1641 up->u_execsw = args->execswp; 1642 1643 p->p_brkbase = NULL; 1644 p->p_brksize = 0; 1645 p->p_stksize = 0; 1646 p->p_model = args->to_model; 1647 p->p_usrstack = usrstack; 1648 p->p_stkprot = args->stk_prot; 1649 p->p_datprot = args->dat_prot; 1650 1651 /* 1652 * Reset resource controls such that all controls are again active as 1653 * well as appropriate to the potentially new address model for the 1654 * process. 
1655 */ 1656 e.rcep_p.proc = p; 1657 e.rcep_t = RCENTITY_PROCESS; 1658 rctl_set_reset(p->p_rctls, p, &e); 1659 1660 if (exec_lpg_disable == 0) { 1661 #ifdef DEBUG 1662 uint_t pgsizes = page_num_pagesizes(); 1663 uint_t szc; 1664 #endif 1665 p->p_brkpageszc = args->brkpageszc; 1666 p->p_stkpageszc = args->stkpageszc; 1667 1668 if (p->p_brkpageszc == 0) { 1669 p->p_brkpageszc = page_szc(map_pgsz(MAPPGSZ_HEAP, 1670 p, 0, 0, NULL)); 1671 } 1672 if (p->p_stkpageszc == 0) { 1673 p->p_stkpageszc = page_szc(map_pgsz(MAPPGSZ_STK, 1674 p, 0, 0, NULL)); 1675 } 1676 1677 #ifdef DEBUG 1678 if (mpss_brkpgszsel != 0) { 1679 if (mpss_brkpgszsel == -1) { 1680 szc = ((uint_t)gethrtime() >> 8) % pgsizes; 1681 } else { 1682 szc = mpss_brkpgszsel % pgsizes; 1683 } 1684 p->p_brkpageszc = szc; 1685 } 1686 1687 if (mpss_stkpgszsel != 0) { 1688 if (mpss_stkpgszsel == -1) { 1689 szc = ((uint_t)gethrtime() >> 7) % pgsizes; 1690 } else { 1691 szc = mpss_stkpgszsel % pgsizes; 1692 } 1693 p->p_stkpageszc = szc; 1694 } 1695 1696 #endif 1697 mutex_enter(&p->p_lock); 1698 p->p_flag |= SAUTOLPG; /* kernel controls page sizes */ 1699 mutex_exit(&p->p_lock); 1700 1701 } else { 1702 p->p_brkpageszc = 0; 1703 p->p_stkpageszc = 0; 1704 } 1705 1706 exec_set_sp(size); 1707 1708 as = as_alloc(); 1709 p->p_as = as; 1710 if (p->p_model == DATAMODEL_ILP32) 1711 as->a_userlimit = (caddr_t)USERLIMIT32; 1712 (void) hat_setup(as->a_hat, HAT_ALLOC); 1713 1714 /* 1715 * Finally, write out the contents of the new stack. 1716 */ 1717 error = stk_copyout(args, usrstack, auxvpp, up); 1718 kmem_free(args->stk_base, args->stk_size); 1719 return (error); 1720 } 1721