1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* Copyright (c) 1988 AT&T */ 29 /* All Rights Reserved */ 30 31 32 #include <sys/types.h> 33 #include <sys/param.h> 34 #include <sys/sysmacros.h> 35 #include <sys/systm.h> 36 #include <sys/signal.h> 37 #include <sys/cred_impl.h> 38 #include <sys/policy.h> 39 #include <sys/user.h> 40 #include <sys/errno.h> 41 #include <sys/file.h> 42 #include <sys/vfs.h> 43 #include <sys/vnode.h> 44 #include <sys/mman.h> 45 #include <sys/acct.h> 46 #include <sys/cpuvar.h> 47 #include <sys/proc.h> 48 #include <sys/cmn_err.h> 49 #include <sys/debug.h> 50 #include <sys/pathname.h> 51 #include <sys/vm.h> 52 #include <sys/vtrace.h> 53 #include <sys/exec.h> 54 #include <sys/exechdr.h> 55 #include <sys/kmem.h> 56 #include <sys/prsystm.h> 57 #include <sys/modctl.h> 58 #include <sys/vmparam.h> 59 #include <sys/schedctl.h> 60 #include <sys/utrap.h> 61 #include <sys/systeminfo.h> 62 #include <sys/stack.h> 63 #include <sys/rctl.h> 64 #include <sys/dtrace.h> 65 #include <sys/lwpchan_impl.h> 66 
#include <sys/pool.h> 67 #include <sys/sdt.h> 68 69 #include <c2/audit.h> 70 71 #include <vm/hat.h> 72 #include <vm/anon.h> 73 #include <vm/as.h> 74 #include <vm/seg.h> 75 #include <vm/seg_vn.h> 76 77 #define PRIV_RESET 0x01 /* needs to reset privs */ 78 #define PRIV_SETID 0x02 /* needs to change uids */ 79 #define PRIV_SETUGID 0x04 /* is setuid/setgid/forced privs */ 80 #define PRIV_INCREASE 0x08 /* child runs with more privs */ 81 82 static int execsetid(struct vnode *, struct vattr *, uid_t *, uid_t *); 83 static int hold_execsw(struct execsw *); 84 85 uint_t auxv_hwcap = 0; /* auxv AT_SUN_HWCAP value; determined on the fly */ 86 #if defined(_SYSCALL32_IMPL) 87 uint_t auxv_hwcap32 = 0; /* 32-bit version of auxv_hwcap */ 88 #endif 89 90 #if defined(__i386) || defined(__amd64) 91 extern void ldt_free(proc_t *p); 92 extern void ldt_load(void); 93 #endif 94 95 int exec_lpg_disable = 0; 96 97 #define PSUIDFLAGS (SNOCD|SUGID) 98 99 /* 100 * exec() - wrapper around exece providing NULL environment pointer 101 */ 102 int 103 exec(const char *fname, const char **argp) 104 { 105 return (exece(fname, argp, NULL)); 106 } 107 108 /* 109 * exece() - system call wrapper around exec_common() 110 */ 111 int 112 exece(const char *fname, const char **argp, const char **envp) 113 { 114 int error; 115 116 error = exec_common(fname, argp, envp); 117 return (error ? 
(set_errno(error)) : 0); 118 } 119 120 int 121 exec_common(const char *fname, const char **argp, const char **envp) 122 { 123 vnode_t *vp = NULL, *dir = NULL, *tmpvp = NULL; 124 proc_t *p = ttoproc(curthread); 125 klwp_t *lwp = ttolwp(curthread); 126 struct user *up = PTOU(p); 127 long execsz; /* temporary count of exec size */ 128 int i; 129 int error; 130 char exec_file[MAXCOMLEN+1]; 131 struct pathname pn; 132 struct pathname resolvepn; 133 struct uarg args; 134 struct execa ua; 135 k_sigset_t savedmask; 136 lwpdir_t *lwpdir = NULL; 137 lwpdir_t **tidhash; 138 lwpdir_t *old_lwpdir = NULL; 139 uint_t old_lwpdir_sz; 140 lwpdir_t **old_tidhash; 141 uint_t old_tidhash_sz; 142 lwpent_t *lep; 143 144 /* 145 * exec() is not supported for the /proc agent lwp. 146 */ 147 if (curthread == p->p_agenttp) 148 return (ENOTSUP); 149 150 if ((error = secpolicy_basic_exec(CRED())) != 0) 151 return (error); 152 153 /* 154 * Inform /proc that an exec() has started. 155 * Hold signals that are ignored by default so that we will 156 * not be interrupted by a signal that will be ignored after 157 * successful completion of gexec(). 158 */ 159 mutex_enter(&p->p_lock); 160 prexecstart(); 161 schedctl_finish_sigblock(curthread); 162 savedmask = curthread->t_hold; 163 sigorset(&curthread->t_hold, &ignoredefault); 164 mutex_exit(&p->p_lock); 165 166 /* 167 * Look up path name and remember last component for later. 168 * To help coreadm expand its %d token, we attempt to save 169 * the directory containing the executable in p_execdir. The 170 * first call to lookuppn() may fail and return EINVAL because 171 * dirvpp is non-NULL. In that case, we make a second call to 172 * lookuppn() with dirvpp set to NULL; p_execdir will be NULL, 173 * but coreadm is allowed to expand %d to the empty string and 174 * there are other cases in which that failure may occur. 
175 */ 176 if ((error = pn_get((char *)fname, UIO_USERSPACE, &pn)) != 0) 177 goto out; 178 pn_alloc(&resolvepn); 179 if ((error = lookuppn(&pn, &resolvepn, FOLLOW, &dir, &vp)) != 0) { 180 pn_free(&resolvepn); 181 pn_free(&pn); 182 if (error != EINVAL) 183 goto out; 184 185 dir = NULL; 186 if ((error = pn_get((char *)fname, UIO_USERSPACE, &pn)) != 0) 187 goto out; 188 pn_alloc(&resolvepn); 189 if ((error = lookuppn(&pn, &resolvepn, FOLLOW, NULLVPP, 190 &vp)) != 0) { 191 pn_free(&resolvepn); 192 pn_free(&pn); 193 goto out; 194 } 195 } 196 if (vp == NULL) { 197 if (dir != NULL) 198 VN_RELE(dir); 199 error = ENOENT; 200 pn_free(&resolvepn); 201 pn_free(&pn); 202 goto out; 203 } 204 205 /* 206 * We do not allow executing files in attribute directories. 207 * We test this by determining whether the resolved path 208 * contains a "/" when we're in an attribute directory; 209 * only if the pathname does not contain a "/" the resolved path 210 * points to a file in the current working (attribute) directory. 211 */ 212 if ((p->p_user.u_cdir->v_flag & V_XATTRDIR) != 0 && 213 strchr(resolvepn.pn_path, '/') == NULL) { 214 if (dir != NULL) 215 VN_RELE(dir); 216 error = EACCES; 217 pn_free(&resolvepn); 218 pn_free(&pn); 219 VN_RELE(vp); 220 goto out; 221 } 222 223 bzero(exec_file, MAXCOMLEN+1); 224 (void) strncpy(exec_file, pn.pn_path, MAXCOMLEN); 225 bzero(&args, sizeof (args)); 226 args.pathname = resolvepn.pn_path; 227 /* don't free resolvepn until we are done with args */ 228 pn_free(&pn); 229 230 /* 231 * Specific exec handlers, or policies determined via 232 * /etc/system may override the historical default. 
233 */ 234 args.stk_prot = PROT_ZFOD; 235 args.dat_prot = PROT_ZFOD; 236 237 CPU_STATS_ADD_K(sys, sysexec, 1); 238 DTRACE_PROC1(exec, char *, args.pathname); 239 240 ua.fname = fname; 241 ua.argp = argp; 242 ua.envp = envp; 243 244 if ((error = gexec(&vp, &ua, &args, NULL, 0, &execsz, 245 exec_file, p->p_cred)) != 0) { 246 VN_RELE(vp); 247 if (dir != NULL) 248 VN_RELE(dir); 249 pn_free(&resolvepn); 250 goto fail; 251 } 252 253 /* 254 * Free floating point registers (sun4u only) 255 */ 256 ASSERT(lwp != NULL); 257 lwp_freeregs(lwp, 1); 258 259 /* 260 * Free device context 261 */ 262 if (curthread->t_ctx) 263 freectx(curthread, 1); 264 265 /* 266 * Remember file name for accounting; clear any cached DTrace predicate. 267 */ 268 up->u_acflag &= ~AFORK; 269 bcopy(exec_file, up->u_comm, MAXCOMLEN+1); 270 curthread->t_predcache = NULL; 271 272 /* 273 * Clear contract template state 274 */ 275 lwp_ctmpl_clear(lwp); 276 277 /* 278 * Save the directory in which we found the executable for expanding 279 * the %d token used in core file patterns. 280 */ 281 mutex_enter(&p->p_lock); 282 tmpvp = p->p_execdir; 283 p->p_execdir = dir; 284 if (p->p_execdir != NULL) 285 VN_HOLD(p->p_execdir); 286 mutex_exit(&p->p_lock); 287 288 if (tmpvp != NULL) 289 VN_RELE(tmpvp); 290 291 /* 292 * Reset stack state to the user stack, clear set of signals 293 * caught on the signal stack, and reset list of signals that 294 * restart system calls; the new program's environment should 295 * not be affected by detritus from the old program. Any 296 * pending held signals remain held, so don't clear t_hold. 
297 */ 298 mutex_enter(&p->p_lock); 299 lwp->lwp_oldcontext = 0; 300 lwp->lwp_ustack = 0; 301 lwp->lwp_old_stk_ctl = 0; 302 sigemptyset(&up->u_signodefer); 303 sigemptyset(&up->u_sigonstack); 304 sigemptyset(&up->u_sigresethand); 305 lwp->lwp_sigaltstack.ss_sp = 0; 306 lwp->lwp_sigaltstack.ss_size = 0; 307 lwp->lwp_sigaltstack.ss_flags = SS_DISABLE; 308 309 /* 310 * Make saved resource limit == current resource limit. 311 */ 312 for (i = 0; i < RLIM_NLIMITS; i++) { 313 /*CONSTCOND*/ 314 if (RLIM_SAVED(i)) { 315 (void) rctl_rlimit_get(rctlproc_legacy[i], p, 316 &up->u_saved_rlimit[i]); 317 } 318 } 319 320 /* 321 * If the action was to catch the signal, then the action 322 * must be reset to SIG_DFL. 323 */ 324 sigdefault(p); 325 p->p_flag &= ~(SNOWAIT|SJCTL); 326 p->p_flag |= (SEXECED|SMSACCT|SMSFORK); 327 up->u_signal[SIGCLD - 1] = SIG_DFL; 328 329 /* 330 * Delete the dot4 sigqueues/signotifies. 331 */ 332 sigqfree(p); 333 334 mutex_exit(&p->p_lock); 335 336 mutex_enter(&p->p_pflock); 337 p->p_prof.pr_base = NULL; 338 p->p_prof.pr_size = 0; 339 p->p_prof.pr_off = 0; 340 p->p_prof.pr_scale = 0; 341 p->p_prof.pr_samples = 0; 342 mutex_exit(&p->p_pflock); 343 344 ASSERT(curthread->t_schedctl == NULL); 345 346 #if defined(__i386) || defined(__amd64) 347 /* If the process uses a private LDT then change it to default */ 348 if (p->p_ldt) 349 ldt_free(p); 350 #endif /* __i386 || __amd64 */ 351 352 #if defined(__amd64) 353 /* 354 * Make sure the process has the correct LDT descriptor for its data 355 * model. 356 */ 357 if (p->p_model == DATAMODEL_LP64) 358 p->p_ldt_desc = ldt0_default64_desc; 359 else 360 p->p_ldt_desc = ldt0_default_desc; 361 362 /* 363 * Ensure the change of LDT is propagated into the LDTR. 364 */ 365 kpreempt_disable(); 366 ldt_load(); 367 kpreempt_enable(); 368 #endif /* __amd64 */ 369 370 #if defined(__sparc) 371 if (p->p_utraps != NULL) 372 utrap_free(p); 373 #endif /* __sparc */ 374 375 /* 376 * Close all close-on-exec files. 
377 */ 378 close_exec(P_FINFO(p)); 379 TRACE_2(TR_FAC_PROC, TR_PROC_EXEC, "proc_exec:p %p up %p", p, up); 380 setregs(&args); 381 382 /* Mark this as an executable vnode */ 383 mutex_enter(&vp->v_lock); 384 vp->v_flag |= VVMEXEC; 385 mutex_exit(&vp->v_lock); 386 387 VN_RELE(vp); 388 if (dir != NULL) 389 VN_RELE(dir); 390 pn_free(&resolvepn); 391 392 /* 393 * Allocate a new lwp directory and lwpid hash table if necessary. 394 */ 395 if (curthread->t_tid != 1 || p->p_lwpdir_sz != 2) { 396 lwpdir = kmem_zalloc(2 * sizeof (lwpdir_t), KM_SLEEP); 397 lwpdir->ld_next = lwpdir + 1; 398 tidhash = kmem_zalloc(2 * sizeof (lwpdir_t *), KM_SLEEP); 399 if (p->p_lwpdir != NULL) 400 lep = p->p_lwpdir[curthread->t_dslot].ld_entry; 401 else 402 lep = kmem_zalloc(sizeof (*lep), KM_SLEEP); 403 } 404 405 mutex_enter(&p->p_lock); 406 prbarrier(p); 407 408 /* 409 * Reset lwp id to the default value of 1. 410 * This is a single-threaded process now 411 * and lwp #1 is lwp_wait()able by default. 412 * The t_unpark flag should not be inherited. 413 */ 414 ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0); 415 curthread->t_tid = 1; 416 curthread->t_unpark = 0; 417 curthread->t_proc_flag |= TP_TWAIT; 418 curthread->t_proc_flag &= ~TP_DAEMON; /* daemons shouldn't exec */ 419 p->p_lwpdaemon = 0; /* but oh well ... */ 420 p->p_lwpid = 1; 421 422 /* 423 * Install the newly-allocated lwp directory and lwpid hash table 424 * and insert the current thread into the new hash table. 425 */ 426 if (lwpdir != NULL) { 427 old_lwpdir = p->p_lwpdir; 428 old_lwpdir_sz = p->p_lwpdir_sz; 429 old_tidhash = p->p_tidhash; 430 old_tidhash_sz = p->p_tidhash_sz; 431 p->p_lwpdir = p->p_lwpfree = lwpdir; 432 p->p_lwpdir_sz = 2; 433 p->p_tidhash = tidhash; 434 p->p_tidhash_sz = 2; 435 lep->le_thread = curthread; 436 lep->le_lwpid = curthread->t_tid; 437 lep->le_start = curthread->t_start; 438 lwp_hash_in(p, lep); 439 } 440 /* 441 * Restore the saved signal mask and 442 * inform /proc that the exec() has finished. 
443 */ 444 curthread->t_hold = savedmask; 445 prexecend(); 446 mutex_exit(&p->p_lock); 447 if (old_lwpdir) { 448 kmem_free(old_lwpdir, old_lwpdir_sz * sizeof (lwpdir_t)); 449 kmem_free(old_tidhash, old_tidhash_sz * sizeof (lwpdir_t *)); 450 } 451 ASSERT(error == 0); 452 DTRACE_PROC(exec__success); 453 return (0); 454 455 fail: 456 DTRACE_PROC1(exec__failure, int, error); 457 out: /* error return */ 458 mutex_enter(&p->p_lock); 459 curthread->t_hold = savedmask; 460 prexecend(); 461 mutex_exit(&p->p_lock); 462 ASSERT(error != 0); 463 return (error); 464 } 465 466 467 /* 468 * Perform generic exec duties and switchout to object-file specific 469 * handler. 470 */ 471 int 472 gexec( 473 struct vnode **vpp, 474 struct execa *uap, 475 struct uarg *args, 476 struct intpdata *idatap, 477 int level, 478 long *execsz, 479 caddr_t exec_file, 480 struct cred *cred) 481 { 482 struct vnode *vp; 483 proc_t *pp = ttoproc(curthread); 484 struct execsw *eswp; 485 int error = 0; 486 int suidflags = 0; 487 ssize_t resid; 488 uid_t uid, gid; 489 struct vattr vattr; 490 char magbuf[MAGIC_BYTES]; 491 int setid; 492 cred_t *oldcred, *newcred = NULL; 493 int privflags = 0; 494 495 /* 496 * If the SNOCD or SUGID flag is set, turn it off and remember the 497 * previous setting so we can restore it if we encounter an error. 498 */ 499 if (level == 0 && (pp->p_flag & PSUIDFLAGS)) { 500 mutex_enter(&pp->p_lock); 501 suidflags = pp->p_flag & PSUIDFLAGS; 502 pp->p_flag &= ~PSUIDFLAGS; 503 mutex_exit(&pp->p_lock); 504 } 505 506 if ((error = execpermissions(*vpp, &vattr, args)) != 0) 507 goto bad; 508 509 /* need to open vnode for stateful file systems like rfs */ 510 if ((error = VOP_OPEN(vpp, FREAD, CRED())) != 0) 511 goto bad; 512 vp = *vpp; 513 514 /* 515 * Note: to support binary compatibility with SunOS a.out 516 * executables, we read in the first four bytes, as the 517 * magic number is in bytes 2-3. 
518 */ 519 if (error = vn_rdwr(UIO_READ, vp, magbuf, sizeof (magbuf), 520 (offset_t)0, UIO_SYSSPACE, 0, (rlim64_t)0, CRED(), &resid)) 521 goto bad; 522 if (resid != 0) 523 goto bad; 524 525 if ((eswp = findexec_by_hdr(magbuf)) == NULL) 526 goto bad; 527 528 if (level == 0 && 529 (privflags = execsetid(vp, &vattr, &uid, &gid)) != 0) { 530 531 newcred = cred = crdup(cred); 532 533 /* If we can, drop the PA bit */ 534 if ((privflags & PRIV_RESET) != 0) 535 priv_adjust_PA(cred); 536 537 if (privflags & PRIV_SETID) { 538 cred->cr_uid = uid; 539 cred->cr_gid = gid; 540 cred->cr_suid = uid; 541 cred->cr_sgid = gid; 542 } 543 544 /* 545 * Implement the privilege updates: 546 * 547 * Restrict with L: 548 * 549 * I' = I & L 550 * 551 * E' = P' = (I' + F) & A 552 * 553 * But if running under ptrace, we cap I with P. 554 */ 555 if ((privflags & PRIV_RESET) != 0) { 556 if ((privflags & PRIV_INCREASE) != 0 && 557 (pp->p_proc_flag & P_PR_PTRACE) != 0) 558 priv_intersect(&CR_OPPRIV(cred), 559 &CR_IPRIV(cred)); 560 priv_intersect(&CR_LPRIV(cred), &CR_IPRIV(cred)); 561 CR_EPRIV(cred) = CR_PPRIV(cred) = CR_IPRIV(cred); 562 priv_adjust_PA(cred); 563 } 564 } 565 566 /* SunOS 4.x buy-back */ 567 if ((vp->v_vfsp->vfs_flag & VFS_NOSETUID) && 568 (vattr.va_mode & (VSUID|VSGID))) { 569 cmn_err(CE_NOTE, 570 "!%s, uid %d: setuid execution not allowed, dev=%lx", 571 exec_file, cred->cr_uid, vp->v_vfsp->vfs_dev); 572 } 573 574 /* 575 * execsetid() told us whether or not we had to change the 576 * credentials of the process. In privflags, it told us 577 * whether we gained any privileges or executed a set-uid executable. 578 */ 579 setid = (privflags & (PRIV_SETUGID|PRIV_INCREASE)); 580 581 /* 582 * Use /etc/system variable to determine if the stack 583 * should be marked as executable by default. 
584 */ 585 if (noexec_user_stack) 586 args->stk_prot &= ~PROT_EXEC; 587 588 args->execswp = eswp; /* Save execsw pointer in uarg for exec_func */ 589 590 /* 591 * Traditionally, the setid flags told the sub processes whether 592 * the file just executed was set-uid or set-gid; this caused 593 * some confusion as the 'setid' flag did not match the SUGID 594 * process flag which is only set when the uids/gids do not match. 595 * A script set-gid/set-uid to the real uid/gid would start with 596 * /dev/fd/X but an executable would happily trust LD_LIBRARY_PATH. 597 * Now we flag those cases where the calling process cannot 598 * be trusted to influence the newly exec'ed process, either 599 * because it runs with more privileges or when the uids/gids 600 * do in fact not match. 601 * This also makes the runtime linker agree with the on exec 602 * values of SNOCD and SUGID. 603 */ 604 error = (*eswp->exec_func)(vp, uap, args, idatap, level, execsz, 605 (setid & PRIV_INCREASE) != 0 || 606 cred->cr_uid != cred->cr_ruid || 607 (cred->cr_rgid != cred->cr_gid && 608 !supgroupmember(cred->cr_gid, cred)), exec_file, cred); 609 rw_exit(eswp->exec_lock); 610 if (error != 0) { 611 if (newcred != NULL) 612 crfree(newcred); 613 goto bad; 614 } 615 616 if (level == 0) { 617 mutex_enter(&pp->p_crlock); 618 if (newcred != NULL) { 619 /* 620 * Free the old credentials, and set the new ones. 621 * Do this for both the process and the (single) thread. 622 */ 623 crfree(pp->p_cred); 624 pp->p_cred = cred; /* cred already held for proc */ 625 crhold(cred); /* hold new cred for thread */ 626 /* 627 * DTrace accesses t_cred in probe context. t_cred 628 * must always be either NULL, or point to a valid, 629 * allocated cred structure. 630 */ 631 oldcred = curthread->t_cred; 632 curthread->t_cred = cred; 633 crfree(oldcred); 634 } 635 /* 636 * On emerging from a successful exec(), the saved 637 * uid and gid equal the effective uid and gid. 
638 */ 639 cred->cr_suid = cred->cr_uid; 640 cred->cr_sgid = cred->cr_gid; 641 642 /* 643 * If the real and effective ids do not match, this 644 * is a setuid process that should not dump core. 645 * The group comparison is tricky; we prevent the code 646 * from flagging SNOCD when executing with an effective gid 647 * which is a supplementary group. 648 */ 649 if (cred->cr_ruid != cred->cr_uid || 650 (cred->cr_rgid != cred->cr_gid && 651 !supgroupmember(cred->cr_gid, cred)) || 652 (privflags & PRIV_INCREASE) != 0) 653 suidflags = PSUIDFLAGS; 654 else 655 suidflags = 0; 656 657 mutex_exit(&pp->p_crlock); 658 if (suidflags) { 659 mutex_enter(&pp->p_lock); 660 pp->p_flag |= suidflags; 661 mutex_exit(&pp->p_lock); 662 } 663 if (setid && (pp->p_proc_flag & P_PR_PTRACE) == 0) { 664 /* 665 * If process is traced via /proc, arrange to 666 * invalidate the associated /proc vnode. 667 */ 668 if (pp->p_plist || (pp->p_proc_flag & P_PR_TRACE)) 669 args->traceinval = 1; 670 } 671 if (pp->p_proc_flag & P_PR_PTRACE) 672 psignal(pp, SIGTRAP); 673 if (args->traceinval) 674 prinvalidate(&pp->p_user); 675 } 676 677 return (0); 678 bad: 679 if (error == 0) 680 error = ENOEXEC; 681 682 if (suidflags) { 683 mutex_enter(&pp->p_lock); 684 pp->p_flag |= suidflags; 685 mutex_exit(&pp->p_lock); 686 } 687 return (error); 688 } 689 690 extern char *execswnames[]; 691 692 struct execsw * 693 allocate_execsw(char *name, char *magic, size_t magic_size) 694 { 695 int i, j; 696 char *ename; 697 char *magicp; 698 699 mutex_enter(&execsw_lock); 700 for (i = 0; i < nexectype; i++) { 701 if (execswnames[i] == NULL) { 702 ename = kmem_alloc(strlen(name) + 1, KM_SLEEP); 703 (void) strcpy(ename, name); 704 execswnames[i] = ename; 705 /* 706 * Set the magic number last so that we 707 * don't need to hold the execsw_lock in 708 * findexectype(). 
709 */ 710 magicp = kmem_alloc(magic_size, KM_SLEEP); 711 for (j = 0; j < magic_size; j++) 712 magicp[j] = magic[j]; 713 execsw[i].exec_magic = magicp; 714 mutex_exit(&execsw_lock); 715 return (&execsw[i]); 716 } 717 } 718 mutex_exit(&execsw_lock); 719 return (NULL); 720 } 721 722 /* 723 * Find the exec switch table entry with the corresponding magic string. 724 */ 725 struct execsw * 726 findexecsw(char *magic) 727 { 728 struct execsw *eswp; 729 730 for (eswp = execsw; eswp < &execsw[nexectype]; eswp++) { 731 ASSERT(eswp->exec_maglen <= MAGIC_BYTES); 732 if (magic && eswp->exec_maglen != 0 && 733 bcmp(magic, eswp->exec_magic, eswp->exec_maglen) == 0) 734 return (eswp); 735 } 736 return (NULL); 737 } 738 739 /* 740 * Find the execsw[] index for the given exec header string by looking for the 741 * magic string at a specified offset and length for each kind of executable 742 * file format until one matches. If no execsw[] entry is found, try to 743 * autoload a module for this magic string. 744 */ 745 struct execsw * 746 findexec_by_hdr(char *header) 747 { 748 struct execsw *eswp; 749 750 for (eswp = execsw; eswp < &execsw[nexectype]; eswp++) { 751 ASSERT(eswp->exec_maglen <= MAGIC_BYTES); 752 if (header && eswp->exec_maglen != 0 && 753 bcmp(&header[eswp->exec_magoff], eswp->exec_magic, 754 eswp->exec_maglen) == 0) { 755 if (hold_execsw(eswp) != 0) 756 return (NULL); 757 return (eswp); 758 } 759 } 760 return (NULL); /* couldn't find the type */ 761 } 762 763 /* 764 * Find the execsw[] index for the given magic string. If no execsw[] entry 765 * is found, try to autoload a module for this magic string. 
766 */ 767 struct execsw * 768 findexec_by_magic(char *magic) 769 { 770 struct execsw *eswp; 771 772 for (eswp = execsw; eswp < &execsw[nexectype]; eswp++) { 773 ASSERT(eswp->exec_maglen <= MAGIC_BYTES); 774 if (magic && eswp->exec_maglen != 0 && 775 bcmp(magic, eswp->exec_magic, eswp->exec_maglen) == 0) { 776 if (hold_execsw(eswp) != 0) 777 return (NULL); 778 return (eswp); 779 } 780 } 781 return (NULL); /* couldn't find the type */ 782 } 783 784 static int 785 hold_execsw(struct execsw *eswp) 786 { 787 char *name; 788 789 rw_enter(eswp->exec_lock, RW_READER); 790 while (!LOADED_EXEC(eswp)) { 791 rw_exit(eswp->exec_lock); 792 name = execswnames[eswp-execsw]; 793 ASSERT(name); 794 if (modload("exec", name) == -1) 795 return (-1); 796 rw_enter(eswp->exec_lock, RW_READER); 797 } 798 return (0); 799 } 800 801 static int 802 execsetid(struct vnode *vp, struct vattr *vattrp, uid_t *uidp, uid_t *gidp) 803 { 804 proc_t *pp = ttoproc(curthread); 805 uid_t uid, gid; 806 cred_t *cr = pp->p_cred; 807 int privflags = 0; 808 809 /* 810 * Remember credentials. 811 */ 812 uid = cr->cr_uid; 813 gid = cr->cr_gid; 814 815 /* Will try to reset the PRIV_AWARE bit later. */ 816 if ((CR_FLAGS(cr) & (PRIV_AWARE|PRIV_AWARE_INHERIT)) == PRIV_AWARE) 817 privflags |= PRIV_RESET; 818 819 if ((vp->v_vfsp->vfs_flag & VFS_NOSETUID) == 0) { 820 /* 821 * Set-uid root execution only allowed if the limit set 822 * holds all unsafe privileges. 823 */ 824 if ((vattrp->va_mode & VSUID) && (vattrp->va_uid != 0 || 825 priv_issubset(&priv_unsafe, &CR_LPRIV(cr)))) { 826 uid = vattrp->va_uid; 827 privflags |= PRIV_SETUGID; 828 } 829 if (vattrp->va_mode & VSGID) { 830 gid = vattrp->va_gid; 831 privflags |= PRIV_SETUGID; 832 } 833 } 834 835 /* 836 * Do we need to change our credential anyway? 837 * This is the case when E != I or P != I, as 838 * we need to do the assignments (with F empty and A full) 839 * Or when I is not a subset of L; in that case we need to 840 * enforce L. 
841 * 842 * I' = L & I 843 * 844 * E' = P' = (I' + F) & A 845 * or 846 * E' = P' = I' 847 */ 848 if (!priv_isequalset(&CR_EPRIV(cr), &CR_IPRIV(cr)) || 849 !priv_issubset(&CR_IPRIV(cr), &CR_LPRIV(cr)) || 850 !priv_isequalset(&CR_PPRIV(cr), &CR_IPRIV(cr))) 851 privflags |= PRIV_RESET; 852 853 /* 854 * When we introduce the "forced" set then we will need 855 * to set PRIV_INCREASE here if I not a subset of P. 856 * If the "allowed" set is introduced we will need to do 857 * a similar thing; however, it seems more reasonable to 858 * have the allowed set reduce "L": script language interpreters 859 * would typically have an allowed set of "all". 860 */ 861 862 /* 863 * Set setuid/setgid protections if no ptrace() compatibility. 864 * For privileged processes, honor setuid/setgid even in 865 * the presence of ptrace() compatibility. 866 */ 867 if (((pp->p_proc_flag & P_PR_PTRACE) == 0 || 868 PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, (uid == 0))) && 869 (cr->cr_uid != uid || 870 cr->cr_gid != gid || 871 cr->cr_suid != uid || 872 cr->cr_sgid != gid)) { 873 *uidp = uid; 874 *gidp = gid; 875 privflags |= PRIV_SETID; 876 } 877 return (privflags); 878 } 879 880 int 881 execpermissions(struct vnode *vp, struct vattr *vattrp, struct uarg *args) 882 { 883 int error; 884 proc_t *p = ttoproc(curthread); 885 886 vattrp->va_mask = AT_MODE | AT_UID | AT_GID | AT_SIZE; 887 if (error = VOP_GETATTR(vp, vattrp, ATTR_EXEC, p->p_cred)) 888 return (error); 889 /* 890 * Check the access mode. 891 * If VPROC, ask /proc if the file is an object file. 
892 */ 893 if ((error = VOP_ACCESS(vp, VEXEC, 0, p->p_cred)) != 0 || 894 !(vp->v_type == VREG || (vp->v_type == VPROC && pr_isobject(vp))) || 895 (vp->v_vfsp->vfs_flag & VFS_NOEXEC) != 0 || 896 (vattrp->va_mode & (VEXEC|(VEXEC>>3)|(VEXEC>>6))) == 0) { 897 if (error == 0) 898 error = EACCES; 899 return (error); 900 } 901 902 if ((p->p_plist || (p->p_proc_flag & (P_PR_PTRACE|P_PR_TRACE))) && 903 (error = VOP_ACCESS(vp, VREAD, 0, p->p_cred))) { 904 /* 905 * If process is under ptrace(2) compatibility, 906 * fail the exec(2). 907 */ 908 if (p->p_proc_flag & P_PR_PTRACE) 909 goto bad; 910 /* 911 * Process is traced via /proc. 912 * Arrange to invalidate the /proc vnode. 913 */ 914 args->traceinval = 1; 915 } 916 return (0); 917 bad: 918 if (error == 0) 919 error = ENOEXEC; 920 return (error); 921 } 922 923 /* 924 * Map a section of an executable file into the user's 925 * address space. 926 */ 927 int 928 execmap(struct vnode *vp, caddr_t addr, size_t len, size_t zfodlen, 929 off_t offset, int prot, int page, uint_t szc) 930 { 931 int error = 0; 932 off_t oldoffset; 933 caddr_t zfodbase, oldaddr; 934 size_t end, oldlen; 935 size_t zfoddiff; 936 label_t ljb; 937 proc_t *p = ttoproc(curthread); 938 939 oldaddr = addr; 940 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 941 if (len) { 942 oldlen = len; 943 len += ((size_t)oldaddr - (size_t)addr); 944 oldoffset = offset; 945 offset = (off_t)((uintptr_t)offset & PAGEMASK); 946 if (page) { 947 spgcnt_t prefltmem, availm, npages; 948 int preread; 949 uint_t mflag = MAP_PRIVATE | MAP_FIXED; 950 951 if ((prot & (PROT_WRITE | PROT_EXEC)) == PROT_EXEC) { 952 mflag |= MAP_TEXT; 953 } else { 954 mflag |= MAP_INITDATA; 955 } 956 957 if (valid_usr_range(addr, len, prot, p->p_as, 958 p->p_as->a_userlimit) != RANGE_OKAY) { 959 error = ENOMEM; 960 goto bad; 961 } 962 if (error = VOP_MAP(vp, (offset_t)offset, 963 p->p_as, &addr, len, prot, PROT_ALL, 964 mflag, CRED())) 965 goto bad; 966 967 /* 968 * If the segment can fit, then 
we prefault 969 * the entire segment in. This is based on the 970 * model that says the best working set of a 971 * small program is all of its pages. 972 */ 973 npages = (spgcnt_t)btopr(len); 974 prefltmem = freemem - desfree; 975 preread = 976 (npages < prefltmem && len < PGTHRESH) ? 1 : 0; 977 978 /* 979 * If we aren't prefaulting the segment, 980 * increment "deficit", if necessary to ensure 981 * that pages will become available when this 982 * process starts executing. 983 */ 984 availm = freemem - lotsfree; 985 if (preread == 0 && npages > availm && 986 deficit < lotsfree) { 987 deficit += MIN((pgcnt_t)(npages - availm), 988 lotsfree - deficit); 989 } 990 991 if (preread) { 992 TRACE_2(TR_FAC_PROC, TR_EXECMAP_PREREAD, 993 "execmap preread:freemem %d size %lu", 994 freemem, len); 995 (void) as_fault(p->p_as->a_hat, p->p_as, 996 (caddr_t)addr, len, F_INVAL, S_READ); 997 } 998 } else { 999 if (valid_usr_range(addr, len, prot, p->p_as, 1000 p->p_as->a_userlimit) != RANGE_OKAY) { 1001 error = ENOMEM; 1002 goto bad; 1003 } 1004 1005 if (error = as_map(p->p_as, addr, len, 1006 segvn_create, zfod_argsp)) 1007 goto bad; 1008 /* 1009 * Read in the segment in one big chunk. 1010 */ 1011 if (error = vn_rdwr(UIO_READ, vp, (caddr_t)oldaddr, 1012 oldlen, (offset_t)oldoffset, UIO_USERSPACE, 0, 1013 (rlim64_t)0, CRED(), (ssize_t *)0)) 1014 goto bad; 1015 /* 1016 * Now set protections. 
1017 */ 1018 if (prot != PROT_ZFOD) { 1019 (void) as_setprot(p->p_as, (caddr_t)addr, 1020 len, prot); 1021 } 1022 } 1023 } 1024 1025 if (zfodlen) { 1026 end = (size_t)addr + len; 1027 zfodbase = (caddr_t)roundup(end, PAGESIZE); 1028 zfoddiff = (uintptr_t)zfodbase - end; 1029 if (zfoddiff) { 1030 if (on_fault(&ljb)) { 1031 no_fault(); 1032 error = EFAULT; 1033 goto bad; 1034 } 1035 uzero((void *)end, zfoddiff); 1036 no_fault(); 1037 } 1038 if (zfodlen > zfoddiff) { 1039 struct segvn_crargs crargs = 1040 SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL); 1041 1042 zfodlen -= zfoddiff; 1043 if (valid_usr_range(zfodbase, zfodlen, prot, p->p_as, 1044 p->p_as->a_userlimit) != RANGE_OKAY) { 1045 error = ENOMEM; 1046 goto bad; 1047 } 1048 crargs.szc = szc; 1049 if (error = as_map(p->p_as, (caddr_t)zfodbase, 1050 zfodlen, segvn_create, &crargs)) 1051 goto bad; 1052 if (prot != PROT_ZFOD) { 1053 (void) as_setprot(p->p_as, (caddr_t)zfodbase, 1054 zfodlen, prot); 1055 } 1056 } 1057 } 1058 return (0); 1059 bad: 1060 return (error); 1061 } 1062 1063 void 1064 setexecenv(struct execenv *ep) 1065 { 1066 proc_t *p = ttoproc(curthread); 1067 klwp_t *lwp = ttolwp(curthread); 1068 struct vnode *vp; 1069 1070 p->p_bssbase = ep->ex_bssbase; 1071 p->p_brkbase = ep->ex_brkbase; 1072 p->p_brksize = ep->ex_brksize; 1073 if (p->p_exec) 1074 VN_RELE(p->p_exec); /* out with the old */ 1075 vp = p->p_exec = ep->ex_vp; 1076 if (vp != NULL) 1077 VN_HOLD(vp); /* in with the new */ 1078 1079 lwp->lwp_sigaltstack.ss_sp = 0; 1080 lwp->lwp_sigaltstack.ss_size = 0; 1081 lwp->lwp_sigaltstack.ss_flags = SS_DISABLE; 1082 } 1083 1084 int 1085 execopen(struct vnode **vpp, int *fdp) 1086 { 1087 struct vnode *vp = *vpp; 1088 file_t *fp; 1089 int error = 0; 1090 int filemode = FREAD; 1091 1092 VN_HOLD(vp); /* open reference */ 1093 if (error = falloc(NULL, filemode, &fp, fdp)) { 1094 VN_RELE(vp); 1095 *fdp = -1; /* just in case falloc changed value */ 1096 return (error); 1097 } 1098 if (error = VOP_OPEN(&vp, filemode, 
CRED())) { 1099 VN_RELE(vp); 1100 setf(*fdp, NULL); 1101 unfalloc(fp); 1102 *fdp = -1; 1103 return (error); 1104 } 1105 *vpp = vp; /* vnode should not have changed */ 1106 fp->f_vnode = vp; 1107 mutex_exit(&fp->f_tlock); 1108 setf(*fdp, fp); 1109 return (0); 1110 } 1111 1112 int 1113 execclose(int fd) 1114 { 1115 return (closeandsetf(fd, NULL)); 1116 } 1117 1118 1119 /* 1120 * noexec stub function. 1121 */ 1122 /*ARGSUSED*/ 1123 int 1124 noexec( 1125 struct vnode *vp, 1126 struct execa *uap, 1127 struct uarg *args, 1128 struct intpdata *idatap, 1129 int level, 1130 long *execsz, 1131 int setid, 1132 caddr_t exec_file, 1133 struct cred *cred) 1134 { 1135 cmn_err(CE_WARN, "missing exec capability for %s", uap->fname); 1136 return (ENOEXEC); 1137 } 1138 1139 /* 1140 * Support routines for building a user stack. 1141 * 1142 * execve(path, argv, envp) must construct a new stack with the specified 1143 * arguments and environment variables (see exec_args() for a description 1144 * of the user stack layout). To do this, we copy the arguments and 1145 * environment variables from the old user address space into the kernel, 1146 * free the old as, create the new as, and copy our buffered information 1147 * to the new stack. Our kernel buffer has the following structure: 1148 * 1149 * +-----------------------+ <--- stk_base + stk_size 1150 * | string offsets | 1151 * +-----------------------+ <--- stk_offp 1152 * | | 1153 * | STK_AVAIL() space | 1154 * | | 1155 * +-----------------------+ <--- stk_strp 1156 * | strings | 1157 * +-----------------------+ <--- stk_base 1158 * 1159 * When we add a string, we store the string's contents (including the null 1160 * terminator) at stk_strp, and we store the offset of the string relative to 1161 * stk_base at --stk_offp. At strings are added, stk_strp increases and 1162 * stk_offp decreases. The amount of space remaining, STK_AVAIL(), is just 1163 * the difference between these pointers. 
 * If we run out of space, we return an error and exec_args() starts all
 * over again with a buffer twice as large.
 * When we're all done, the kernel buffer looks like this:
 *
 *	+-----------------------+ <--- stk_base + stk_size
 *	| argv[0] offset        |
 *	+-----------------------+
 *	| ...                   |
 *	+-----------------------+
 *	| argv[argc-1] offset   |
 *	+-----------------------+
 *	| envp[0] offset        |
 *	+-----------------------+
 *	| ...                   |
 *	+-----------------------+
 *	| envp[envc-1] offset   |
 *	+-----------------------+
 *	| AT_SUN_PLATFORM offset|
 *	+-----------------------+
 *	| AT_SUN_EXECNAME offset|
 *	+-----------------------+ <--- stk_offp
 *	|                       |
 *	| STK_AVAIL() space     |
 *	|                       |
 *	+-----------------------+ <--- stk_strp
 *	| AT_SUN_EXECNAME string|
 *	+-----------------------+
 *	| AT_SUN_PLATFORM string|
 *	+-----------------------+
 *	| envp[envc-1] string   |
 *	+-----------------------+
 *	| ...                   |
 *	+-----------------------+
 *	| envp[0] string        |
 *	+-----------------------+
 *	| argv[argc-1] string   |
 *	+-----------------------+
 *	| ...                   |
 *	+-----------------------+
 *	| argv[0] string        |
 *	+-----------------------+ <--- stk_base
 */

#define	STK_AVAIL(args)		((char *)(args)->stk_offp - (args)->stk_strp)

/*
 * Add a string to the stack.
1210 */ 1211 static int 1212 stk_add(uarg_t *args, const char *sp, enum uio_seg segflg) 1213 { 1214 int error; 1215 size_t len; 1216 1217 if (STK_AVAIL(args) < sizeof (int)) 1218 return (E2BIG); 1219 *--args->stk_offp = args->stk_strp - args->stk_base; 1220 1221 if (segflg == UIO_USERSPACE) { 1222 error = copyinstr(sp, args->stk_strp, STK_AVAIL(args), &len); 1223 if (error != 0) 1224 return (error); 1225 } else { 1226 len = strlen(sp) + 1; 1227 if (len > STK_AVAIL(args)) 1228 return (E2BIG); 1229 bcopy(sp, args->stk_strp, len); 1230 } 1231 1232 args->stk_strp += len; 1233 1234 return (0); 1235 } 1236 1237 static int 1238 stk_getptr(uarg_t *args, char *src, char **dst) 1239 { 1240 int error; 1241 1242 if (args->from_model == DATAMODEL_NATIVE) { 1243 ulong_t ptr; 1244 error = fulword(src, &ptr); 1245 *dst = (caddr_t)ptr; 1246 } else { 1247 uint32_t ptr; 1248 error = fuword32(src, &ptr); 1249 *dst = (caddr_t)(uintptr_t)ptr; 1250 } 1251 return (error); 1252 } 1253 1254 static int 1255 stk_putptr(uarg_t *args, char *addr, char *value) 1256 { 1257 if (args->to_model == DATAMODEL_NATIVE) 1258 return (sulword(addr, (ulong_t)value)); 1259 else 1260 return (suword32(addr, (uint32_t)(uintptr_t)value)); 1261 } 1262 1263 static int 1264 stk_copyin(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp) 1265 { 1266 char *sp; 1267 int argc, error; 1268 int argv_empty = 0; 1269 size_t ptrsize = args->from_ptrsize; 1270 size_t size, pad; 1271 char *argv = (char *)uap->argp; 1272 char *envp = (char *)uap->envp; 1273 1274 /* 1275 * Copy interpreter's name and argument to argv[0] and argv[1]. 
1276 */ 1277 if (intp != NULL && intp->intp_name != NULL) { 1278 if ((error = stk_add(args, intp->intp_name, UIO_SYSSPACE)) != 0) 1279 return (error); 1280 if (intp->intp_arg != NULL && 1281 (error = stk_add(args, intp->intp_arg, UIO_SYSSPACE)) != 0) 1282 return (error); 1283 if (args->fname != NULL) 1284 error = stk_add(args, args->fname, UIO_SYSSPACE); 1285 else 1286 error = stk_add(args, uap->fname, UIO_USERSPACE); 1287 if (error) 1288 return (error); 1289 1290 /* 1291 * Check for an empty argv[]. 1292 */ 1293 if (stk_getptr(args, argv, &sp)) 1294 return (EFAULT); 1295 if (sp == NULL) 1296 argv_empty = 1; 1297 1298 argv += ptrsize; /* ignore original argv[0] */ 1299 } 1300 1301 if (argv_empty == 0) { 1302 /* 1303 * Add argv[] strings to the stack. 1304 */ 1305 for (;;) { 1306 if (stk_getptr(args, argv, &sp)) 1307 return (EFAULT); 1308 if (sp == NULL) 1309 break; 1310 if ((error = stk_add(args, sp, UIO_USERSPACE)) != 0) 1311 return (error); 1312 argv += ptrsize; 1313 } 1314 } 1315 argc = (int *)(args->stk_base + args->stk_size) - args->stk_offp; 1316 args->arglen = args->stk_strp - args->stk_base; 1317 1318 /* 1319 * Add environ[] strings to the stack. 1320 */ 1321 if (envp != NULL) { 1322 for (;;) { 1323 if (stk_getptr(args, envp, &sp)) 1324 return (EFAULT); 1325 if (sp == NULL) 1326 break; 1327 if ((error = stk_add(args, sp, UIO_USERSPACE)) != 0) 1328 return (error); 1329 envp += ptrsize; 1330 } 1331 } 1332 args->na = (int *)(args->stk_base + args->stk_size) - args->stk_offp; 1333 args->ne = args->na - argc; 1334 1335 /* 1336 * Add AT_SUN_PLATFORM and AT_SUN_EXECNAME strings to the stack. 1337 */ 1338 if (auxvpp != NULL && *auxvpp != NULL) { 1339 if ((error = stk_add(args, platform, UIO_SYSSPACE)) != 0) 1340 return (error); 1341 if ((error = stk_add(args, args->pathname, UIO_SYSSPACE)) != 0) 1342 return (error); 1343 } 1344 1345 /* 1346 * Compute the size of the stack. 
This includes all the pointers, 1347 * the space reserved for the aux vector, and all the strings. 1348 * The total number of pointers is args->na (which is argc + envc) 1349 * plus 4 more: (1) a pointer's worth of space for argc; (2) the NULL 1350 * after the last argument (i.e. argv[argc]); (3) the NULL after the 1351 * last environment variable (i.e. envp[envc]); and (4) the NULL after 1352 * all the strings, at the very top of the stack. 1353 */ 1354 size = (args->na + 4) * args->to_ptrsize + args->auxsize + 1355 (args->stk_strp - args->stk_base); 1356 1357 /* 1358 * Pad the string section with zeroes to align the stack size. 1359 */ 1360 pad = P2NPHASE(size, args->stk_align); 1361 1362 if (STK_AVAIL(args) < pad) 1363 return (E2BIG); 1364 1365 args->usrstack_size = size + pad; 1366 1367 while (pad-- != 0) 1368 *args->stk_strp++ = 0; 1369 1370 args->nc = args->stk_strp - args->stk_base; 1371 1372 return (0); 1373 } 1374 1375 static int 1376 stk_copyout(uarg_t *args, char *usrstack, void **auxvpp, user_t *up) 1377 { 1378 size_t ptrsize = args->to_ptrsize; 1379 ssize_t pslen; 1380 char *kstrp = args->stk_base; 1381 char *ustrp = usrstack - args->nc - ptrsize; 1382 char *usp = usrstack - args->usrstack_size; 1383 int *offp = (int *)(args->stk_base + args->stk_size); 1384 int envc = args->ne; 1385 int argc = args->na - envc; 1386 int i; 1387 1388 /* 1389 * Record argc for /proc. 1390 */ 1391 up->u_argc = argc; 1392 1393 /* 1394 * Put argc on the stack. Note that even though it's an int, 1395 * it always consumes ptrsize bytes (for alignment). 1396 */ 1397 if (stk_putptr(args, usp, (char *)(uintptr_t)argc)) 1398 return (-1); 1399 1400 /* 1401 * Add argc space (ptrsize) to usp and record argv for /proc. 1402 */ 1403 up->u_argv = (uintptr_t)(usp += ptrsize); 1404 1405 /* 1406 * Put the argv[] pointers on the stack. 
1407 */ 1408 for (i = 0; i < argc; i++, usp += ptrsize) 1409 if (stk_putptr(args, usp, &ustrp[*--offp])) 1410 return (-1); 1411 1412 /* 1413 * Copy arguments to u_psargs. 1414 */ 1415 pslen = MIN(args->arglen, PSARGSZ) - 1; 1416 for (i = 0; i < pslen; i++) 1417 up->u_psargs[i] = (kstrp[i] == '\0' ? ' ' : kstrp[i]); 1418 while (i < PSARGSZ) 1419 up->u_psargs[i++] = '\0'; 1420 1421 /* 1422 * Add space for argv[]'s NULL terminator (ptrsize) to usp and 1423 * record envp for /proc. 1424 */ 1425 up->u_envp = (uintptr_t)(usp += ptrsize); 1426 1427 /* 1428 * Put the envp[] pointers on the stack. 1429 */ 1430 for (i = 0; i < envc; i++, usp += ptrsize) 1431 if (stk_putptr(args, usp, &ustrp[*--offp])) 1432 return (-1); 1433 1434 /* 1435 * Add space for envp[]'s NULL terminator (ptrsize) to usp and 1436 * remember where the stack ends, which is also where auxv begins. 1437 */ 1438 args->stackend = usp += ptrsize; 1439 1440 /* 1441 * Put all the argv[], envp[], and auxv strings on the stack. 1442 */ 1443 if (copyout(args->stk_base, ustrp, args->nc)) 1444 return (-1); 1445 1446 /* 1447 * Fill in the aux vector now that we know the user stack addresses 1448 * for the AT_SUN_PLATFORM and AT_SUN_EXECNAME strings. 1449 */ 1450 if (auxvpp != NULL && *auxvpp != NULL) { 1451 if (args->to_model == DATAMODEL_NATIVE) { 1452 auxv_t **a = (auxv_t **)auxvpp; 1453 ADDAUX(*a, AT_SUN_PLATFORM, (long)&ustrp[*--offp]) 1454 ADDAUX(*a, AT_SUN_EXECNAME, (long)&ustrp[*--offp]) 1455 } else { 1456 auxv32_t **a = (auxv32_t **)auxvpp; 1457 ADDAUX(*a, 1458 AT_SUN_PLATFORM, (int)(uintptr_t)&ustrp[*--offp]) 1459 ADDAUX(*a, 1460 AT_SUN_EXECNAME, (int)(uintptr_t)&ustrp[*--offp]); 1461 } 1462 } 1463 1464 return (0); 1465 } 1466 1467 #ifdef DEBUG 1468 int mpss_brkpgszsel = 0; 1469 int mpss_stkpgszsel = 0; 1470 #endif 1471 1472 /* 1473 * Initialize a new user stack with the specified arguments and environment. 
1474 * The initial user stack layout is as follows: 1475 * 1476 * User Stack 1477 * +---------------+ <--- curproc->p_usrstack 1478 * | NULL | 1479 * +---------------+ 1480 * | | 1481 * | auxv strings | 1482 * | | 1483 * +---------------+ 1484 * | | 1485 * | envp strings | 1486 * | | 1487 * +---------------+ 1488 * | | 1489 * | argv strings | 1490 * | | 1491 * +---------------+ <--- ustrp 1492 * | | 1493 * | aux vector | 1494 * | | 1495 * +---------------+ <--- auxv 1496 * | NULL | 1497 * +---------------+ 1498 * | envp[envc-1] | 1499 * +---------------+ 1500 * | ... | 1501 * +---------------+ 1502 * | envp[0] | 1503 * +---------------+ <--- envp[] 1504 * | NULL | 1505 * +---------------+ 1506 * | argv[argc-1] | 1507 * +---------------+ 1508 * | ... | 1509 * +---------------+ 1510 * | argv[0] | 1511 * +---------------+ <--- argv[] 1512 * | argc | 1513 * +---------------+ <--- stack base 1514 */ 1515 int 1516 exec_args(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp) 1517 { 1518 size_t size; 1519 int error; 1520 proc_t *p = ttoproc(curthread); 1521 user_t *up = PTOU(p); 1522 char *usrstack; 1523 rctl_entity_p_t e; 1524 1525 struct as *as; 1526 1527 args->from_model = p->p_model; 1528 if (p->p_model == DATAMODEL_NATIVE) { 1529 args->from_ptrsize = sizeof (long); 1530 } else { 1531 args->from_ptrsize = sizeof (int32_t); 1532 } 1533 1534 if (args->to_model == DATAMODEL_NATIVE) { 1535 args->to_ptrsize = sizeof (long); 1536 args->ncargs = NCARGS; 1537 args->stk_align = STACK_ALIGN; 1538 usrstack = (char *)USRSTACK; 1539 } else { 1540 args->to_ptrsize = sizeof (int32_t); 1541 args->ncargs = NCARGS32; 1542 args->stk_align = STACK_ALIGN32; 1543 usrstack = (char *)USRSTACK32; 1544 } 1545 1546 ASSERT(P2PHASE((uintptr_t)usrstack, args->stk_align) == 0); 1547 1548 #if defined(__sparc) 1549 /* 1550 * Make sure user register windows are empty before 1551 * attempting to make a new stack. 
1552 */ 1553 (void) flush_user_windows_to_stack(NULL); 1554 #endif 1555 1556 for (size = PAGESIZE; ; size *= 2) { 1557 args->stk_size = size; 1558 args->stk_base = kmem_alloc(size, KM_SLEEP); 1559 args->stk_strp = args->stk_base; 1560 args->stk_offp = (int *)(args->stk_base + size); 1561 error = stk_copyin(uap, args, intp, auxvpp); 1562 if (error == 0) 1563 break; 1564 kmem_free(args->stk_base, size); 1565 if (error != E2BIG && error != ENAMETOOLONG) 1566 return (error); 1567 if (size >= args->ncargs) 1568 return (E2BIG); 1569 } 1570 1571 size = args->usrstack_size; 1572 1573 ASSERT(error == 0); 1574 ASSERT(P2PHASE(size, args->stk_align) == 0); 1575 ASSERT((ssize_t)STK_AVAIL(args) >= 0); 1576 1577 if (size > args->ncargs) { 1578 kmem_free(args->stk_base, args->stk_size); 1579 return (E2BIG); 1580 } 1581 1582 /* 1583 * Leave only the current lwp and force the other lwps to exit. 1584 * If another lwp beat us to the punch by calling exit(), bail out. 1585 */ 1586 if ((error = exitlwps(0)) != 0) { 1587 kmem_free(args->stk_base, args->stk_size); 1588 return (error); 1589 } 1590 1591 /* 1592 * Revoke any doors created by the process. 1593 */ 1594 if (p->p_door_list) 1595 door_exit(); 1596 1597 /* 1598 * Release schedctl data structures. 1599 */ 1600 if (p->p_pagep) 1601 schedctl_proc_cleanup(); 1602 1603 /* 1604 * Clean up any DTrace helpers for the process. 1605 */ 1606 if (p->p_dtrace_helpers != NULL) { 1607 ASSERT(dtrace_helpers_cleanup != NULL); 1608 (*dtrace_helpers_cleanup)(); 1609 } 1610 1611 mutex_enter(&p->p_lock); 1612 /* 1613 * Cleanup the DTrace provider associated with this process. 1614 */ 1615 if (p->p_dtrace_probes) { 1616 ASSERT(dtrace_fasttrap_exec_ptr != NULL); 1617 dtrace_fasttrap_exec_ptr(p); 1618 } 1619 mutex_exit(&p->p_lock); 1620 1621 /* 1622 * discard the lwpchan cache. 1623 */ 1624 if (p->p_lcp != NULL) 1625 lwpchan_destroy_cache(1); 1626 1627 /* 1628 * Delete the POSIX timers. 
1629 */ 1630 if (p->p_itimer != NULL) 1631 timer_exit(); 1632 1633 #ifdef C2_AUDIT 1634 if (audit_active) 1635 audit_exec(args->stk_base, args->stk_base + args->arglen, 1636 args->na - args->ne, args->ne); 1637 #endif 1638 1639 /* 1640 * Ensure that we don't change resource associations while we 1641 * change address spaces. 1642 */ 1643 mutex_enter(&p->p_lock); 1644 pool_barrier_enter(); 1645 mutex_exit(&p->p_lock); 1646 1647 /* 1648 * Destroy the old address space and create a new one. 1649 * From here on, any errors are fatal to the exec()ing process. 1650 * On error we return -1, which means the caller must SIGKILL 1651 * the process. 1652 */ 1653 relvm(); 1654 1655 mutex_enter(&p->p_lock); 1656 pool_barrier_exit(); 1657 mutex_exit(&p->p_lock); 1658 1659 up->u_execsw = args->execswp; 1660 1661 p->p_brkbase = NULL; 1662 p->p_brksize = 0; 1663 p->p_stksize = 0; 1664 p->p_model = args->to_model; 1665 p->p_usrstack = usrstack; 1666 p->p_stkprot = args->stk_prot; 1667 p->p_datprot = args->dat_prot; 1668 1669 /* 1670 * Reset resource controls such that all controls are again active as 1671 * well as appropriate to the potentially new address model for the 1672 * process. 
1673 */ 1674 e.rcep_p.proc = p; 1675 e.rcep_t = RCENTITY_PROCESS; 1676 rctl_set_reset(p->p_rctls, p, &e); 1677 1678 if (exec_lpg_disable == 0) { 1679 #ifdef DEBUG 1680 uint_t pgsizes = page_num_pagesizes(); 1681 uint_t szc; 1682 #endif 1683 p->p_brkpageszc = args->brkpageszc; 1684 p->p_stkpageszc = args->stkpageszc; 1685 1686 if (p->p_brkpageszc == 0) { 1687 p->p_brkpageszc = page_szc(map_pgsz(MAPPGSZ_HEAP, 1688 p, 0, 0, NULL)); 1689 } 1690 if (p->p_stkpageszc == 0) { 1691 p->p_stkpageszc = page_szc(map_pgsz(MAPPGSZ_STK, 1692 p, 0, 0, NULL)); 1693 } 1694 1695 #ifdef DEBUG 1696 if (mpss_brkpgszsel != 0) { 1697 if (mpss_brkpgszsel == -1) { 1698 szc = ((uint_t)gethrtime() >> 8) % pgsizes; 1699 } else { 1700 szc = mpss_brkpgszsel % pgsizes; 1701 } 1702 p->p_brkpageszc = szc; 1703 } 1704 1705 if (mpss_stkpgszsel != 0) { 1706 if (mpss_stkpgszsel == -1) { 1707 szc = ((uint_t)gethrtime() >> 7) % pgsizes; 1708 } else { 1709 szc = mpss_stkpgszsel % pgsizes; 1710 } 1711 p->p_stkpageszc = szc; 1712 } 1713 1714 #endif 1715 mutex_enter(&p->p_lock); 1716 p->p_flag |= SAUTOLPG; /* kernel controls page sizes */ 1717 mutex_exit(&p->p_lock); 1718 1719 } else { 1720 p->p_brkpageszc = 0; 1721 p->p_stkpageszc = 0; 1722 } 1723 1724 exec_set_sp(size); 1725 1726 as = as_alloc(); 1727 p->p_as = as; 1728 if (p->p_model == DATAMODEL_ILP32) 1729 as->a_userlimit = (caddr_t)USERLIMIT32; 1730 (void) hat_setup(as->a_hat, HAT_ALLOC); 1731 1732 /* 1733 * Finally, write out the contents of the new stack. 1734 */ 1735 error = stk_copyout(args, usrstack, auxvpp, up); 1736 kmem_free(args->stk_base, args->stk_size); 1737 return (error); 1738 } 1739