1 /*- 2 * Copyright (c) 1993, David Greenman 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 
25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #include "opt_capsicum.h" 31 #include "opt_hwpmc_hooks.h" 32 #include "opt_kdtrace.h" 33 #include "opt_ktrace.h" 34 #include "opt_vm.h" 35 36 #include <sys/param.h> 37 #include <sys/capability.h> 38 #include <sys/systm.h> 39 #include <sys/capability.h> 40 #include <sys/eventhandler.h> 41 #include <sys/lock.h> 42 #include <sys/mutex.h> 43 #include <sys/sysproto.h> 44 #include <sys/signalvar.h> 45 #include <sys/kernel.h> 46 #include <sys/mount.h> 47 #include <sys/filedesc.h> 48 #include <sys/fcntl.h> 49 #include <sys/acct.h> 50 #include <sys/exec.h> 51 #include <sys/imgact.h> 52 #include <sys/imgact_elf.h> 53 #include <sys/wait.h> 54 #include <sys/malloc.h> 55 #include <sys/priv.h> 56 #include <sys/proc.h> 57 #include <sys/pioctl.h> 58 #include <sys/namei.h> 59 #include <sys/resourcevar.h> 60 #include <sys/sched.h> 61 #include <sys/sdt.h> 62 #include <sys/sf_buf.h> 63 #include <sys/syscallsubr.h> 64 #include <sys/sysent.h> 65 #include <sys/shm.h> 66 #include <sys/sysctl.h> 67 #include <sys/vnode.h> 68 #include <sys/stat.h> 69 #ifdef KTRACE 70 #include <sys/ktrace.h> 71 #endif 72 73 #include <vm/vm.h> 74 #include <vm/vm_param.h> 75 #include <vm/pmap.h> 76 #include <vm/vm_page.h> 77 #include <vm/vm_map.h> 78 #include <vm/vm_kern.h> 79 #include <vm/vm_extern.h> 80 #include <vm/vm_object.h> 81 #include <vm/vm_pager.h> 82 83 #ifdef HWPMC_HOOKS 84 #include <sys/pmckern.h> 85 #endif 86 87 #include <machine/reg.h> 88 89 #include <security/audit/audit.h> 90 #include <security/mac/mac_framework.h> 91 92 #ifdef KDTRACE_HOOKS 93 #include <sys/dtrace_bsd.h> 94 dtrace_execexit_func_t dtrace_fasttrap_exec; 95 #endif 96 97 SDT_PROVIDER_DECLARE(proc); 98 SDT_PROBE_DEFINE(proc, kernel, , exec, exec); 99 SDT_PROBE_ARGTYPE(proc, kernel, , exec, 0, "char *"); 100 SDT_PROBE_DEFINE(proc, kernel, , exec_failure, exec-failure); 101 SDT_PROBE_ARGTYPE(proc, kernel, , exec_failure, 0, "int"); 102 SDT_PROBE_DEFINE(proc, kernel, , 
exec_success, exec-success); 103 SDT_PROBE_ARGTYPE(proc, kernel, , exec_success, 0, "char *"); 104 105 MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments"); 106 107 static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS); 108 static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS); 109 static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS); 110 static int do_execve(struct thread *td, struct image_args *args, 111 struct mac *mac_p); 112 113 /* XXX This should be vm_size_t. */ 114 SYSCTL_PROC(_kern, KERN_PS_STRINGS, ps_strings, CTLTYPE_ULONG|CTLFLAG_RD, 115 NULL, 0, sysctl_kern_ps_strings, "LU", ""); 116 117 /* XXX This should be vm_size_t. */ 118 SYSCTL_PROC(_kern, KERN_USRSTACK, usrstack, CTLTYPE_ULONG|CTLFLAG_RD| 119 CTLFLAG_CAPRD, NULL, 0, sysctl_kern_usrstack, "LU", ""); 120 121 SYSCTL_PROC(_kern, OID_AUTO, stackprot, CTLTYPE_INT|CTLFLAG_RD, 122 NULL, 0, sysctl_kern_stackprot, "I", ""); 123 124 u_long ps_arg_cache_limit = PAGE_SIZE / 16; 125 SYSCTL_ULONG(_kern, OID_AUTO, ps_arg_cache_limit, CTLFLAG_RW, 126 &ps_arg_cache_limit, 0, ""); 127 128 static int map_at_zero = 0; 129 TUNABLE_INT("security.bsd.map_at_zero", &map_at_zero); 130 SYSCTL_INT(_security_bsd, OID_AUTO, map_at_zero, CTLFLAG_RW, &map_at_zero, 0, 131 "Permit processes to map an object at virtual address 0."); 132 133 static int 134 sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS) 135 { 136 struct proc *p; 137 int error; 138 139 p = curproc; 140 #ifdef SCTL_MASK32 141 if (req->flags & SCTL_MASK32) { 142 unsigned int val; 143 val = (unsigned int)p->p_sysent->sv_psstrings; 144 error = SYSCTL_OUT(req, &val, sizeof(val)); 145 } else 146 #endif 147 error = SYSCTL_OUT(req, &p->p_sysent->sv_psstrings, 148 sizeof(p->p_sysent->sv_psstrings)); 149 return error; 150 } 151 152 static int 153 sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS) 154 { 155 struct proc *p; 156 int error; 157 158 p = curproc; 159 #ifdef SCTL_MASK32 160 if (req->flags & SCTL_MASK32) { 161 unsigned int val; 162 val = (unsigned 
int)p->p_sysent->sv_usrstack; 163 error = SYSCTL_OUT(req, &val, sizeof(val)); 164 } else 165 #endif 166 error = SYSCTL_OUT(req, &p->p_sysent->sv_usrstack, 167 sizeof(p->p_sysent->sv_usrstack)); 168 return error; 169 } 170 171 static int 172 sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS) 173 { 174 struct proc *p; 175 176 p = curproc; 177 return (SYSCTL_OUT(req, &p->p_sysent->sv_stackprot, 178 sizeof(p->p_sysent->sv_stackprot))); 179 } 180 181 /* 182 * Each of the items is a pointer to a `const struct execsw', hence the 183 * double pointer here. 184 */ 185 static const struct execsw **execsw; 186 187 #ifndef _SYS_SYSPROTO_H_ 188 struct execve_args { 189 char *fname; 190 char **argv; 191 char **envv; 192 }; 193 #endif 194 195 int 196 sys_execve(td, uap) 197 struct thread *td; 198 struct execve_args /* { 199 char *fname; 200 char **argv; 201 char **envv; 202 } */ *uap; 203 { 204 int error; 205 struct image_args args; 206 207 error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE, 208 uap->argv, uap->envv); 209 if (error == 0) 210 error = kern_execve(td, &args, NULL); 211 return (error); 212 } 213 214 #ifndef _SYS_SYSPROTO_H_ 215 struct fexecve_args { 216 int fd; 217 char **argv; 218 char **envv; 219 } 220 #endif 221 int 222 sys_fexecve(struct thread *td, struct fexecve_args *uap) 223 { 224 int error; 225 struct image_args args; 226 227 error = exec_copyin_args(&args, NULL, UIO_SYSSPACE, 228 uap->argv, uap->envv); 229 if (error == 0) { 230 args.fd = uap->fd; 231 error = kern_execve(td, &args, NULL); 232 } 233 return (error); 234 } 235 236 #ifndef _SYS_SYSPROTO_H_ 237 struct __mac_execve_args { 238 char *fname; 239 char **argv; 240 char **envv; 241 struct mac *mac_p; 242 }; 243 #endif 244 245 int 246 sys___mac_execve(td, uap) 247 struct thread *td; 248 struct __mac_execve_args /* { 249 char *fname; 250 char **argv; 251 char **envv; 252 struct mac *mac_p; 253 } */ *uap; 254 { 255 #ifdef MAC 256 int error; 257 struct image_args args; 258 259 error = exec_copyin_args(&args, 
uap->fname, UIO_USERSPACE, 260 uap->argv, uap->envv); 261 if (error == 0) 262 error = kern_execve(td, &args, uap->mac_p); 263 return (error); 264 #else 265 return (ENOSYS); 266 #endif 267 } 268 269 /* 270 * XXX: kern_execve has the astonishing property of not always returning to 271 * the caller. If sufficiently bad things happen during the call to 272 * do_execve(), it can end up calling exit1(); as a result, callers must 273 * avoid doing anything which they might need to undo (e.g., allocating 274 * memory). 275 */ 276 int 277 kern_execve(td, args, mac_p) 278 struct thread *td; 279 struct image_args *args; 280 struct mac *mac_p; 281 { 282 struct proc *p = td->td_proc; 283 int error; 284 285 AUDIT_ARG_ARGV(args->begin_argv, args->argc, 286 args->begin_envv - args->begin_argv); 287 AUDIT_ARG_ENVV(args->begin_envv, args->envc, 288 args->endp - args->begin_envv); 289 if (p->p_flag & P_HADTHREADS) { 290 PROC_LOCK(p); 291 if (thread_single(SINGLE_BOUNDARY)) { 292 PROC_UNLOCK(p); 293 exec_free_args(args); 294 return (ERESTART); /* Try again later. */ 295 } 296 PROC_UNLOCK(p); 297 } 298 299 error = do_execve(td, args, mac_p); 300 301 if (p->p_flag & P_HADTHREADS) { 302 PROC_LOCK(p); 303 /* 304 * If success, we upgrade to SINGLE_EXIT state to 305 * force other threads to suicide. 306 */ 307 if (error == 0) 308 thread_single(SINGLE_EXIT); 309 else 310 thread_single_end(); 311 PROC_UNLOCK(p); 312 } 313 314 return (error); 315 } 316 317 /* 318 * In-kernel implementation of execve(). All arguments are assumed to be 319 * userspace pointers from the passed thread. 
*/
/*
 * Parameters:
 *	td	- the thread performing the exec.
 *	args	- copied-in argument block; it is released with
 *		  exec_free_args() on every path out of this function.
 *	mac_p	- MAC label handed to mac_execve_enter() (MAC kernels only).
 *
 * Returns 0 on success or an errno value on failure.  If an error occurs
 * after an image activator has already torn down the old address space
 * (imgp->vmspace_destroyed), the function does not return; it calls
 * exit1() with SIGABRT instead.
 */
static int
do_execve(td, args, mac_p)
	struct thread *td;
	struct image_args *args;
	struct mac *mac_p;
{
	struct proc *p = td->td_proc;
	struct nameidata nd;
	struct ucred *newcred = NULL, *oldcred;
	struct uidinfo *euip;
	register_t *stack_base;
	int error, i;
	struct image_params image_params, *imgp;
	struct vattr attr;
	int (*img_first)(struct image_params *);
	struct pargs *oldargs = NULL, *newargs = NULL;
	struct sigacts *oldsigacts, *newsigacts;
#ifdef KTRACE
	struct vnode *tracevp = NULL;
	struct ucred *tracecred = NULL;
#endif
	struct vnode *textvp = NULL, *binvp = NULL;
	int credential_changing;
	int vfslocked;
	int textset;
#ifdef MAC
	struct label *interpvplabel = NULL;
	int will_transition;
#endif
#ifdef HWPMC_HOOKS
	struct pmckern_procexec pe;
#endif
	/* Fallback p_comm when vn_commname() fails for an fd-based exec. */
	static const char fexecv_proc_title[] = "(fexecv)";

	vfslocked = 0;
	imgp = &image_params;

	/*
	 * Lock the process and set the P_INEXEC flag to indicate that
	 * it should be left alone until we're done here.  This is
	 * necessary to avoid race conditions - e.g. in ptrace() -
	 * that might allow a local user to illicitly obtain elevated
	 * privileges.
	 */
	PROC_LOCK(p);
	KASSERT((p->p_flag & P_INEXEC) == 0,
	    ("%s(): process already has P_INEXEC flag", __func__));
	p->p_flag |= P_INEXEC;
	PROC_UNLOCK(p);

	/*
	 * Initialize part of the common data
	 */
	imgp->proc = p;
	imgp->execlabel = NULL;
	imgp->attr = &attr;
	imgp->entry_addr = 0;
	imgp->reloc_base = 0;
	imgp->vmspace_destroyed = 0;
	imgp->interpreted = 0;
	imgp->opened = 0;
	imgp->interpreter_name = NULL;
	imgp->auxargs = NULL;
	imgp->vp = NULL;
	imgp->object = NULL;
	imgp->firstpage = NULL;
	imgp->ps_strings = 0;
	imgp->auxarg_size = 0;
	imgp->args = args;
	imgp->execpath = imgp->freepath = NULL;
	imgp->execpathp = 0;
	imgp->canary = 0;
	imgp->canarylen = 0;
	imgp->pagesizes = 0;
	imgp->pagesizeslen = 0;
	imgp->stack_prot = 0;

#ifdef MAC
	error = mac_execve_enter(imgp, mac_p);
	if (error)
		goto exec_fail;
#endif

	imgp->image_header = NULL;

	/*
	 * Translate the file name. namei() returns a vnode pointer
	 *	in ni_vp among other things.
	 *
	 * XXXAUDIT: It would be desirable to also audit the name of the
	 * interpreter if this is an interpreted binary.
	 */
	if (args->fname != NULL) {
		NDINIT(&nd, LOOKUP, ISOPEN | LOCKLEAF | FOLLOW | SAVENAME
		    | MPSAFE | AUDITVNODE1, UIO_SYSSPACE, args->fname, td);
	}

	SDT_PROBE(proc, kernel, , exec, args->fname, 0, 0, 0, 0 );

	/*
	 * Re-entered (via goto) once an activator has flagged the image as
	 * interpreted, with args->fname replaced by the interpreter's name.
	 */
interpret:
	if (args->fname != NULL) {
#ifdef CAPABILITY_MODE
		/*
		 * While capability mode can't reach this point via direct
		 * path arguments to execve(), we also don't allow
		 * interpreters to be used in capability mode (for now).
		 * Catch indirect lookups and return a permissions error.
		 */
		if (IN_CAPABILITY_MODE(td)) {
			error = ECAPMODE;
			goto exec_fail;
		}
#endif
		error = namei(&nd);
		if (error)
			goto exec_fail;

		vfslocked = NDHASGIANT(&nd);
		binvp  = nd.ni_vp;
		imgp->vp = binvp;
	} else {
		AUDIT_ARG_FD(args->fd);
		/*
		 * Some might argue that CAP_READ and/or CAP_MMAP should also
		 * be required here; such arguments will be entertained.
		 */
		error = fgetvp_read(td, args->fd, CAP_FEXECVE, &binvp);
		if (error)
			goto exec_fail;
		vfslocked = VFS_LOCK_GIANT(binvp->v_mount);
		vn_lock(binvp, LK_EXCLUSIVE | LK_RETRY);
		AUDIT_ARG_VNODE1(binvp);
		imgp->vp = binvp;
	}

	/*
	 * Check file permissions (also 'opens' file)
	 */
	error = exec_check_permissions(imgp);
	if (error)
		goto exec_fail_dealloc;

	imgp->object = imgp->vp->v_object;
	if (imgp->object != NULL)
		vm_object_reference(imgp->object);

	/*
	 * Set VV_TEXT now so no one can write to the executable while we're
	 * activating it.
	 *
	 * Remember if this was set before and unset it in case this is not
	 * actually an executable image.
	 */
	textset = imgp->vp->v_vflag & VV_TEXT;
	ASSERT_VOP_ELOCKED(imgp->vp, "vv_text");
	imgp->vp->v_vflag |= VV_TEXT;

	error = exec_map_first_page(imgp);
	if (error)
		goto exec_fail_dealloc;

	imgp->proc->p_osrel = 0;
	/*
	 *	If the current process has a special image activator it
	 *	wants to try first, call it.   For example, emulating shell
	 *	scripts differently.
	 */
	error = -1;
	if ((img_first = imgp->proc->p_sysent->sv_imgact_try) != NULL)
		error = img_first(imgp);

	/*
	 *	Loop through the list of image activators, calling each one.
	 *	An activator returns -1 if there is no match, 0 on success,
	 *	and an error otherwise.
	 */
	for (i = 0; error == -1 && execsw[i]; ++i) {
		if (execsw[i]->ex_imgact == NULL ||
		    execsw[i]->ex_imgact == img_first) {
			continue;
		}
		error = (*execsw[i]->ex_imgact)(imgp);
	}

	if (error) {
		if (error == -1) {
			/* No activator matched: undo our VV_TEXT if we set it. */
			if (textset == 0) {
				ASSERT_VOP_ELOCKED(imgp->vp, "vv_text");
				imgp->vp->v_vflag &= ~VV_TEXT;
			}
			error = ENOEXEC;
		}
		goto exec_fail_dealloc;
	}

	/*
	 * Special interpreter operation, cleanup and loop up to try to
	 * activate the interpreter.
	 */
	if (imgp->interpreted) {
		exec_unmap_first_page(imgp);
		/*
		 * VV_TEXT needs to be unset for scripts.  There is a short
		 * period before we determine that something is a script where
		 * VV_TEXT will be set. The vnode lock is held over this
		 * entire period so nothing should illegitimately be blocked.
		 */
		imgp->vp->v_vflag &= ~VV_TEXT;
		/* free name buffer and old vnode */
		if (args->fname != NULL)
			NDFREE(&nd, NDF_ONLY_PNBUF);
#ifdef MAC
		mac_execve_interpreter_enter(binvp, &interpvplabel);
#endif
		if (imgp->opened) {
			VOP_CLOSE(binvp, FREAD, td->td_ucred, td);
			imgp->opened = 0;
		}
		vput(binvp);
		vm_object_deallocate(imgp->object);
		imgp->object = NULL;
		VFS_UNLOCK_GIANT(vfslocked);
		vfslocked = 0;
		/* set new name to that of the interpreter */
		NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME | MPSAFE,
		    UIO_SYSSPACE, imgp->interpreter_name, td);
		args->fname = imgp->interpreter_name;
		goto interpret;
	}

	/*
	 * NB: We unlock the vnode here because it is believed that none
	 * of the sv_copyout_strings/sv_fixup operations require the vnode.
	 */
	VOP_UNLOCK(imgp->vp, 0);

	/*
	 * Do the best to calculate the full path to the image file.
	 * args->fname is used as the exec path when it is absolute or when
	 * vn_fullpath() fails.
	 */
	if (imgp->auxargs != NULL &&
	    ((args->fname != NULL && args->fname[0] == '/') ||
	     vn_fullpath(td, imgp->vp, &imgp->execpath, &imgp->freepath) != 0))
		imgp->execpath = args->fname;

	/*
	 * Copy out strings (args and env) and initialize stack base
	 */
	if (p->p_sysent->sv_copyout_strings)
		stack_base = (*p->p_sysent->sv_copyout_strings)(imgp);
	else
		stack_base = exec_copyout_strings(imgp);

	/*
	 * If custom stack fixup routine present for this process
	 * let it do the stack setup.
	 * Else stuff argument count as first item on stack
	 */
	if (p->p_sysent->sv_fixup != NULL)
		(*p->p_sysent->sv_fixup)(&stack_base, imgp);
	else
		suword(--stack_base, imgp->args->argc);

	/*
	 * For security and other reasons, the file descriptor table cannot
	 * be shared after an exec.
	 */
	fdunshare(p, td);

	/*
	 * Malloc things before we need locks.
	 */
	newcred = crget();
	euip = uifind(attr.va_uid);
	/* i = number of bytes occupied by the argument strings. */
	i = imgp->args->begin_envv - imgp->args->begin_argv;
	/* Cache arguments if they fit inside our allowance */
	if (ps_arg_cache_limit >= i + sizeof(struct pargs)) {
		newargs = pargs_alloc(i);
		bcopy(imgp->args->begin_argv, newargs->ar_args, i);
	}

	/* close files on exec */
	fdcloseexec(td);
	vn_lock(imgp->vp, LK_SHARED | LK_RETRY);

	/* Get a reference to the vnode prior to locking the proc */
	VREF(binvp);

	/*
	 * For security and other reasons, signal handlers cannot
	 * be shared after an exec. The new process gets a copy of the old
	 * handlers. In execsigs(), the new process will have its signals
	 * reset.
	 */
	PROC_LOCK(p);
	oldcred = crcopysafe(p, newcred);
	if (sigacts_shared(p->p_sigacts)) {
		oldsigacts = p->p_sigacts;
		/* The proc lock is dropped around the allocation and copy. */
		PROC_UNLOCK(p);
		newsigacts = sigacts_alloc();
		sigacts_copy(newsigacts, oldsigacts);
		PROC_LOCK(p);
		p->p_sigacts = newsigacts;
	} else
		oldsigacts = NULL;

	/* Stop profiling */
	stopprofclock(p);

	/* reset caught signals */
	execsigs(p);

	/* name this process - nameiexec(p, ndp) */
	bzero(p->p_comm, sizeof(p->p_comm));
	if (args->fname)
		bcopy(nd.ni_cnd.cn_nameptr, p->p_comm,
		    min(nd.ni_cnd.cn_namelen, MAXCOMLEN));
	else if (vn_commname(binvp, p->p_comm, sizeof(p->p_comm)) != 0)
		bcopy(fexecv_proc_title, p->p_comm, sizeof(fexecv_proc_title));
	bcopy(p->p_comm, td->td_name, sizeof(td->td_name));
#ifdef KTR
	sched_clear_tdname(td);
#endif

	/*
	 * mark as execed, wakeup the process that vforked (if any) and tell
	 * it that it now has its own resources back
	 */
	p->p_flag |= P_EXEC;
	if (p->p_pptr && (p->p_flag & P_PPWAIT)) {
		p->p_flag &= ~P_PPWAIT;
		cv_broadcast(&p->p_pwait);
	}

	/*
	 * Implement image setuid/setgid.
	 *
	 * Don't honor setuid/setgid if the filesystem prohibits it or if
	 * the process is being traced.
	 *
	 * We disable setuid/setgid/etc in capability mode on the basis
	 * that most setugid applications are not written with that
	 * environment in mind, and will therefore almost certainly operate
	 * incorrectly. In principle there's no reason that setugid
	 * applications might not be useful in capability mode, so we may want
	 * to reconsider this conservative design choice in the future.
	 *
	 * XXXMAC: For the time being, use NOSUID to also prohibit
	 * transitions on the file system.
	 */
	credential_changing = 0;
	credential_changing |= (attr.va_mode & S_ISUID) && oldcred->cr_uid !=
	    attr.va_uid;
	credential_changing |= (attr.va_mode & S_ISGID) && oldcred->cr_gid !=
	    attr.va_gid;
#ifdef MAC
	will_transition = mac_vnode_execve_will_transition(oldcred, imgp->vp,
	    interpvplabel, imgp);
	credential_changing |= will_transition;
#endif

	if (credential_changing &&
#ifdef CAPABILITY_MODE
	    ((oldcred->cr_flags & CRED_FLAG_CAPMODE) == 0) &&
#endif
	    (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 &&
	    (p->p_flag & P_TRACED) == 0) {
		/*
		 * Turn off syscall tracing for set-id programs, except for
		 * root.  Record any set-id flags first to make sure that
		 * we do not regain any tracing during a possible block.
		 */
		setsugid(p);

#ifdef KTRACE
		if (priv_check_cred(oldcred, PRIV_DEBUG_DIFFCRED, 0))
			ktrprocexec(p, &tracecred, &tracevp);
#endif
		/*
		 * Close any file descriptors 0..2 that reference procfs,
		 * then make sure file descriptors 0..2 are in use.
		 *
		 * setugidsafety() may call closef() and then pfind()
		 * which may grab the process lock.
		 * fdcheckstd() may call falloc() which may block to
		 * allocate memory, so temporarily drop the process lock.
		 */
		PROC_UNLOCK(p);
		VOP_UNLOCK(imgp->vp, 0);
		setugidsafety(td);
		error = fdcheckstd(td);
		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
		/* On failure, leave with the proc lock still released. */
		if (error != 0)
			goto done1;
		PROC_LOCK(p);
		/*
		 * Set the new credentials.
		 */
		if (attr.va_mode & S_ISUID)
			change_euid(newcred, euip);
		if (attr.va_mode & S_ISGID)
			change_egid(newcred, attr.va_gid);
#ifdef MAC
		if (will_transition) {
			mac_vnode_execve_transition(oldcred, newcred, imgp->vp,
			    interpvplabel, imgp);
		}
#endif
		/*
		 * Implement correct POSIX saved-id behavior.
		 *
		 * XXXMAC: Note that the current logic will save the
		 * uid and gid if a MAC domain transition occurs, even
		 * though maybe it shouldn't.
		 */
		change_svuid(newcred, newcred->cr_uid);
		change_svgid(newcred, newcred->cr_gid);
		p->p_ucred = newcred;
		/* newcred == NULL tells done1 that oldcred is the one to free. */
		newcred = NULL;
	} else {
		if (oldcred->cr_uid == oldcred->cr_ruid &&
		    oldcred->cr_gid == oldcred->cr_rgid)
			p->p_flag &= ~P_SUGID;
		/*
		 * Implement correct POSIX saved-id behavior.
		 *
		 * XXX: It's not clear that the existing behavior is
		 * POSIX-compliant.  A number of sources indicate that the
		 * saved uid/gid should only be updated if the new ruid is
		 * not equal to the old ruid, or the new euid is not equal
		 * to the old euid and the new euid is not equal to the old
		 * ruid.  The FreeBSD code always updates the saved uid/gid.
		 * Also, this code uses the new (replaced) euid and egid as
		 * the source, which may or may not be the right ones to use.
		 */
		if (oldcred->cr_svuid != oldcred->cr_uid ||
		    oldcred->cr_svgid != oldcred->cr_gid) {
			change_svuid(newcred, newcred->cr_uid);
			change_svgid(newcred, newcred->cr_gid);
			p->p_ucred = newcred;
			newcred = NULL;
		}
	}

	/*
	 * Store the vp for use in procfs.  This vnode was referenced prior
	 * to locking the proc lock.
	 */
	textvp = p->p_textvp;
	p->p_textvp = binvp;

#ifdef KDTRACE_HOOKS
	/*
	 * Tell the DTrace fasttrap provider about the exec if it
	 * has declared an interest.
	 */
	if (dtrace_fasttrap_exec)
		dtrace_fasttrap_exec(p);
#endif

	/*
	 * Notify others that we exec'd, and clear the P_INEXEC flag
	 * as we're now a bona fide freshly-execed process.
	 */
	KNOTE_LOCKED(&p->p_klist, NOTE_EXEC);
	p->p_flag &= ~P_INEXEC;

	/* clear "fork but no exec" flag, as we _are_ execing */
	p->p_acflag &= ~AFORK;

	/*
	 * Free any previous argument cache and replace it with
	 * the new argument cache, if any.
	 */
	oldargs = p->p_args;
	p->p_args = newargs;
	newargs = NULL;

#ifdef HWPMC_HOOKS
	/*
	 * Check if system-wide sampling is in effect or if the
	 * current process is using PMCs.  If so, do exec() time
	 * processing.  This processing needs to happen AFTER the
	 * P_INEXEC flag is cleared.
	 *
	 * The proc lock needs to be released before taking the PMC
	 * SX.
	 */
	if (PMC_SYSTEM_SAMPLING_ACTIVE() || PMC_PROC_IS_USING_PMCS(p)) {
		PROC_UNLOCK(p);
		VOP_UNLOCK(imgp->vp, 0);
		pe.pm_credentialschanged = credential_changing;
		pe.pm_entryaddr = imgp->entry_addr;

		PMC_CALL_HOOK_X(td, PMC_FN_PROCESS_EXEC, (void *) &pe);
		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
	} else
		PROC_UNLOCK(p);
#else  /* !HWPMC_HOOKS */
	PROC_UNLOCK(p);
#endif

	/* Set values passed into the program in registers. */
	if (p->p_sysent->sv_setregs)
		(*p->p_sysent->sv_setregs)(td, imgp,
		    (u_long)(uintptr_t)stack_base);
	else
		exec_setregs(td, imgp, (u_long)(uintptr_t)stack_base);

	vfs_mark_atime(imgp->vp, td->td_ucred);

	SDT_PROBE(proc, kernel, , exec_success, args->fname, 0, 0, 0, 0);

	/*
	 * Reached on success, and on fdcheckstd() failure: releases what
	 * was allocated after the image was activated.
	 */
done1:
	/*
	 * Free any resources malloc'd earlier that we didn't use.
	 */
	uifree(euip);
	if (newcred == NULL)
		crfree(oldcred);
	else
		crfree(newcred);
	VOP_UNLOCK(imgp->vp, 0);

	/*
	 * Handle deferred decrement of ref counts.
	 */
	if (textvp != NULL) {
		int tvfslocked;

		tvfslocked = VFS_LOCK_GIANT(textvp->v_mount);
		vrele(textvp);
		VFS_UNLOCK_GIANT(tvfslocked);
	}
	/* On error, drop the reference taken by VREF(binvp) above. */
	if (binvp && error != 0)
		vrele(binvp);
#ifdef KTRACE
	if (tracevp != NULL) {
		int tvfslocked;

		tvfslocked = VFS_LOCK_GIANT(tracevp->v_mount);
		vrele(tracevp);
		VFS_UNLOCK_GIANT(tvfslocked);
	}
	if (tracecred != NULL)
		crfree(tracecred);
#endif
	vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
	pargs_drop(oldargs);
	pargs_drop(newargs);
	if (oldsigacts != NULL)
		sigacts_free(oldsigacts);

	/*
	 * Reached by fall-through from done1 and directly by failures that
	 * occur while the image vnode is held.
	 */
exec_fail_dealloc:

	/*
	 * free various allocated resources
	 */
	if (imgp->firstpage != NULL)
		exec_unmap_first_page(imgp);

	if (imgp->vp != NULL) {
		if (args->fname)
			NDFREE(&nd, NDF_ONLY_PNBUF);
		if (imgp->opened)
			VOP_CLOSE(imgp->vp, FREAD, td->td_ucred, td);
		vput(imgp->vp);
	}

	if (imgp->object != NULL)
		vm_object_deallocate(imgp->object);

	free(imgp->freepath, M_TEMP);

	if (error == 0) {
		PROC_LOCK(p);
		td->td_dbgflags |= TDB_EXEC;
		PROC_UNLOCK(p);

		/*
		 * Stop the process here if its stop event mask has
		 * the S_EXEC bit set.
		 */
		STOPEVENT(p, S_EXEC, 0);
		/* Success skips the failure-only bookkeeping below. */
		goto done2;
	}

exec_fail:
	/* we're done here, clear P_INEXEC */
	PROC_LOCK(p);
	p->p_flag &= ~P_INEXEC;
	PROC_UNLOCK(p);

	SDT_PROBE(proc, kernel, , exec_failure, error, 0, 0, 0, 0);

	/* Common exit for both success and failure. */
done2:
#ifdef MAC
	mac_execve_exit(imgp);
	mac_execve_interpreter_exit(interpvplabel);
#endif
	VFS_UNLOCK_GIANT(vfslocked);
	exec_free_args(args);

	if (error && imgp->vmspace_destroyed) {
		/* sorry, no more process anymore. exit gracefully */
		exit1(td, W_EXITCODE(0, SIGABRT));
		/* NOT REACHED */
	}

#ifdef KTRACE
	if (error == 0)
		ktrprocctor(p);
#endif

	return (error);
}

/*
 * Map the first page of the image (offset 0 of imgp->vp's VM object) into
 * kernel address space and publish it through imgp->firstpage and
 * imgp->image_header.  Any mapping already recorded in imgp->firstpage is
 * released first.
 *
 * Returns 0 on success, EACCES if the vnode has no VM object, or EIO if
 * the page could not be brought in by the pager.
 */
int
exec_map_first_page(imgp)
	struct image_params *imgp;
{
	int rv, i;
	int initial_pagein;
	vm_page_t ma[VM_INITIAL_PAGEIN];
	vm_object_t object;

	if (imgp->firstpage != NULL)
		exec_unmap_first_page(imgp);

	object = imgp->vp->v_object;
	if (object == NULL)
		return (EACCES);
	VM_OBJECT_LOCK(object);
#if VM_NRESERVLEVEL > 0
	if ((object->flags & OBJ_COLORED) == 0) {
		object->flags |= OBJ_COLORED;
		object->pg_color = 0;
	}
#endif
	ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
	if (ma[0]->valid != VM_PAGE_BITS_ALL) {
		/*
		 * Page 0 is not fully valid: gather up to VM_INITIAL_PAGEIN
		 * consecutive pages (bounded by the object size) and ask
		 * the pager for them in one request.  The run stops at the
		 * first page that is already valid or busy, or that cannot
		 * be allocated.
		 */
		initial_pagein = VM_INITIAL_PAGEIN;
		if (initial_pagein > object->size)
			initial_pagein = object->size;
		for (i = 1; i < initial_pagein; i++) {
			if ((ma[i] = vm_page_next(ma[i - 1])) != NULL) {
				if (ma[i]->valid)
					break;
				if ((ma[i]->oflags & VPO_BUSY) || ma[i]->busy)
					break;
				vm_page_busy(ma[i]);
			} else {
				ma[i] = vm_page_alloc(object, i,
				    VM_ALLOC_NORMAL | VM_ALLOC_IFNOTCACHED);
				if (ma[i] == NULL)
					break;
			}
		}
		initial_pagein = i;
		rv = vm_pager_get_pages(object, ma, initial_pagein, 0);
		/* Look page 0 up again after the pager call. */
		ma[0] = vm_page_lookup(object, 0);
		if ((rv != VM_PAGER_OK) || (ma[0] == NULL)) {
			if (ma[0] != NULL) {
				vm_page_lock(ma[0]);
				vm_page_free(ma[0]);
				vm_page_unlock(ma[0]);
			}
			VM_OBJECT_UNLOCK(object);
			return (EIO);
		}
	}
	/* Hold the page; exec_unmap_first_page() drops the hold. */
	vm_page_lock(ma[0]);
	vm_page_hold(ma[0]);
	vm_page_unlock(ma[0]);
	vm_page_wakeup(ma[0]);
	VM_OBJECT_UNLOCK(object);

	imgp->firstpage = sf_buf_alloc(ma[0], 0);
	imgp->image_header = (char *)sf_buf_kva(imgp->firstpage);

	return (0);
}

/*
 * Release the mapping recorded in imgp->firstpage, if there is one.
 */
void
exec_unmap_first_page(imgp)
	struct image_params *imgp;
{
/* m: the page behind the firstpage sf_buf; its hold is dropped below. */
	vm_page_t m;

	if (imgp->firstpage != NULL) {
		m = sf_buf_page(imgp->firstpage);
		sf_buf_free(imgp->firstpage);
		imgp->firstpage = NULL;
		vm_page_lock(m);
		vm_page_unhold(m);
		vm_page_unlock(m);
	}
}

/*
 * Destroy old address space, and allocate a new stack
 *	The new stack is only SGROWSIZ large because it is grown
 *	automatically in trap.c.
 *
 * imgp->vmspace_destroyed is set on entry, before anything can fail, and
 * imgp->sysent is set to sv.  Returns 0 on success, or the error from
 * vmspace_exec(), vm_map_fixed() or vm_map_stack().
 */
int
exec_new_vmspace(imgp, sv)
	struct image_params *imgp;
	struct sysentvec *sv;
{
	int error;
	struct proc *p = imgp->proc;
	struct vmspace *vmspace = p->p_vmspace;
	vm_object_t obj;
	vm_offset_t sv_minuser, stack_addr;
	vm_map_t map;
	u_long ssiz;

	imgp->vmspace_destroyed = 1;
	imgp->sysent = sv;

	/* May be called with Giant held */
	EVENTHANDLER_INVOKE(process_exec, p, imgp);

	/*
	 * Blow away entire process VM, if address space not shared,
	 * otherwise, create a new VM space so that other threads are
	 * not disrupted
	 */
	map = &vmspace->vm_map;
	/* Unless map_at_zero is set, the lowest page is kept out of the map. */
	if (map_at_zero)
		sv_minuser = sv->sv_minuser;
	else
		sv_minuser = MAX(sv->sv_minuser, PAGE_SIZE);
	/* Reuse the vmspace only if unshared and its bounds already match. */
	if (vmspace->vm_refcnt == 1 && vm_map_min(map) == sv_minuser &&
	    vm_map_max(map) == sv->sv_maxuser) {
		shmexit(vmspace);
		pmap_remove_pages(vmspace_pmap(vmspace));
		vm_map_remove(map, vm_map_min(map), vm_map_max(map));
	} else {
		error = vmspace_exec(p, sv_minuser, sv->sv_maxuser);
		if (error)
			return (error);
		vmspace = p->p_vmspace;
		map = &vmspace->vm_map;
	}

	/* Map a shared page */
	obj = sv->sv_shared_page_obj;
	if (obj != NULL) {
		vm_object_reference(obj);
		error = vm_map_fixed(map, obj, 0,
		    sv->sv_shared_page_base, sv->sv_shared_page_len,
		    VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_ALL,
		    MAP_COPY_ON_WRITE | MAP_ACC_NO_CHARGE);
		if (error) {
			/* Drop the reference taken just above. */
			vm_object_deallocate(obj);
			return (error);
		}
	}

	/* Allocate a new stack */
	if (sv->sv_maxssiz != NULL)
		ssiz = *sv->sv_maxssiz;
	else
		ssiz = maxssiz;
	stack_addr = sv->sv_usrstack - ssiz;
	/*
	 * The image's own stack_prot is used only when a shared page
	 * object exists and stack_prot is non-zero; otherwise the
	 * sysentvec default applies.
	 */
	error = vm_map_stack(map, stack_addr, (vm_size_t)ssiz,
	    obj != NULL && imgp->stack_prot != 0 ? imgp->stack_prot :
		sv->sv_stackprot,
	    VM_PROT_ALL, MAP_STACK_GROWS_DOWN);
	if (error)
		return (error);

#ifdef __ia64__
	/* Allocate a new register stack */
	stack_addr = IA64_BACKINGSTORE;
	error = vm_map_stack(map, stack_addr, (vm_size_t)ssiz,
	    sv->sv_stackprot, VM_PROT_ALL, MAP_STACK_GROWS_UP);
	if (error)
		return (error);
#endif

	/* vm_ssize and vm_maxsaddr are somewhat antiquated concepts in the
	 * VM_STACK case, but they are still used to monitor the size of the
	 * process stack so we can check the stack rlimit.
	 */
	vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT;
	vmspace->vm_maxsaddr = (char *)sv->sv_usrstack - ssiz;

	return (0);
}

/*
 * Copy the file name, argument strings and environment strings from the
 * old process address space into the temporary string buffer.
 *
 * args is zeroed first.  fname may be NULL, in which case no path is
 * stored; otherwise it is read with copystr() when segflg is UIO_SYSSPACE
 * and with copyinstr() when it is not.  argv and envv are always read as
 * user-space vectors; envv may be NULL.
 *
 * Returns 0 on success.  Returns EFAULT if argv is NULL or a vector entry
 * cannot be fetched, E2BIG if the strings exceed the remaining string
 * space, or another error from the allocation or copy routines.  On
 * failure after the buffer has been allocated, it is released with
 * exec_free_args().
 */
int
exec_copyin_args(struct image_args *args, char *fname,
    enum uio_seg segflg, char **argv, char **envv)
{
	char *argp, *envp;
	int error;
	size_t length;

	bzero(args, sizeof(*args));
	if (argv == NULL)
		return (EFAULT);

	/*
	 * Allocate demand-paged memory for the file name, argument, and
	 * environment strings.
	 */
	error = exec_alloc_args(args);
	if (error != 0)
		return (error);

	/*
	 * Copy the file name.
	 */
	if (fname != NULL) {
		args->fname = args->buf;
		error = (segflg == UIO_SYSSPACE) ?
		    copystr(fname, args->fname, PATH_MAX, &length) :
		    copyinstr(fname, args->fname, PATH_MAX, &length);
		if (error != 0)
			goto err_exit;
	} else
		length = 0;

	/* The argument strings start right after the file name. */
	args->begin_argv = args->buf + length;
	args->endp = args->begin_argv;
	args->stringspace = ARG_MAX;

	/*
	 * extract arguments first
	 */
	while ((argp = (caddr_t) (intptr_t) fuword(argv++))) {
		/* A fetched value of -1 is treated as a fault. */
		if (argp == (caddr_t) -1) {
			error = EFAULT;
			goto err_exit;
		}
		if ((error = copyinstr(argp, args->endp,
		    args->stringspace, &length))) {
			if (error == ENAMETOOLONG) 
				error = E2BIG;
			goto err_exit;
		}
		args->stringspace -= length;
		args->endp += length;
		args->argc++;
	}

	args->begin_envv = args->endp;

	/*
	 * extract environment strings
	 */
	if (envv) {
		while ((envp = (caddr_t)(intptr_t)fuword(envv++))) {
			if (envp == (caddr_t)-1) {
				error = EFAULT;
				goto err_exit;
			}
			if ((error = copyinstr(envp, args->endp,
			    args->stringspace, &length))) {
				if (error == ENAMETOOLONG)
					error = E2BIG;
				goto err_exit;
			}
			args->stringspace -= length;
			args->endp += length;
			args->envc++;
		}
	}

	return (0);

err_exit:
	exec_free_args(args);
	return (error);
}

/*
 * Allocate temporary demand-paged, zero-filled memory for the file name,
 * argument, and environment strings.  Returns zero if the allocation succeeds
 * and ENOMEM otherwise.
 *
 * The buffer is PATH_MAX + ARG_MAX bytes, taken from exec_map.
 */
int
exec_alloc_args(struct image_args *args)
{

	args->buf = (char *)kmem_alloc_wait(exec_map, PATH_MAX + ARG_MAX);
	return (args->buf != NULL ? 0 : ENOMEM);
}

/*
 * Release the buffers owned by args: the exec_map string buffer and the
 * M_TEMP fname_buf.  Each pointer is cleared after it is freed, so a
 * second call on the same args does nothing.
 */
void
exec_free_args(struct image_args *args)
{

	if (args->buf != NULL) {
		kmem_free_wakeup(exec_map, (vm_offset_t)args->buf,
		    PATH_MAX + ARG_MAX);
		args->buf = NULL;
	}
	if (args->fname_buf != NULL) {
		free(args->fname_buf, M_TEMP);
		args->fname_buf = NULL;
	}
}

/*
 * Copy strings out to the new process address space, constructing new arg
 * and env vector tables. Return a pointer to the base so that it can be used
 * as the initial stack pointer.
 */
register_t *
exec_copyout_strings(imgp)
	struct image_params *imgp;
{
	int argc, envc;
	char **vectp;
	char *stringp, *destp;
	register_t *stack_base;
	struct ps_strings *arginfo;
	struct proc *p;
	size_t execpath_len;
	int szsigcode, szps;
	char canary[sizeof(long) * 8];

	szps = sizeof(pagesizes[0]) * MAXPAGESIZES;
	/*
	 * Calculate string base and vector table pointers.
	 * Also deal with signal trampoline code for this exec type.
	 */
	if (imgp->execpath != NULL && imgp->auxargs != NULL)
		execpath_len = strlen(imgp->execpath) + 1;
	else
		execpath_len = 0;
	p = imgp->proc;
	szsigcode = 0;
	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
	/* Room for sigcode is reserved here only if sv_sigcode_base is 0. */
	if (p->p_sysent->sv_sigcode_base == 0) {
		if (p->p_sysent->sv_szsigcode != NULL)
			szsigcode = *(p->p_sysent->sv_szsigcode);
	}
	destp =	(caddr_t)arginfo - szsigcode - SPARE_USRSPACE -
	    roundup(execpath_len, sizeof(char *)) -
	    roundup(sizeof(canary), sizeof(char *)) -
	    roundup(szps, sizeof(char *)) -
	    roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));

	/*
	 * install sigcode
	 */
	if (szsigcode != 0)
		copyout(p->p_sysent->sv_sigcode, ((caddr_t)arginfo -
		    szsigcode), szsigcode);

	/*
	 * Copy the image path for the rtld.
1280 */ 1281 if (execpath_len != 0) { 1282 imgp->execpathp = (uintptr_t)arginfo - szsigcode - execpath_len; 1283 copyout(imgp->execpath, (void *)imgp->execpathp, 1284 execpath_len); 1285 } 1286 1287 /* 1288 * Prepare the canary for SSP. 1289 */ 1290 arc4rand(canary, sizeof(canary), 0); 1291 imgp->canary = (uintptr_t)arginfo - szsigcode - execpath_len - 1292 sizeof(canary); 1293 copyout(canary, (void *)imgp->canary, sizeof(canary)); 1294 imgp->canarylen = sizeof(canary); 1295 1296 /* 1297 * Prepare the pagesizes array. 1298 */ 1299 imgp->pagesizes = (uintptr_t)arginfo - szsigcode - execpath_len - 1300 roundup(sizeof(canary), sizeof(char *)) - szps; 1301 copyout(pagesizes, (void *)imgp->pagesizes, szps); 1302 imgp->pagesizeslen = szps; 1303 1304 /* 1305 * If we have a valid auxargs ptr, prepare some room 1306 * on the stack. 1307 */ 1308 if (imgp->auxargs) { 1309 /* 1310 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for 1311 * lower compatibility. 1312 */ 1313 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size : 1314 (AT_COUNT * 2); 1315 /* 1316 * The '+ 2' is for the null pointers at the end of each of 1317 * the arg and env vector sets,and imgp->auxarg_size is room 1318 * for argument of Runtime loader. 1319 */ 1320 vectp = (char **)(destp - (imgp->args->argc + 1321 imgp->args->envc + 2 + imgp->auxarg_size) 1322 * sizeof(char *)); 1323 } else { 1324 /* 1325 * The '+ 2' is for the null pointers at the end of each of 1326 * the arg and env vector sets 1327 */ 1328 vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) * 1329 sizeof(char *)); 1330 } 1331 1332 /* 1333 * vectp also becomes our initial stack base 1334 */ 1335 stack_base = (register_t *)vectp; 1336 1337 stringp = imgp->args->begin_argv; 1338 argc = imgp->args->argc; 1339 envc = imgp->args->envc; 1340 1341 /* 1342 * Copy out strings - arguments and environment. 
1343 */ 1344 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); 1345 1346 /* 1347 * Fill in "ps_strings" struct for ps, w, etc. 1348 */ 1349 suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp); 1350 suword32(&arginfo->ps_nargvstr, argc); 1351 1352 /* 1353 * Fill in argument portion of vector table. 1354 */ 1355 for (; argc > 0; --argc) { 1356 suword(vectp++, (long)(intptr_t)destp); 1357 while (*stringp++ != 0) 1358 destp++; 1359 destp++; 1360 } 1361 1362 /* a null vector table pointer separates the argp's from the envp's */ 1363 suword(vectp++, 0); 1364 1365 suword(&arginfo->ps_envstr, (long)(intptr_t)vectp); 1366 suword32(&arginfo->ps_nenvstr, envc); 1367 1368 /* 1369 * Fill in environment portion of vector table. 1370 */ 1371 for (; envc > 0; --envc) { 1372 suword(vectp++, (long)(intptr_t)destp); 1373 while (*stringp++ != 0) 1374 destp++; 1375 destp++; 1376 } 1377 1378 /* end of vector table is a null pointer */ 1379 suword(vectp, 0); 1380 1381 return (stack_base); 1382 } 1383 1384 /* 1385 * Check permissions of file to execute. 1386 * Called with imgp->vp locked. 1387 * Return 0 for success or error code on failure. 1388 */ 1389 int 1390 exec_check_permissions(imgp) 1391 struct image_params *imgp; 1392 { 1393 struct vnode *vp = imgp->vp; 1394 struct vattr *attr = imgp->attr; 1395 struct thread *td; 1396 int error; 1397 1398 td = curthread; 1399 1400 /* Get file attributes */ 1401 error = VOP_GETATTR(vp, attr, td->td_ucred); 1402 if (error) 1403 return (error); 1404 1405 #ifdef MAC 1406 error = mac_vnode_check_exec(td->td_ucred, imgp->vp, imgp); 1407 if (error) 1408 return (error); 1409 #endif 1410 1411 /* 1412 * 1) Check if file execution is disabled for the filesystem that 1413 * this file resides on. 1414 * 2) Ensure that at least one execute bit is on. Otherwise, a 1415 * privileged user will always succeed, and we don't want this 1416 * to happen unless the file really is executable. 1417 * 3) Ensure that the file is a regular file. 
1418 */ 1419 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || 1420 (attr->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0 || 1421 (attr->va_type != VREG)) 1422 return (EACCES); 1423 1424 /* 1425 * Zero length files can't be exec'd 1426 */ 1427 if (attr->va_size == 0) 1428 return (ENOEXEC); 1429 1430 /* 1431 * Check for execute permission to file based on current credentials. 1432 */ 1433 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); 1434 if (error) 1435 return (error); 1436 1437 /* 1438 * Check number of open-for-writes on the file and deny execution 1439 * if there are any. 1440 */ 1441 if (vp->v_writecount) 1442 return (ETXTBSY); 1443 1444 /* 1445 * Call filesystem specific open routine (which does nothing in the 1446 * general case). 1447 */ 1448 error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); 1449 if (error == 0) 1450 imgp->opened = 1; 1451 return (error); 1452 } 1453 1454 /* 1455 * Exec handler registration 1456 */ 1457 int 1458 exec_register(execsw_arg) 1459 const struct execsw *execsw_arg; 1460 { 1461 const struct execsw **es, **xs, **newexecsw; 1462 int count = 2; /* New slot and trailing NULL */ 1463 1464 if (execsw) 1465 for (es = execsw; *es; es++) 1466 count++; 1467 newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK); 1468 if (newexecsw == NULL) 1469 return (ENOMEM); 1470 xs = newexecsw; 1471 if (execsw) 1472 for (es = execsw; *es; es++) 1473 *xs++ = *es; 1474 *xs++ = execsw_arg; 1475 *xs = NULL; 1476 if (execsw) 1477 free(execsw, M_TEMP); 1478 execsw = newexecsw; 1479 return (0); 1480 } 1481 1482 int 1483 exec_unregister(execsw_arg) 1484 const struct execsw *execsw_arg; 1485 { 1486 const struct execsw **es, **xs, **newexecsw; 1487 int count = 1; 1488 1489 if (execsw == NULL) 1490 panic("unregister with no handlers left?\n"); 1491 1492 for (es = execsw; *es; es++) { 1493 if (*es == execsw_arg) 1494 break; 1495 } 1496 if (*es == NULL) 1497 return (ENOENT); 1498 for (es = execsw; *es; es++) 1499 if (*es != execsw_arg) 1500 count++; 1501 newexecsw = 
malloc(count * sizeof(*es), M_TEMP, M_WAITOK); 1502 if (newexecsw == NULL) 1503 return (ENOMEM); 1504 xs = newexecsw; 1505 for (es = execsw; *es; es++) 1506 if (*es != execsw_arg) 1507 *xs++ = *es; 1508 *xs = NULL; 1509 if (execsw) 1510 free(execsw, M_TEMP); 1511 execsw = newexecsw; 1512 return (0); 1513 } 1514 1515 static vm_object_t shared_page_obj; 1516 static int shared_page_free; 1517 1518 int 1519 shared_page_fill(int size, int align, const char *data) 1520 { 1521 vm_page_t m; 1522 struct sf_buf *s; 1523 vm_offset_t sk; 1524 int res; 1525 1526 VM_OBJECT_LOCK(shared_page_obj); 1527 m = vm_page_grab(shared_page_obj, 0, VM_ALLOC_RETRY); 1528 res = roundup(shared_page_free, align); 1529 if (res + size >= IDX_TO_OFF(shared_page_obj->size)) 1530 res = -1; 1531 else { 1532 VM_OBJECT_UNLOCK(shared_page_obj); 1533 s = sf_buf_alloc(m, SFB_DEFAULT); 1534 sk = sf_buf_kva(s); 1535 bcopy(data, (void *)(sk + res), size); 1536 shared_page_free = res + size; 1537 sf_buf_free(s); 1538 VM_OBJECT_LOCK(shared_page_obj); 1539 } 1540 vm_page_wakeup(m); 1541 VM_OBJECT_UNLOCK(shared_page_obj); 1542 return (res); 1543 } 1544 1545 static void 1546 shared_page_init(void *dummy __unused) 1547 { 1548 vm_page_t m; 1549 1550 shared_page_obj = vm_pager_allocate(OBJT_PHYS, 0, PAGE_SIZE, 1551 VM_PROT_DEFAULT, 0, NULL); 1552 VM_OBJECT_LOCK(shared_page_obj); 1553 m = vm_page_grab(shared_page_obj, 0, VM_ALLOC_RETRY | VM_ALLOC_NOBUSY | 1554 VM_ALLOC_ZERO); 1555 m->valid = VM_PAGE_BITS_ALL; 1556 VM_OBJECT_UNLOCK(shared_page_obj); 1557 } 1558 1559 SYSINIT(shp, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)shared_page_init, 1560 NULL); 1561 1562 void 1563 exec_sysvec_init(void *param) 1564 { 1565 struct sysentvec *sv; 1566 1567 sv = (struct sysentvec *)param; 1568 1569 if ((sv->sv_flags & SV_SHP) == 0) 1570 return; 1571 sv->sv_shared_page_obj = shared_page_obj; 1572 sv->sv_sigcode_base = sv->sv_shared_page_base + 1573 shared_page_fill(*(sv->sv_szsigcode), 16, sv->sv_sigcode); 1574 } 1575