/*-
 * Copyright (c) 1993, David Greenman
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_capsicum.h"
#include "opt_hwpmc_hooks.h"
#include "opt_ktrace.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/capsicum.h>
#include <sys/systm.h>
#include <sys/eventhandler.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/signalvar.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/filedesc.h>
#include <sys/fcntl.h>
#include <sys/acct.h>
#include <sys/exec.h>
#include <sys/imgact.h>
#include <sys/imgact_elf.h>
#include <sys/wait.h>
#include <sys/malloc.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/pioctl.h>
#include <sys/namei.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/sf_buf.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/shm.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vm_pager.h>

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

#include <machine/reg.h>

#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>

#ifdef KDTRACE_HOOKS
#include <sys/dtrace_bsd.h>
dtrace_execexit_func_t	dtrace_fasttrap_exec;
#endif

SDT_PROVIDER_DECLARE(proc);
SDT_PROBE_DEFINE1(proc, kernel, , exec, "char *");
SDT_PROBE_DEFINE1(proc, kernel, , exec__failure, "int");
SDT_PROBE_DEFINE1(proc, kernel, , exec__success, "char *");

MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments");

static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS);
static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS);
static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS);
static int do_execve(struct thread *td, struct image_args *args,
    struct mac *mac_p);

/* XXX This should be vm_size_t. */
SYSCTL_PROC(_kern, KERN_PS_STRINGS, ps_strings, CTLTYPE_ULONG|CTLFLAG_RD,
    NULL, 0, sysctl_kern_ps_strings, "LU", "");

/* XXX This should be vm_size_t. */
SYSCTL_PROC(_kern, KERN_USRSTACK, usrstack, CTLTYPE_ULONG|CTLFLAG_RD|
    CTLFLAG_CAPRD, NULL, 0, sysctl_kern_usrstack, "LU", "");

SYSCTL_PROC(_kern, OID_AUTO, stackprot, CTLTYPE_INT|CTLFLAG_RD,
    NULL, 0, sysctl_kern_stackprot, "I", "");

u_long ps_arg_cache_limit = PAGE_SIZE / 16;
SYSCTL_ULONG(_kern, OID_AUTO, ps_arg_cache_limit, CTLFLAG_RW,
    &ps_arg_cache_limit, 0, "");

static int disallow_high_osrel;
SYSCTL_INT(_kern, OID_AUTO, disallow_high_osrel, CTLFLAG_RW,
    &disallow_high_osrel, 0,
    "Disallow execution of binaries built for higher version of the world");

static int map_at_zero = 0;
TUNABLE_INT("security.bsd.map_at_zero", &map_at_zero);
SYSCTL_INT(_security_bsd, OID_AUTO, map_at_zero, CTLFLAG_RW, &map_at_zero, 0,
    "Permit processes to map an object at virtual address 0.");

static int
sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS)
{
	struct proc *p;
	int error;

	p = curproc;
#ifdef SCTL_MASK32
	if (req->flags & SCTL_MASK32) {
		unsigned int val;
		val = (unsigned int)p->p_sysent->sv_psstrings;
		error = SYSCTL_OUT(req, &val, sizeof(val));
	} else
#endif
		error = SYSCTL_OUT(req, &p->p_sysent->sv_psstrings,
		    sizeof(p->p_sysent->sv_psstrings));
	return error;
}

static int
sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS)
{
	struct proc *p;
	int error;

	p = curproc;
#ifdef SCTL_MASK32
	if (req->flags & SCTL_MASK32) {
		unsigned int val;
		val = (unsigned int)p->p_sysent->sv_usrstack;
		error = SYSCTL_OUT(req, &val, sizeof(val));
	} else
#endif
		error = SYSCTL_OUT(req, &p->p_sysent->sv_usrstack,
		    sizeof(p->p_sysent->sv_usrstack));
	return error;
}

static int
sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS)
{
	struct proc *p;

	p = curproc;
	return (SYSCTL_OUT(req, &p->p_sysent->sv_stackprot,
	    sizeof(p->p_sysent->sv_stackprot)));
}
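
/*
 * Note: the kern.ps_strings and kern.usrstack sysctls above are consumed
 * from userland; for instance, libc's setproctitle(3) is expected to look
 * up kern.ps_strings to locate the ps_strings structure, and both values
 * can be inspected with:
 *
 *	$ sysctl kern.usrstack kern.ps_strings
 */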

/*
 * Each of the items is a pointer to a `const struct execsw', hence the
 * double pointer here.
 */
static const struct execsw **execsw;

#ifndef _SYS_SYSPROTO_H_
struct execve_args {
	char	*fname;
	char	**argv;
	char	**envv;
};
#endif

int
sys_execve(td, uap)
	struct thread *td;
	struct execve_args /* {
		char *fname;
		char **argv;
		char **envv;
	} */ *uap;
{
	int error;
	struct image_args args;

	error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE,
	    uap->argv, uap->envv);
	if (error == 0)
		error = kern_execve(td, &args, NULL);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct fexecve_args {
	int	fd;
	char	**argv;
	char	**envv;
};
#endif
int
sys_fexecve(struct thread *td, struct fexecve_args *uap)
{
	int error;
	struct image_args args;

	error = exec_copyin_args(&args, NULL, UIO_SYSSPACE,
	    uap->argv, uap->envv);
	if (error == 0) {
		args.fd = uap->fd;
		error = kern_execve(td, &args, NULL);
	}
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct __mac_execve_args {
	char	*fname;
	char	**argv;
	char	**envv;
	struct mac	*mac_p;
};
#endif

int
sys___mac_execve(td, uap)
	struct thread *td;
	struct __mac_execve_args /* {
		char *fname;
		char **argv;
		char **envv;
		struct mac *mac_p;
	} */ *uap;
{
#ifdef MAC
	int error;
	struct image_args args;

	error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE,
	    uap->argv, uap->envv);
	if (error == 0)
		error = kern_execve(td, &args, uap->mac_p);
	return (error);
#else
	return (ENOSYS);
#endif
}

/*
 * XXX: kern_execve has the astonishing property of not always returning to
 * the caller. If sufficiently bad things happen during the call to
 * do_execve(), it can end up calling exit1(); as a result, callers must
 * avoid doing anything which they might need to undo (e.g., allocating
 * memory).
 */
int
kern_execve(td, args, mac_p)
	struct thread *td;
	struct image_args *args;
	struct mac *mac_p;
{
	struct proc *p = td->td_proc;
	struct vmspace *oldvmspace;
	int error;

	AUDIT_ARG_ARGV(args->begin_argv, args->argc,
	    args->begin_envv - args->begin_argv);
	AUDIT_ARG_ENVV(args->begin_envv, args->envc,
	    args->endp - args->begin_envv);
	if (p->p_flag & P_HADTHREADS) {
		PROC_LOCK(p);
		if (thread_single(SINGLE_BOUNDARY)) {
			PROC_UNLOCK(p);
			exec_free_args(args);
			return (ERESTART);	/* Try again later. */
		}
		PROC_UNLOCK(p);
	}

	KASSERT((td->td_pflags & TDP_EXECVMSPC) == 0, ("nested execve"));
	oldvmspace = td->td_proc->p_vmspace;
	error = do_execve(td, args, mac_p);

	if (p->p_flag & P_HADTHREADS) {
		PROC_LOCK(p);
		/*
		 * If success, we upgrade to SINGLE_EXIT state to
		 * force other threads to suicide.
		 */
		if (error == 0)
			thread_single(SINGLE_EXIT);
		else
			thread_single_end();
		PROC_UNLOCK(p);
	}
	if ((td->td_pflags & TDP_EXECVMSPC) != 0) {
		KASSERT(td->td_proc->p_vmspace != oldvmspace,
		    ("oldvmspace still used"));
		vmspace_free(oldvmspace);
		td->td_pflags &= ~TDP_EXECVMSPC;
	}

	return (error);
}
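
/*
 * Rough outline of do_execve() below, as a reader's aid derived from the
 * code itself: look up the image vnode (namei() for execve(), fgetvp_exec()
 * for fexecve()), check permissions and map the first page, run the image
 * activators until one claims the image (looping back for interpreters),
 * copy out the argument/environment strings and fix up the stack, handle
 * credential changes for set-id images and MAC transitions, swap in the new
 * argument cache and text vnode, and finally set the initial register
 * state.  Any failure after the old address space has been destroyed ends
 * in exit1().
 */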

/*
 * In-kernel implementation of execve(). All arguments are assumed to be
 * userspace pointers from the passed thread.
 */
static int
do_execve(td, args, mac_p)
	struct thread *td;
	struct image_args *args;
	struct mac *mac_p;
{
	struct proc *p = td->td_proc;
	struct nameidata nd;
	struct ucred *newcred = NULL, *oldcred;
	struct uidinfo *euip;
	register_t *stack_base;
	int error, i;
	struct image_params image_params, *imgp;
	struct vattr attr;
	int (*img_first)(struct image_params *);
	struct pargs *oldargs = NULL, *newargs = NULL;
	struct sigacts *oldsigacts, *newsigacts;
#ifdef KTRACE
	struct vnode *tracevp = NULL;
	struct ucred *tracecred = NULL;
#endif
	struct vnode *textvp = NULL, *binvp = NULL;
	cap_rights_t rights;
	int credential_changing;
	int textset;
#ifdef MAC
	struct label *interpvplabel = NULL;
	int will_transition;
#endif
#ifdef HWPMC_HOOKS
	struct pmckern_procexec pe;
#endif
	static const char fexecv_proc_title[] = "(fexecv)";

	imgp = &image_params;

	/*
	 * Lock the process and set the P_INEXEC flag to indicate that
	 * it should be left alone until we're done here. This is
	 * necessary to avoid race conditions - e.g. in ptrace() -
	 * that might allow a local user to illicitly obtain elevated
	 * privileges.
	 */
	PROC_LOCK(p);
	KASSERT((p->p_flag & P_INEXEC) == 0,
	    ("%s(): process already has P_INEXEC flag", __func__));
	p->p_flag |= P_INEXEC;
	PROC_UNLOCK(p);

	/*
	 * Initialize part of the common data
	 */
	imgp->proc = p;
	imgp->execlabel = NULL;
	imgp->attr = &attr;
	imgp->entry_addr = 0;
	imgp->reloc_base = 0;
	imgp->vmspace_destroyed = 0;
	imgp->interpreted = 0;
	imgp->opened = 0;
	imgp->interpreter_name = NULL;
	imgp->auxargs = NULL;
	imgp->vp = NULL;
	imgp->object = NULL;
	imgp->firstpage = NULL;
	imgp->ps_strings = 0;
	imgp->auxarg_size = 0;
	imgp->args = args;
	imgp->execpath = imgp->freepath = NULL;
	imgp->execpathp = 0;
	imgp->canary = 0;
	imgp->canarylen = 0;
	imgp->pagesizes = 0;
	imgp->pagesizeslen = 0;
	imgp->stack_prot = 0;

#ifdef MAC
	error = mac_execve_enter(imgp, mac_p);
	if (error)
		goto exec_fail;
#endif

	imgp->image_header = NULL;

	/*
	 * Translate the file name. namei() returns a vnode pointer
	 * in ni_vp among other things.
	 *
	 * XXXAUDIT: It would be desirable to also audit the name of the
	 * interpreter if this is an interpreted binary.
	 */
	if (args->fname != NULL) {
		NDINIT(&nd, LOOKUP, ISOPEN | LOCKLEAF | FOLLOW | SAVENAME
		    | AUDITVNODE1, UIO_SYSSPACE, args->fname, td);
	}

	SDT_PROBE(proc, kernel, , exec, args->fname, 0, 0, 0, 0);

interpret:
	if (args->fname != NULL) {
#ifdef CAPABILITY_MODE
		/*
		 * While capability mode can't reach this point via direct
		 * path arguments to execve(), we also don't allow
		 * interpreters to be used in capability mode (for now).
		 * Catch indirect lookups and return a permissions error.
		 */
		if (IN_CAPABILITY_MODE(td)) {
			error = ECAPMODE;
			goto exec_fail;
		}
#endif
		error = namei(&nd);
		if (error)
			goto exec_fail;

		binvp = nd.ni_vp;
		imgp->vp = binvp;
	} else {
		AUDIT_ARG_FD(args->fd);
		/*
		 * Descriptors opened only with O_EXEC or O_RDONLY are allowed.
		 */
		error = fgetvp_exec(td, args->fd,
		    cap_rights_init(&rights, CAP_FEXECVE), &binvp);
		if (error)
			goto exec_fail;
		vn_lock(binvp, LK_EXCLUSIVE | LK_RETRY);
		AUDIT_ARG_VNODE1(binvp);
		imgp->vp = binvp;
	}

	/*
	 * Check file permissions (also 'opens' file)
	 */
	error = exec_check_permissions(imgp);
	if (error)
		goto exec_fail_dealloc;

	imgp->object = imgp->vp->v_object;
	if (imgp->object != NULL)
		vm_object_reference(imgp->object);

	/*
	 * Set VV_TEXT now so no one can write to the executable while we're
	 * activating it.
	 *
	 * Remember if this was set before and unset it in case this is not
	 * actually an executable image.
	 */
	textset = VOP_IS_TEXT(imgp->vp);
	VOP_SET_TEXT(imgp->vp);

	error = exec_map_first_page(imgp);
	if (error)
		goto exec_fail_dealloc;

	imgp->proc->p_osrel = 0;
	/*
	 * If the current process has a special image activator it
	 * wants to try first, call it. For example, emulating shell
	 * scripts differently.
	 */
	error = -1;
	if ((img_first = imgp->proc->p_sysent->sv_imgact_try) != NULL)
		error = img_first(imgp);

	/*
	 * Loop through the list of image activators, calling each one.
	 * An activator returns -1 if there is no match, 0 on success,
	 * and an error otherwise.
	 */
	for (i = 0; error == -1 && execsw[i]; ++i) {
		if (execsw[i]->ex_imgact == NULL ||
		    execsw[i]->ex_imgact == img_first) {
			continue;
		}
		error = (*execsw[i]->ex_imgact)(imgp);
	}

	if (error) {
		if (error == -1) {
			if (textset == 0)
				VOP_UNSET_TEXT(imgp->vp);
			error = ENOEXEC;
		}
		goto exec_fail_dealloc;
	}

	/*
	 * Special interpreter operation: clean up and loop back to try to
	 * activate the interpreter.
	 */
	if (imgp->interpreted) {
		exec_unmap_first_page(imgp);
		/*
		 * VV_TEXT needs to be unset for scripts. There is a short
		 * period before we determine that something is a script where
		 * VV_TEXT will be set. The vnode lock is held over this
		 * entire period so nothing should illegitimately be blocked.
		 */
		VOP_UNSET_TEXT(imgp->vp);
		/* free name buffer and old vnode */
		if (args->fname != NULL)
			NDFREE(&nd, NDF_ONLY_PNBUF);
#ifdef MAC
		mac_execve_interpreter_enter(binvp, &interpvplabel);
#endif
		if (imgp->opened) {
			VOP_CLOSE(binvp, FREAD, td->td_ucred, td);
			imgp->opened = 0;
		}
		vput(binvp);
		vm_object_deallocate(imgp->object);
		imgp->object = NULL;
		/* set new name to that of the interpreter */
		NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME,
		    UIO_SYSSPACE, imgp->interpreter_name, td);
		args->fname = imgp->interpreter_name;
		goto interpret;
	}

	/*
	 * NB: We unlock the vnode here because it is believed that none
	 * of the sv_copyout_strings/sv_fixup operations require the vnode.
	 */
	VOP_UNLOCK(imgp->vp, 0);

	/*
	 * Do our best to compute the full path to the image file.
	 */
	if (imgp->auxargs != NULL &&
	    ((args->fname != NULL && args->fname[0] == '/') ||
	    vn_fullpath(td, imgp->vp, &imgp->execpath, &imgp->freepath) != 0))
		imgp->execpath = args->fname;

	if (disallow_high_osrel &&
	    P_OSREL_MAJOR(p->p_osrel) > P_OSREL_MAJOR(__FreeBSD_version)) {
		error = ENOEXEC;
		uprintf("Osrel %d for image %s too high\n", p->p_osrel,
		    imgp->execpath != NULL ? imgp->execpath : "<unresolved>");
		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
		goto exec_fail_dealloc;
	}

	/*
	 * Copy out strings (args and env) and initialize stack base
	 */
	if (p->p_sysent->sv_copyout_strings)
		stack_base = (*p->p_sysent->sv_copyout_strings)(imgp);
	else
		stack_base = exec_copyout_strings(imgp);

	/*
	 * If a custom stack fixup routine is present for this process,
	 * let it do the stack setup.  Otherwise, push the argument count
	 * onto the stack as the first item.
	 */
	if (p->p_sysent->sv_fixup != NULL)
		(*p->p_sysent->sv_fixup)(&stack_base, imgp);
	else
		suword(--stack_base, imgp->args->argc);

	/*
	 * For security and other reasons, the file descriptor table cannot
	 * be shared after an exec.
	 */
	fdunshare(p, td);

	/*
	 * Malloc things before we need locks.
	 */
	newcred = crget();
	euip = uifind(attr.va_uid);
	i = imgp->args->begin_envv - imgp->args->begin_argv;
	/* Cache arguments if they fit inside our allowance */
	if (ps_arg_cache_limit >= i + sizeof(struct pargs)) {
		newargs = pargs_alloc(i);
		bcopy(imgp->args->begin_argv, newargs->ar_args, i);
	}

	/* close files on exec */
	fdcloseexec(td);
	vn_lock(imgp->vp, LK_SHARED | LK_RETRY);

	/* Get a reference to the vnode prior to locking the proc */
	VREF(binvp);

	/*
	 * For security and other reasons, signal handlers cannot
	 * be shared after an exec. The new process gets a copy of the old
	 * handlers. In execsigs(), the new process will have its signals
	 * reset.
	 */
	PROC_LOCK(p);
	oldcred = crcopysafe(p, newcred);
	if (sigacts_shared(p->p_sigacts)) {
		oldsigacts = p->p_sigacts;
		PROC_UNLOCK(p);
		newsigacts = sigacts_alloc();
		sigacts_copy(newsigacts, oldsigacts);
		PROC_LOCK(p);
		p->p_sigacts = newsigacts;
	} else
		oldsigacts = NULL;

	/* Stop profiling */
	stopprofclock(p);

	/* reset caught signals */
	execsigs(p);

	/* name this process - nameiexec(p, ndp) */
	bzero(p->p_comm, sizeof(p->p_comm));
	if (args->fname)
		bcopy(nd.ni_cnd.cn_nameptr, p->p_comm,
		    min(nd.ni_cnd.cn_namelen, MAXCOMLEN));
	else if (vn_commname(binvp, p->p_comm, sizeof(p->p_comm)) != 0)
		bcopy(fexecv_proc_title, p->p_comm, sizeof(fexecv_proc_title));
	bcopy(p->p_comm, td->td_name, sizeof(td->td_name));
#ifdef KTR
	sched_clear_tdname(td);
#endif

	/*
	 * mark as execed, wakeup the process that vforked (if any) and tell
	 * it that it now has its own resources back
	 */
	p->p_flag |= P_EXEC;
	if (p->p_pptr && (p->p_flag & P_PPWAIT)) {
		p->p_flag &= ~(P_PPWAIT | P_PPTRACE);
		cv_broadcast(&p->p_pwait);
	}

	/*
	 * Implement image setuid/setgid.
	 *
	 * Don't honor setuid/setgid if the filesystem prohibits it or if
	 * the process is being traced.
	 *
	 * We disable setuid/setgid/etc in compatibility mode on the basis
	 * that most setugid applications are not written with that
	 * environment in mind, and will therefore almost certainly operate
	 * incorrectly. In principle there's no reason that setugid
	 * applications might not be useful in capability mode, so we may want
	 * to reconsider this conservative design choice in the future.
	 *
	 * XXXMAC: For the time being, use NOSUID to also prohibit
	 * transitions on the file system.
	 */
	credential_changing = 0;
	credential_changing |= (attr.va_mode & S_ISUID) && oldcred->cr_uid !=
	    attr.va_uid;
	credential_changing |= (attr.va_mode & S_ISGID) && oldcred->cr_gid !=
	    attr.va_gid;
#ifdef MAC
	will_transition = mac_vnode_execve_will_transition(oldcred, imgp->vp,
	    interpvplabel, imgp);
	credential_changing |= will_transition;
#endif

	if (credential_changing &&
#ifdef CAPABILITY_MODE
	    ((oldcred->cr_flags & CRED_FLAG_CAPMODE) == 0) &&
#endif
	    (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 &&
	    (p->p_flag & P_TRACED) == 0) {
		/*
		 * Turn off syscall tracing for set-id programs, except for
		 * root. Record any set-id flags first to make sure that
		 * we do not regain any tracing during a possible block.
		 */
		setsugid(p);

#ifdef KTRACE
		if (p->p_tracecred != NULL &&
		    priv_check_cred(p->p_tracecred, PRIV_DEBUG_DIFFCRED, 0))
			ktrprocexec(p, &tracecred, &tracevp);
#endif
		/*
		 * Close any file descriptors 0..2 that reference procfs,
		 * then make sure file descriptors 0..2 are in use.
		 *
		 * setugidsafety() may call closef() and then pfind()
		 * which may grab the process lock.
		 * fdcheckstd() may call falloc() which may block to
		 * allocate memory, so temporarily drop the process lock.
		 */
		PROC_UNLOCK(p);
		VOP_UNLOCK(imgp->vp, 0);
		setugidsafety(td);
		error = fdcheckstd(td);
		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
		if (error != 0)
			goto done1;
		PROC_LOCK(p);
		/*
		 * Set the new credentials.
		 */
		if (attr.va_mode & S_ISUID)
			change_euid(newcred, euip);
		if (attr.va_mode & S_ISGID)
			change_egid(newcred, attr.va_gid);
#ifdef MAC
		if (will_transition) {
			mac_vnode_execve_transition(oldcred, newcred, imgp->vp,
			    interpvplabel, imgp);
		}
#endif
		/*
		 * Implement correct POSIX saved-id behavior.
		 *
		 * XXXMAC: Note that the current logic will save the
		 * uid and gid if a MAC domain transition occurs, even
		 * though maybe it shouldn't.
		 */
		change_svuid(newcred, newcred->cr_uid);
		change_svgid(newcred, newcred->cr_gid);
		p->p_ucred = newcred;
		newcred = NULL;
	} else {
		if (oldcred->cr_uid == oldcred->cr_ruid &&
		    oldcred->cr_gid == oldcred->cr_rgid)
			p->p_flag &= ~P_SUGID;
		/*
		 * Implement correct POSIX saved-id behavior.
		 *
		 * XXX: It's not clear that the existing behavior is
		 * POSIX-compliant. A number of sources indicate that the
		 * saved uid/gid should only be updated if the new ruid is
		 * not equal to the old ruid, or the new euid is not equal
		 * to the old euid and the new euid is not equal to the old
		 * ruid. The FreeBSD code always updates the saved uid/gid.
		 * Also, this code uses the new (replaced) euid and egid as
		 * the source, which may or may not be the right ones to use.
		 */
		if (oldcred->cr_svuid != oldcred->cr_uid ||
		    oldcred->cr_svgid != oldcred->cr_gid) {
			change_svuid(newcred, newcred->cr_uid);
			change_svgid(newcred, newcred->cr_gid);
			p->p_ucred = newcred;
			newcred = NULL;
		}
	}

	/*
	 * Store the vp for use in procfs. This vnode was referenced prior
	 * to locking the proc lock.
	 */
	textvp = p->p_textvp;
	p->p_textvp = binvp;

#ifdef KDTRACE_HOOKS
	/*
	 * Tell the DTrace fasttrap provider about the exec if it
	 * has declared an interest.
	 */
	if (dtrace_fasttrap_exec)
		dtrace_fasttrap_exec(p);
#endif

	/*
	 * Notify others that we exec'd, and clear the P_INEXEC flag
	 * as we're now a bona fide freshly-execed process.
	 */
	KNOTE_LOCKED(&p->p_klist, NOTE_EXEC);
	p->p_flag &= ~P_INEXEC;

	/* clear "fork but no exec" flag, as we _are_ execing */
	p->p_acflag &= ~AFORK;

	/*
	 * Free any previous argument cache and replace it with
	 * the new argument cache, if any.
	 */
	oldargs = p->p_args;
	p->p_args = newargs;
	newargs = NULL;

#ifdef HWPMC_HOOKS
	/*
	 * Check if system-wide sampling is in effect or if the
	 * current process is using PMCs. If so, do exec() time
	 * processing. This processing needs to happen AFTER the
	 * P_INEXEC flag is cleared.
	 *
	 * The proc lock needs to be released before taking the PMC
	 * SX.
	 */
	if (PMC_SYSTEM_SAMPLING_ACTIVE() || PMC_PROC_IS_USING_PMCS(p)) {
		PROC_UNLOCK(p);
		VOP_UNLOCK(imgp->vp, 0);
		pe.pm_credentialschanged = credential_changing;
		pe.pm_entryaddr = imgp->entry_addr;

		PMC_CALL_HOOK_X(td, PMC_FN_PROCESS_EXEC, (void *) &pe);
		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
	} else
		PROC_UNLOCK(p);
#else  /* !HWPMC_HOOKS */
	PROC_UNLOCK(p);
#endif

	/* Set values passed into the program in registers. */
	if (p->p_sysent->sv_setregs)
		(*p->p_sysent->sv_setregs)(td, imgp,
		    (u_long)(uintptr_t)stack_base);
	else
		exec_setregs(td, imgp, (u_long)(uintptr_t)stack_base);

	vfs_mark_atime(imgp->vp, td->td_ucred);

	SDT_PROBE(proc, kernel, , exec__success, args->fname, 0, 0, 0, 0);

done1:
	/*
	 * Free any resources malloc'd earlier that we didn't use.
	 */
	uifree(euip);
	if (newcred == NULL)
		crfree(oldcred);
	else
		crfree(newcred);
	VOP_UNLOCK(imgp->vp, 0);

	/*
	 * Handle deferred decrement of ref counts.
	 */
	if (textvp != NULL)
		vrele(textvp);
	if (binvp && error != 0)
		vrele(binvp);
#ifdef KTRACE
	if (tracevp != NULL)
		vrele(tracevp);
	if (tracecred != NULL)
		crfree(tracecred);
#endif
	vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
	pargs_drop(oldargs);
	pargs_drop(newargs);
	if (oldsigacts != NULL)
		sigacts_free(oldsigacts);

exec_fail_dealloc:

	/*
	 * free various allocated resources
	 */
	if (imgp->firstpage != NULL)
		exec_unmap_first_page(imgp);

	if (imgp->vp != NULL) {
		if (args->fname)
			NDFREE(&nd, NDF_ONLY_PNBUF);
		if (imgp->opened)
			VOP_CLOSE(imgp->vp, FREAD, td->td_ucred, td);
		vput(imgp->vp);
	}

	if (imgp->object != NULL)
		vm_object_deallocate(imgp->object);

	free(imgp->freepath, M_TEMP);

	if (error == 0) {
		PROC_LOCK(p);
		td->td_dbgflags |= TDB_EXEC;
		PROC_UNLOCK(p);

		/*
		 * Stop the process here if its stop event mask has
		 * the S_EXEC bit set.
		 */
		STOPEVENT(p, S_EXEC, 0);
		goto done2;
	}

exec_fail:
	/* we're done here, clear P_INEXEC */
	PROC_LOCK(p);
	p->p_flag &= ~P_INEXEC;
	PROC_UNLOCK(p);

	SDT_PROBE(proc, kernel, , exec__failure, error, 0, 0, 0, 0);

done2:
#ifdef MAC
	mac_execve_exit(imgp);
	mac_execve_interpreter_exit(interpvplabel);
#endif
	exec_free_args(args);

	if (error && imgp->vmspace_destroyed) {
		/* Sorry, no more process anymore; exit gracefully. */
		exit1(td, W_EXITCODE(0, SIGABRT));
		/* NOT REACHED */
	}

#ifdef KTRACE
	if (error == 0)
		ktrprocctor(p);
#endif

	return (error);
}
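
/*
 * Map the first page of the image so that activators can inspect the
 * header via imgp->image_header.  The page is held and mapped through an
 * sf_buf until exec_unmap_first_page() releases it.
 */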
int
exec_map_first_page(imgp)
	struct image_params *imgp;
{
	int rv, i;
	int initial_pagein;
	vm_page_t ma[VM_INITIAL_PAGEIN];
	vm_object_t object;

	if (imgp->firstpage != NULL)
		exec_unmap_first_page(imgp);

	object = imgp->vp->v_object;
	if (object == NULL)
		return (EACCES);
	VM_OBJECT_WLOCK(object);
#if VM_NRESERVLEVEL > 0
	if ((object->flags & OBJ_COLORED) == 0) {
		object->flags |= OBJ_COLORED;
		object->pg_color = 0;
	}
#endif
	ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL);
	if (ma[0]->valid != VM_PAGE_BITS_ALL) {
		initial_pagein = VM_INITIAL_PAGEIN;
		if (initial_pagein > object->size)
			initial_pagein = object->size;
		for (i = 1; i < initial_pagein; i++) {
			if ((ma[i] = vm_page_next(ma[i - 1])) != NULL) {
				if (ma[i]->valid)
					break;
				/* Stop clustering at the first page we cannot busy. */
				if (!vm_page_tryxbusy(ma[i]))
					break;
			} else {
				ma[i] = vm_page_alloc(object, i,
				    VM_ALLOC_NORMAL | VM_ALLOC_IFNOTCACHED);
				if (ma[i] == NULL)
					break;
			}
		}
		initial_pagein = i;
		rv = vm_pager_get_pages(object, ma, initial_pagein, 0);
		ma[0] = vm_page_lookup(object, 0);
		if ((rv != VM_PAGER_OK) || (ma[0] == NULL)) {
			if (ma[0] != NULL) {
				vm_page_lock(ma[0]);
				vm_page_free(ma[0]);
				vm_page_unlock(ma[0]);
			}
			VM_OBJECT_WUNLOCK(object);
			return (EIO);
		}
	}
	vm_page_xunbusy(ma[0]);
	vm_page_lock(ma[0]);
	vm_page_hold(ma[0]);
	vm_page_unlock(ma[0]);
	VM_OBJECT_WUNLOCK(object);

	imgp->firstpage = sf_buf_alloc(ma[0], 0);
	imgp->image_header = (char *)sf_buf_kva(imgp->firstpage);

	return (0);
}

void
exec_unmap_first_page(imgp)
	struct image_params *imgp;
{
	vm_page_t m;

	if (imgp->firstpage != NULL) {
		m = sf_buf_page(imgp->firstpage);
		sf_buf_free(imgp->firstpage);
		imgp->firstpage = NULL;
		vm_page_lock(m);
		vm_page_unhold(m);
		vm_page_unlock(m);
	}
}

/*
 * Destroy old address space, and allocate a new stack.
 * The new stack is only SGROWSIZ large because it is grown
 * automatically in trap.c.
 */
int
exec_new_vmspace(imgp, sv)
	struct image_params *imgp;
	struct sysentvec *sv;
{
	int error;
	struct proc *p = imgp->proc;
	struct vmspace *vmspace = p->p_vmspace;
	vm_object_t obj;
	vm_offset_t sv_minuser, stack_addr;
	vm_map_t map;
	u_long ssiz;

	imgp->vmspace_destroyed = 1;
	imgp->sysent = sv;

	/* May be called with Giant held */
	EVENTHANDLER_INVOKE(process_exec, p, imgp);

	/*
	 * Blow away entire process VM, if address space not shared,
	 * otherwise, create a new VM space so that other threads are
	 * not disrupted
	 */
	map = &vmspace->vm_map;
	if (map_at_zero)
		sv_minuser = sv->sv_minuser;
	else
		sv_minuser = MAX(sv->sv_minuser, PAGE_SIZE);
	if (vmspace->vm_refcnt == 1 && vm_map_min(map) == sv_minuser &&
	    vm_map_max(map) == sv->sv_maxuser) {
		shmexit(vmspace);
		pmap_remove_pages(vmspace_pmap(vmspace));
		vm_map_remove(map, vm_map_min(map), vm_map_max(map));
	} else {
		error = vmspace_exec(p, sv_minuser, sv->sv_maxuser);
		if (error)
			return (error);
		vmspace = p->p_vmspace;
		map = &vmspace->vm_map;
	}

	/* Map a shared page */
	obj = sv->sv_shared_page_obj;
	if (obj != NULL) {
		vm_object_reference(obj);
		error = vm_map_fixed(map, obj, 0,
		    sv->sv_shared_page_base, sv->sv_shared_page_len,
		    VM_PROT_READ | VM_PROT_EXECUTE,
		    VM_PROT_READ | VM_PROT_EXECUTE,
		    MAP_INHERIT_SHARE | MAP_ACC_NO_CHARGE);
		if (error) {
			vm_object_deallocate(obj);
			return (error);
		}
	}

	/* Allocate a new stack */
	if (sv->sv_maxssiz != NULL)
		ssiz = *sv->sv_maxssiz;
	else
		ssiz = maxssiz;
	stack_addr = sv->sv_usrstack - ssiz;
	error = vm_map_stack(map, stack_addr, (vm_size_t)ssiz,
	    obj != NULL && imgp->stack_prot != 0 ? imgp->stack_prot :
	    sv->sv_stackprot,
	    VM_PROT_ALL, MAP_STACK_GROWS_DOWN);
	if (error)
		return (error);

#ifdef __ia64__
	/* Allocate a new register stack */
	stack_addr = IA64_BACKINGSTORE;
	error = vm_map_stack(map, stack_addr, (vm_size_t)ssiz,
	    sv->sv_stackprot, VM_PROT_ALL, MAP_STACK_GROWS_UP);
	if (error)
		return (error);
#endif

	/*
	 * vm_ssize and vm_maxsaddr are somewhat antiquated concepts, but they
	 * are still used to enforce the stack rlimit on the process stack.
	 */
	vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT;
	vmspace->vm_maxsaddr = (char *)sv->sv_usrstack - ssiz;

	return (0);
}
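
/*
 * The image_args buffer allocated below is a single contiguous region of
 * PATH_MAX + ARG_MAX bytes from exec_map: the file name (if any) is placed
 * at the start, followed by the NUL-terminated argument strings and then
 * the environment strings, with begin_argv/begin_envv/endp tracking the
 * boundaries and stringspace tracking the remaining ARG_MAX allowance.
 */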

/*
 * Copy out argument and environment strings from the old process address
 * space into the temporary string buffer.
 */
int
exec_copyin_args(struct image_args *args, char *fname,
    enum uio_seg segflg, char **argv, char **envv)
{
	char *argp, *envp;
	int error;
	size_t length;

	bzero(args, sizeof(*args));
	if (argv == NULL)
		return (EFAULT);

	/*
	 * Allocate demand-paged memory for the file name, argument, and
	 * environment strings.
	 */
	error = exec_alloc_args(args);
	if (error != 0)
		return (error);

	/*
	 * Copy the file name.
	 */
	if (fname != NULL) {
		args->fname = args->buf;
		error = (segflg == UIO_SYSSPACE) ?
		    copystr(fname, args->fname, PATH_MAX, &length) :
		    copyinstr(fname, args->fname, PATH_MAX, &length);
		if (error != 0)
			goto err_exit;
	} else
		length = 0;

	args->begin_argv = args->buf + length;
	args->endp = args->begin_argv;
	args->stringspace = ARG_MAX;

	/*
	 * extract arguments first
	 */
	while ((argp = (caddr_t) (intptr_t) fuword(argv++))) {
		if (argp == (caddr_t) -1) {
			error = EFAULT;
			goto err_exit;
		}
		if ((error = copyinstr(argp, args->endp,
		    args->stringspace, &length))) {
			if (error == ENAMETOOLONG)
				error = E2BIG;
			goto err_exit;
		}
		args->stringspace -= length;
		args->endp += length;
		args->argc++;
	}

	args->begin_envv = args->endp;

	/*
	 * extract environment strings
	 */
	if (envv) {
		while ((envp = (caddr_t)(intptr_t)fuword(envv++))) {
			if (envp == (caddr_t)-1) {
				error = EFAULT;
				goto err_exit;
			}
			if ((error = copyinstr(envp, args->endp,
			    args->stringspace, &length))) {
				if (error == ENAMETOOLONG)
					error = E2BIG;
				goto err_exit;
			}
			args->stringspace -= length;
			args->endp += length;
			args->envc++;
		}
	}

	return (0);

err_exit:
	exec_free_args(args);
	return (error);
}

/*
 * Allocate temporary demand-paged, zero-filled memory for the file name,
 * argument, and environment strings. Returns zero if the allocation succeeds
 * and ENOMEM otherwise.
 */
int
exec_alloc_args(struct image_args *args)
{

	args->buf = (char *)kmap_alloc_wait(exec_map, PATH_MAX + ARG_MAX);
	return (args->buf != NULL ? 0 : ENOMEM);
}

void
exec_free_args(struct image_args *args)
{

	if (args->buf != NULL) {
		kmap_free_wakeup(exec_map, (vm_offset_t)args->buf,
		    PATH_MAX + ARG_MAX);
		args->buf = NULL;
	}
	if (args->fname_buf != NULL) {
		free(args->fname_buf, M_TEMP);
		args->fname_buf = NULL;
	}
}
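
/*
 * Layout of the new process stack built by exec_copyout_strings(), from the
 * top of user memory downward: the ps_strings structure at sv_psstrings,
 * the signal trampoline (only if the sysentvec has no shared-page sigcode),
 * the executable path for rtld, the SSP canary, the pagesizes array, the
 * argument and environment strings themselves, and finally the argv/envv
 * pointer vectors (plus room for the ELF auxiliary vector when auxargs are
 * present).  The address of the vector table is returned as the initial
 * stack base.
 */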

/*
 * Copy strings out to the new process address space, constructing new arg
 * and env vector tables. Return a pointer to the base so that it can be used
 * as the initial stack pointer.
 */
register_t *
exec_copyout_strings(imgp)
	struct image_params *imgp;
{
	int argc, envc;
	char **vectp;
	char *stringp;
	uintptr_t destp;
	register_t *stack_base;
	struct ps_strings *arginfo;
	struct proc *p;
	size_t execpath_len;
	int szsigcode, szps;
	char canary[sizeof(long) * 8];

	szps = sizeof(pagesizes[0]) * MAXPAGESIZES;
	/*
	 * Calculate string base and vector table pointers.
	 * Also deal with signal trampoline code for this exec type.
	 */
	if (imgp->execpath != NULL && imgp->auxargs != NULL)
		execpath_len = strlen(imgp->execpath) + 1;
	else
		execpath_len = 0;
	p = imgp->proc;
	szsigcode = 0;
	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
	if (p->p_sysent->sv_sigcode_base == 0) {
		if (p->p_sysent->sv_szsigcode != NULL)
			szsigcode = *(p->p_sysent->sv_szsigcode);
	}
	destp = (uintptr_t)arginfo;

	/*
	 * install sigcode
	 */
	if (szsigcode != 0) {
		destp -= szsigcode;
		destp = rounddown2(destp, sizeof(void *));
		copyout(p->p_sysent->sv_sigcode, (void *)destp, szsigcode);
	}

	/*
	 * Copy the image path for the rtld.
	 */
	if (execpath_len != 0) {
		destp -= execpath_len;
		imgp->execpathp = destp;
		copyout(imgp->execpath, (void *)destp, execpath_len);
	}

	/*
	 * Prepare the canary for SSP.
	 */
	arc4rand(canary, sizeof(canary), 0);
	destp -= sizeof(canary);
	imgp->canary = destp;
	copyout(canary, (void *)destp, sizeof(canary));
	imgp->canarylen = sizeof(canary);

	/*
	 * Prepare the pagesizes array.
	 */
	destp -= szps;
	destp = rounddown2(destp, sizeof(void *));
	imgp->pagesizes = destp;
	copyout(pagesizes, (void *)destp, szps);
	imgp->pagesizeslen = szps;

	destp -= ARG_MAX - imgp->args->stringspace;
	destp = rounddown2(destp, sizeof(void *));

	/*
	 * If we have a valid auxargs ptr, prepare some room
	 * on the stack.
	 */
	if (imgp->auxargs) {
		/*
		 * 'AT_COUNT * 2' is size for the ELF Auxargs data. This is
		 * for backwards compatibility.
		 */
		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
		    (AT_COUNT * 2);
		/*
		 * The '+ 2' is for the null pointers at the end of each of
		 * the arg and env vector sets, and imgp->auxarg_size is room
		 * for the arguments of the runtime loader.
		 */
		vectp = (char **)(destp - (imgp->args->argc +
		    imgp->args->envc + 2 + imgp->auxarg_size)
		    * sizeof(char *));
	} else {
		/*
		 * The '+ 2' is for the null pointers at the end of each of
		 * the arg and env vector sets.
		 */
		vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc
		    + 2) * sizeof(char *));
	}

	/*
	 * vectp also becomes our initial stack base
	 */
	stack_base = (register_t *)vectp;

	stringp = imgp->args->begin_argv;
	argc = imgp->args->argc;
	envc = imgp->args->envc;

	/*
	 * Copy out strings - arguments and environment.
	 */
	copyout(stringp, (void *)destp, ARG_MAX - imgp->args->stringspace);

	/*
	 * Fill in "ps_strings" struct for ps, w, etc.
	 */
	suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
	suword32(&arginfo->ps_nargvstr, argc);

	/*
	 * Fill in argument portion of vector table.
	 */
	for (; argc > 0; --argc) {
		suword(vectp++, (long)(intptr_t)destp);
		while (*stringp++ != 0)
			destp++;
		destp++;
	}

	/* a null vector table pointer separates the argp's from the envp's */
	suword(vectp++, 0);

	suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
	suword32(&arginfo->ps_nenvstr, envc);

	/*
	 * Fill in environment portion of vector table.
	 */
	for (; envc > 0; --envc) {
		suword(vectp++, (long)(intptr_t)destp);
		while (*stringp++ != 0)
			destp++;
		destp++;
	}

	/* end of vector table is a null pointer */
	suword(vectp, 0);

	return (stack_base);
}

/*
 * Check permissions of file to execute.
 * Called with imgp->vp locked.
 * Return 0 for success or error code on failure.
 */
int
exec_check_permissions(imgp)
	struct image_params *imgp;
{
	struct vnode *vp = imgp->vp;
	struct vattr *attr = imgp->attr;
	struct thread *td;
	int error, writecount;

	td = curthread;

	/* Get file attributes */
	error = VOP_GETATTR(vp, attr, td->td_ucred);
	if (error)
		return (error);

#ifdef MAC
	error = mac_vnode_check_exec(td->td_ucred, imgp->vp, imgp);
	if (error)
		return (error);
#endif

	/*
	 * 1) Check if file execution is disabled for the filesystem that
	 *    this file resides on.
	 * 2) Ensure that at least one execute bit is on. Otherwise, a
	 *    privileged user will always succeed, and we don't want this
	 *    to happen unless the file really is executable.
	 * 3) Ensure that the file is a regular file.
	 */
	if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
	    (attr->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0 ||
	    (attr->va_type != VREG))
		return (EACCES);

	/*
	 * Zero length files can't be exec'd
	 */
	if (attr->va_size == 0)
		return (ENOEXEC);

	/*
	 * Check for execute permission to file based on current credentials.
	 */
	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
	if (error)
		return (error);

	/*
	 * Check number of open-for-writes on the file and deny execution
	 * if there are any.
	 */
	error = VOP_GET_WRITECOUNT(vp, &writecount);
	if (error != 0)
		return (error);
	if (writecount != 0)
		return (ETXTBSY);

	/*
	 * Call filesystem specific open routine (which does nothing in the
	 * general case).
	 */
	error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL);
	if (error == 0)
		imgp->opened = 1;
	return (error);
}

/*
 * Exec handler registration
 */
int
exec_register(execsw_arg)
	const struct execsw *execsw_arg;
{
	const struct execsw **es, **xs, **newexecsw;
	int count = 2;	/* New slot and trailing NULL */

	if (execsw)
		for (es = execsw; *es; es++)
			count++;
	newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK);
	if (newexecsw == NULL)
		return (ENOMEM);
	xs = newexecsw;
	if (execsw)
		for (es = execsw; *es; es++)
			*xs++ = *es;
	*xs++ = execsw_arg;
	*xs = NULL;
	if (execsw)
		free(execsw, M_TEMP);
	execsw = newexecsw;
	return (0);
}

int
exec_unregister(execsw_arg)
	const struct execsw *execsw_arg;
{
	const struct execsw **es, **xs, **newexecsw;
	int count = 1;

	if (execsw == NULL)
		panic("unregister with no handlers left?\n");

	for (es = execsw; *es; es++) {
		if (*es == execsw_arg)
			break;
	}
	if (*es == NULL)
		return (ENOENT);
	for (es = execsw; *es; es++)
		if (*es != execsw_arg)
			count++;
	newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK);
	if (newexecsw == NULL)
		return (ENOMEM);
	xs = newexecsw;
	for (es = execsw; *es; es++)
		if (*es != execsw_arg)
			*xs++ = *es;
	*xs = NULL;
	if (execsw)
		free(execsw, M_TEMP);
	execsw = newexecsw;
	return (0);
}
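
/*
 * Image activators normally do not call exec_register() directly; they are
 * expected to declare a struct execsw and hook it in with the EXEC_SET()
 * macro from <sys/imgact.h>, which handles registration and unregistration
 * at module load/unload time, roughly along these lines:
 *
 *	static struct execsw foo_execsw = {
 *		.ex_imgact = foo_imgact,
 *		.ex_name = "foo binary"
 *	};
 *	EXEC_SET(foo, foo_execsw);
 *
 * Here "foo" and foo_imgact() are placeholder names for a hypothetical
 * activator.
 */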