1 /* 2 * Copyright (c) 1993, David Greenman 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29 #include "opt_ktrace.h" 30 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/lock.h> 34 #include <sys/mutex.h> 35 #include <sys/sysproto.h> 36 #include <sys/signalvar.h> 37 #include <sys/kernel.h> 38 #include <sys/mount.h> 39 #include <sys/filedesc.h> 40 #include <sys/fcntl.h> 41 #include <sys/acct.h> 42 #include <sys/exec.h> 43 #include <sys/imgact.h> 44 #include <sys/imgact_elf.h> 45 #include <sys/wait.h> 46 #include <sys/malloc.h> 47 #include <sys/proc.h> 48 #include <sys/pioctl.h> 49 #include <sys/namei.h> 50 #include <sys/sysent.h> 51 #include <sys/shm.h> 52 #include <sys/sysctl.h> 53 #include <sys/user.h> 54 #include <sys/vnode.h> 55 #ifdef KTRACE 56 #include <sys/ktrace.h> 57 #endif 58 59 #include <vm/vm.h> 60 #include <vm/vm_param.h> 61 #include <vm/pmap.h> 62 #include <vm/vm_page.h> 63 #include <vm/vm_map.h> 64 #include <vm/vm_kern.h> 65 #include <vm/vm_extern.h> 66 #include <vm/vm_object.h> 67 #include <vm/vm_pager.h> 68 69 #include <machine/reg.h> 70 71 MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments"); 72 73 static MALLOC_DEFINE(M_ATEXEC, "atexec", "atexec callback"); 74 75 /* 76 * callout list for things to do at exec time 77 */ 78 struct execlist { 79 execlist_fn function; 80 TAILQ_ENTRY(execlist) next; 81 }; 82 83 TAILQ_HEAD(exec_list_head, execlist); 84 static struct exec_list_head exec_list = TAILQ_HEAD_INITIALIZER(exec_list); 85 86 static register_t *exec_copyout_strings(struct image_params *); 87 88 /* XXX This should be vm_size_t. */ 89 static u_long ps_strings = PS_STRINGS; 90 SYSCTL_ULONG(_kern, KERN_PS_STRINGS, ps_strings, CTLFLAG_RD, &ps_strings, 0, ""); 91 92 /* XXX This should be vm_size_t. */ 93 static u_long usrstack = USRSTACK; 94 SYSCTL_ULONG(_kern, KERN_USRSTACK, usrstack, CTLFLAG_RD, &usrstack, 0, ""); 95 96 u_long ps_arg_cache_limit = PAGE_SIZE / 16; 97 SYSCTL_ULONG(_kern, OID_AUTO, ps_arg_cache_limit, CTLFLAG_RW, 98 &ps_arg_cache_limit, 0, ""); 99 100 int ps_argsopen = 1; 101 SYSCTL_INT(_kern, OID_AUTO, ps_argsopen, CTLFLAG_RW, &ps_argsopen, 0, ""); 102 103 #ifdef __ia64__ 104 /* XXX HACK */ 105 static int regstkpages = 256; 106 SYSCTL_INT(_machdep, OID_AUTO, regstkpages, CTLFLAG_RW, ®stkpages, 0, ""); 107 #endif 108 109 /* 110 * Each of the items is a pointer to a `const struct execsw', hence the 111 * double pointer here. 112 */ 113 static const struct execsw **execsw; 114 115 #ifndef _SYS_SYSPROTO_H_ 116 struct execve_args { 117 char *fname; 118 char **argv; 119 char **envv; 120 }; 121 #endif 122 123 /* 124 * execve() system call. 125 * 126 * MPSAFE 127 */ 128 int 129 execve(td, uap) 130 struct thread *td; 131 register struct execve_args *uap; 132 { 133 struct proc *p = td->td_proc; 134 struct nameidata nd, *ndp; 135 struct ucred *newcred = NULL, *oldcred; 136 struct uidinfo *euip; 137 register_t *stack_base; 138 int error, len, i; 139 struct image_params image_params, *imgp; 140 struct vattr attr; 141 int (*img_first)(struct image_params *); 142 struct pargs *oldargs = NULL, *newargs = NULL; 143 struct procsig *oldprocsig, *newprocsig; 144 #ifdef KTRACE 145 struct vnode *tracevp = NULL; 146 #endif 147 struct vnode *textvp = NULL; 148 int credential_changing; 149 150 imgp = &image_params; 151 152 /* 153 * Lock the process and set the P_INEXEC flag to indicate that 154 * it should be left alone until we're done here. This is 155 * necessary to avoid race conditions - e.g. in ptrace() - 156 * that might allow a local user to illicitly obtain elevated 157 * privileges. 158 */ 159 PROC_LOCK(p); 160 KASSERT((p->p_flag & P_INEXEC) == 0, 161 ("%s(): process already has P_INEXEC flag", __func__)); 162 if ((p->p_flag & P_KSES) && thread_single(SNGLE_EXIT)) { 163 PROC_UNLOCK(p); 164 return (ERESTART); /* Try again later. */ 165 } 166 /* If we get here all other threads are dead. */ 167 p->p_flag |= P_INEXEC; 168 PROC_UNLOCK(p); 169 170 /* 171 * Initialize part of the common data 172 */ 173 imgp->proc = p; 174 imgp->uap = uap; 175 imgp->attr = &attr; 176 imgp->argc = imgp->envc = 0; 177 imgp->argv0 = NULL; 178 imgp->entry_addr = 0; 179 imgp->vmspace_destroyed = 0; 180 imgp->interpreted = 0; 181 imgp->interpreter_name[0] = '\0'; 182 imgp->auxargs = NULL; 183 imgp->vp = NULL; 184 imgp->object = NULL; 185 imgp->firstpage = NULL; 186 imgp->ps_strings = 0; 187 imgp->auxarg_size = 0; 188 189 /* 190 * Allocate temporary demand zeroed space for argument and 191 * environment strings 192 */ 193 imgp->stringbase = (char *)kmem_alloc_wait(exec_map, ARG_MAX + PAGE_SIZE); 194 if (imgp->stringbase == NULL) { 195 error = ENOMEM; 196 mtx_lock(&Giant); 197 goto exec_fail; 198 } 199 imgp->stringp = imgp->stringbase; 200 imgp->stringspace = ARG_MAX; 201 imgp->image_header = imgp->stringbase + ARG_MAX; 202 203 /* 204 * Translate the file name. namei() returns a vnode pointer 205 * in ni_vp amoung other things. 206 */ 207 ndp = &nd; 208 NDINIT(ndp, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME, 209 UIO_USERSPACE, uap->fname, td); 210 211 mtx_lock(&Giant); 212 interpret: 213 214 error = namei(ndp); 215 if (error) { 216 kmem_free_wakeup(exec_map, (vm_offset_t)imgp->stringbase, 217 ARG_MAX + PAGE_SIZE); 218 goto exec_fail; 219 } 220 221 imgp->vp = ndp->ni_vp; 222 imgp->fname = uap->fname; 223 224 /* 225 * Check file permissions (also 'opens' file) 226 */ 227 error = exec_check_permissions(imgp); 228 if (error) { 229 VOP_UNLOCK(imgp->vp, 0, td); 230 goto exec_fail_dealloc; 231 } 232 VOP_GETVOBJECT(imgp->vp, &imgp->object); 233 vm_object_reference(imgp->object); 234 235 error = exec_map_first_page(imgp); 236 VOP_UNLOCK(imgp->vp, 0, td); 237 if (error) 238 goto exec_fail_dealloc; 239 240 /* 241 * If the current process has a special image activator it 242 * wants to try first, call it. For example, emulating shell 243 * scripts differently. 244 */ 245 error = -1; 246 if ((img_first = imgp->proc->p_sysent->sv_imgact_try) != NULL) 247 error = img_first(imgp); 248 249 /* 250 * Loop through the list of image activators, calling each one. 251 * An activator returns -1 if there is no match, 0 on success, 252 * and an error otherwise. 253 */ 254 for (i = 0; error == -1 && execsw[i]; ++i) { 255 if (execsw[i]->ex_imgact == NULL || 256 execsw[i]->ex_imgact == img_first) { 257 continue; 258 } 259 error = (*execsw[i]->ex_imgact)(imgp); 260 } 261 262 if (error) { 263 if (error == -1) 264 error = ENOEXEC; 265 goto exec_fail_dealloc; 266 } 267 268 /* 269 * Special interpreter operation, cleanup and loop up to try to 270 * activate the interpreter. 271 */ 272 if (imgp->interpreted) { 273 exec_unmap_first_page(imgp); 274 /* free name buffer and old vnode */ 275 NDFREE(ndp, NDF_ONLY_PNBUF); 276 vrele(ndp->ni_vp); 277 vm_object_deallocate(imgp->object); 278 imgp->object = NULL; 279 /* set new name to that of the interpreter */ 280 NDINIT(ndp, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME, 281 UIO_SYSSPACE, imgp->interpreter_name, td); 282 goto interpret; 283 } 284 285 /* 286 * Copy out strings (args and env) and initialize stack base 287 */ 288 if (p->p_sysent->sv_copyout_strings) 289 stack_base = (*p->p_sysent->sv_copyout_strings)(imgp); 290 else 291 stack_base = exec_copyout_strings(imgp); 292 293 /* 294 * If custom stack fixup routine present for this process 295 * let it do the stack setup. 296 * Else stuff argument count as first item on stack 297 */ 298 if (p->p_sysent->sv_fixup) 299 (*p->p_sysent->sv_fixup)(&stack_base, imgp); 300 else 301 suword(--stack_base, imgp->argc); 302 303 /* 304 * For security and other reasons, the file descriptor table cannot 305 * be shared after an exec. 306 */ 307 FILEDESC_LOCK(p->p_fd); 308 if (p->p_fd->fd_refcnt > 1) { 309 struct filedesc *tmp; 310 311 tmp = fdcopy(td); 312 FILEDESC_UNLOCK(p->p_fd); 313 fdfree(td); 314 p->p_fd = tmp; 315 } else 316 FILEDESC_UNLOCK(p->p_fd); 317 318 /* 319 * Malloc things before we need locks. 320 */ 321 newcred = crget(); 322 euip = uifind(attr.va_uid); 323 i = imgp->endargs - imgp->stringbase; 324 if (ps_arg_cache_limit >= i + sizeof(struct pargs)) 325 newargs = pargs_alloc(i); 326 327 /* close files on exec */ 328 fdcloseexec(td); 329 330 /* 331 * For security and other reasons, signal handlers cannot 332 * be shared after an exec. The new process gets a copy of the old 333 * handlers. In execsigs(), the new process will have its signals 334 * reset. 335 */ 336 PROC_LOCK(p); 337 mp_fixme("procsig needs a lock"); 338 if (p->p_procsig->ps_refcnt > 1) { 339 oldprocsig = p->p_procsig; 340 PROC_UNLOCK(p); 341 MALLOC(newprocsig, struct procsig *, sizeof(struct procsig), 342 M_SUBPROC, M_WAITOK); 343 bcopy(oldprocsig, newprocsig, sizeof(*newprocsig)); 344 newprocsig->ps_refcnt = 1; 345 oldprocsig->ps_refcnt--; 346 PROC_LOCK(p); 347 p->p_procsig = newprocsig; 348 if (p->p_sigacts == &p->p_uarea->u_sigacts) 349 panic("shared procsig but private sigacts?"); 350 351 p->p_uarea->u_sigacts = *p->p_sigacts; 352 p->p_sigacts = &p->p_uarea->u_sigacts; 353 } 354 /* Stop profiling */ 355 stopprofclock(p); 356 357 /* reset caught signals */ 358 execsigs(p); 359 360 /* name this process - nameiexec(p, ndp) */ 361 len = min(ndp->ni_cnd.cn_namelen,MAXCOMLEN); 362 bcopy(ndp->ni_cnd.cn_nameptr, p->p_comm, len); 363 p->p_comm[len] = 0; 364 365 /* 366 * mark as execed, wakeup the process that vforked (if any) and tell 367 * it that it now has its own resources back 368 */ 369 p->p_flag |= P_EXEC; 370 if (p->p_pptr && (p->p_flag & P_PPWAIT)) { 371 p->p_flag &= ~P_PPWAIT; 372 wakeup(p->p_pptr); 373 } 374 375 /* 376 * Implement image setuid/setgid. 377 * 378 * Don't honor setuid/setgid if the filesystem prohibits it or if 379 * the process is being traced. 380 */ 381 oldcred = p->p_ucred; 382 credential_changing = 0; 383 credential_changing |= (attr.va_mode & VSUID) && oldcred->cr_uid != 384 attr.va_uid; 385 credential_changing |= (attr.va_mode & VSGID) && oldcred->cr_gid != 386 attr.va_gid; 387 388 if (credential_changing && 389 (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 && 390 (p->p_flag & P_TRACED) == 0) { 391 /* 392 * Turn off syscall tracing for set-id programs, except for 393 * root. Record any set-id flags first to make sure that 394 * we do not regain any tracing during a possible block. 395 */ 396 setsugid(p); 397 #ifdef KTRACE 398 if (p->p_tracep && suser_cred(oldcred, PRISON_ROOT)) { 399 mtx_lock(&ktrace_mtx); 400 p->p_traceflag = 0; 401 tracevp = p->p_tracep; 402 p->p_tracep = NULL; 403 mtx_unlock(&ktrace_mtx); 404 } 405 #endif 406 /* Make sure file descriptors 0..2 are in use. */ 407 error = fdcheckstd(td); 408 if (error != 0) 409 goto done1; 410 /* 411 * Set the new credentials. 412 */ 413 crcopy(newcred, oldcred); 414 if (attr.va_mode & VSUID) 415 change_euid(newcred, euip); 416 if (attr.va_mode & VSGID) 417 change_egid(newcred, attr.va_gid); 418 setugidsafety(td); 419 /* 420 * Implement correct POSIX saved-id behavior. 421 */ 422 change_svuid(newcred, newcred->cr_uid); 423 change_svgid(newcred, newcred->cr_gid); 424 p->p_ucred = newcred; 425 newcred = NULL; 426 } else { 427 if (oldcred->cr_uid == oldcred->cr_ruid && 428 oldcred->cr_gid == oldcred->cr_rgid) 429 p->p_flag &= ~P_SUGID; 430 /* 431 * Implement correct POSIX saved-id behavior. 432 * 433 * XXX: It's not clear that the existing behavior is 434 * POSIX-compliant. A number of sources indicate that the 435 * saved uid/gid should only be updated if the new ruid is 436 * not equal to the old ruid, or the new euid is not equal 437 * to the old euid and the new euid is not equal to the old 438 * ruid. The FreeBSD code always updates the saved uid/gid. 439 * Also, this code uses the new (replaced) euid and egid as 440 * the source, which may or may not be the right ones to use. 441 */ 442 if (oldcred->cr_svuid != oldcred->cr_uid || 443 oldcred->cr_svgid != oldcred->cr_gid) { 444 crcopy(newcred, oldcred); 445 change_svuid(newcred, newcred->cr_uid); 446 change_svgid(newcred, newcred->cr_gid); 447 p->p_ucred = newcred; 448 newcred = NULL; 449 } 450 } 451 452 /* 453 * Store the vp for use in procfs 454 */ 455 textvp = p->p_textvp; 456 VREF(ndp->ni_vp); 457 p->p_textvp = ndp->ni_vp; 458 459 /* 460 * Notify others that we exec'd, and clear the P_INEXEC flag 461 * as we're now a bona fide freshly-execed process. 462 */ 463 KNOTE(&p->p_klist, NOTE_EXEC); 464 p->p_flag &= ~P_INEXEC; 465 466 /* 467 * If tracing the process, trap to debugger so breakpoints 468 * can be set before the program executes. 469 */ 470 _STOPEVENT(p, S_EXEC, 0); 471 472 if (p->p_flag & P_TRACED) 473 psignal(p, SIGTRAP); 474 475 /* clear "fork but no exec" flag, as we _are_ execing */ 476 p->p_acflag &= ~AFORK; 477 478 /* Free any previous argument cache */ 479 oldargs = p->p_args; 480 p->p_args = NULL; 481 482 /* Set values passed into the program in registers. */ 483 if (p->p_sysent->sv_setregs) 484 (*p->p_sysent->sv_setregs)(td, imgp->entry_addr, 485 (u_long)(uintptr_t)stack_base, imgp->ps_strings); 486 else 487 setregs(td, imgp->entry_addr, (u_long)(uintptr_t)stack_base, 488 imgp->ps_strings); 489 490 /* Cache arguments if they fit inside our allowance */ 491 if (ps_arg_cache_limit >= i + sizeof(struct pargs)) { 492 bcopy(imgp->stringbase, newargs->ar_args, i); 493 p->p_args = newargs; 494 newargs = NULL; 495 } 496 done1: 497 PROC_UNLOCK(p); 498 499 /* 500 * Free any resources malloc'd earlier that we didn't use. 501 */ 502 uifree(euip); 503 if (newcred == NULL) 504 crfree(oldcred); 505 else 506 crfree(newcred); 507 /* 508 * Handle deferred decrement of ref counts. 509 */ 510 if (textvp != NULL) 511 vrele(textvp); 512 #ifdef KTRACE 513 if (tracevp != NULL) 514 vrele(tracevp); 515 #endif 516 if (oldargs != NULL) 517 pargs_drop(oldargs); 518 if (newargs != NULL) 519 pargs_drop(newargs); 520 521 exec_fail_dealloc: 522 523 /* 524 * free various allocated resources 525 */ 526 if (imgp->firstpage) 527 exec_unmap_first_page(imgp); 528 529 if (imgp->stringbase != NULL) 530 kmem_free_wakeup(exec_map, (vm_offset_t)imgp->stringbase, 531 ARG_MAX + PAGE_SIZE); 532 533 if (imgp->vp) { 534 NDFREE(ndp, NDF_ONLY_PNBUF); 535 vrele(imgp->vp); 536 } 537 538 if (imgp->object) 539 vm_object_deallocate(imgp->object); 540 541 if (error == 0) 542 goto done2; 543 544 exec_fail: 545 /* we're done here, clear P_INEXEC */ 546 PROC_LOCK(p); 547 p->p_flag &= ~P_INEXEC; 548 PROC_UNLOCK(p); 549 550 if (imgp->vmspace_destroyed) { 551 /* sorry, no more process anymore. exit gracefully */ 552 exit1(td, W_EXITCODE(0, SIGABRT)); 553 /* NOT REACHED */ 554 error = 0; 555 } 556 done2: 557 mtx_unlock(&Giant); 558 return (error); 559 } 560 561 int 562 exec_map_first_page(imgp) 563 struct image_params *imgp; 564 { 565 int rv, i; 566 int initial_pagein; 567 vm_page_t ma[VM_INITIAL_PAGEIN]; 568 vm_object_t object; 569 570 GIANT_REQUIRED; 571 572 if (imgp->firstpage) { 573 exec_unmap_first_page(imgp); 574 } 575 576 VOP_GETVOBJECT(imgp->vp, &object); 577 578 ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); 579 580 if ((ma[0]->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) { 581 initial_pagein = VM_INITIAL_PAGEIN; 582 if (initial_pagein > object->size) 583 initial_pagein = object->size; 584 for (i = 1; i < initial_pagein; i++) { 585 if ((ma[i] = vm_page_lookup(object, i)) != NULL) { 586 if ((ma[i]->flags & PG_BUSY) || ma[i]->busy) 587 break; 588 if (ma[i]->valid) 589 break; 590 vm_page_busy(ma[i]); 591 } else { 592 ma[i] = vm_page_alloc(object, i, VM_ALLOC_NORMAL); 593 if (ma[i] == NULL) 594 break; 595 } 596 } 597 initial_pagein = i; 598 599 rv = vm_pager_get_pages(object, ma, initial_pagein, 0); 600 ma[0] = vm_page_lookup(object, 0); 601 602 if ((rv != VM_PAGER_OK) || (ma[0] == NULL) || (ma[0]->valid == 0)) { 603 if (ma[0]) { 604 vm_page_lock_queues(); 605 vm_page_protect(ma[0], VM_PROT_NONE); 606 vm_page_free(ma[0]); 607 vm_page_unlock_queues(); 608 } 609 return EIO; 610 } 611 } 612 vm_page_lock_queues(); 613 vm_page_wire(ma[0]); 614 vm_page_wakeup(ma[0]); 615 vm_page_unlock_queues(); 616 617 pmap_qenter((vm_offset_t)imgp->image_header, ma, 1); 618 imgp->firstpage = ma[0]; 619 620 return 0; 621 } 622 623 void 624 exec_unmap_first_page(imgp) 625 struct image_params *imgp; 626 { 627 GIANT_REQUIRED; 628 629 if (imgp->firstpage) { 630 pmap_qremove((vm_offset_t)imgp->image_header, 1); 631 vm_page_lock_queues(); 632 vm_page_unwire(imgp->firstpage, 1); 633 vm_page_unlock_queues(); 634 imgp->firstpage = NULL; 635 } 636 } 637 638 /* 639 * Destroy old address space, and allocate a new stack 640 * The new stack is only SGROWSIZ large because it is grown 641 * automatically in trap.c. 642 */ 643 int 644 exec_new_vmspace(imgp, minuser, maxuser, stack_addr) 645 struct image_params *imgp; 646 vm_offset_t minuser, maxuser, stack_addr; 647 { 648 int error; 649 struct execlist *ep; 650 struct proc *p = imgp->proc; 651 struct vmspace *vmspace = p->p_vmspace; 652 653 GIANT_REQUIRED; 654 655 stack_addr = stack_addr - maxssiz; 656 657 imgp->vmspace_destroyed = 1; 658 659 /* 660 * Perform functions registered with at_exec(). 661 */ 662 TAILQ_FOREACH(ep, &exec_list, next) 663 (*ep->function)(p); 664 665 /* 666 * Blow away entire process VM, if address space not shared, 667 * otherwise, create a new VM space so that other threads are 668 * not disrupted 669 */ 670 if (vmspace->vm_refcnt == 1 671 && vm_map_min(&vmspace->vm_map) == minuser 672 && vm_map_max(&vmspace->vm_map) == maxuser) { 673 if (vmspace->vm_shm) 674 shmexit(p); 675 pmap_remove_pages(vmspace_pmap(vmspace), minuser, maxuser); 676 vm_map_remove(&vmspace->vm_map, minuser, maxuser); 677 } else { 678 vmspace_exec(p, minuser, maxuser); 679 vmspace = p->p_vmspace; 680 } 681 682 /* Allocate a new stack */ 683 error = vm_map_stack(&vmspace->vm_map, stack_addr, (vm_size_t)maxssiz, 684 VM_PROT_ALL, VM_PROT_ALL, 0); 685 if (error) 686 return (error); 687 688 #ifdef __ia64__ 689 { 690 /* 691 * Allocate backing store. We really need something 692 * similar to vm_map_stack which can allow the backing 693 * store to grow upwards. This will do for now. 694 */ 695 vm_offset_t bsaddr; 696 bsaddr = USRSTACK - 2*maxssiz; 697 error = vm_map_find(&vmspace->vm_map, 0, 0, &bsaddr, 698 regstkpages * PAGE_SIZE, 0, 699 VM_PROT_ALL, VM_PROT_ALL, 0); 700 FIRST_THREAD_IN_PROC(p)->td_md.md_bspstore = bsaddr; 701 } 702 #endif 703 704 /* vm_ssize and vm_maxsaddr are somewhat antiquated concepts in the 705 * VM_STACK case, but they are still used to monitor the size of the 706 * process stack so we can check the stack rlimit. 707 */ 708 vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT; 709 vmspace->vm_maxsaddr = (char *)USRSTACK - maxssiz; 710 711 return(0); 712 } 713 714 /* 715 * Copy out argument and environment strings from the old process 716 * address space into the temporary string buffer. 717 */ 718 int 719 exec_extract_strings(imgp) 720 struct image_params *imgp; 721 { 722 char **argv, **envv; 723 char *argp, *envp; 724 int error; 725 size_t length; 726 727 /* 728 * extract arguments first 729 */ 730 731 argv = imgp->uap->argv; 732 733 if (argv) { 734 argp = (caddr_t) (intptr_t) fuword(argv); 735 if (argp == (caddr_t) -1) 736 return (EFAULT); 737 if (argp) 738 argv++; 739 if (imgp->argv0) 740 argp = imgp->argv0; 741 if (argp) { 742 do { 743 if (argp == (caddr_t) -1) 744 return (EFAULT); 745 if ((error = copyinstr(argp, imgp->stringp, 746 imgp->stringspace, &length))) { 747 if (error == ENAMETOOLONG) 748 return(E2BIG); 749 return (error); 750 } 751 imgp->stringspace -= length; 752 imgp->stringp += length; 753 imgp->argc++; 754 } while ((argp = (caddr_t) (intptr_t) fuword(argv++))); 755 } 756 } 757 758 imgp->endargs = imgp->stringp; 759 760 /* 761 * extract environment strings 762 */ 763 764 envv = imgp->uap->envv; 765 766 if (envv) { 767 while ((envp = (caddr_t) (intptr_t) fuword(envv++))) { 768 if (envp == (caddr_t) -1) 769 return (EFAULT); 770 if ((error = copyinstr(envp, imgp->stringp, 771 imgp->stringspace, &length))) { 772 if (error == ENAMETOOLONG) 773 return(E2BIG); 774 return (error); 775 } 776 imgp->stringspace -= length; 777 imgp->stringp += length; 778 imgp->envc++; 779 } 780 } 781 782 return (0); 783 } 784 785 /* 786 * Copy strings out to the new process address space, constructing 787 * new arg and env vector tables. Return a pointer to the base 788 * so that it can be used as the initial stack pointer. 789 */ 790 register_t * 791 exec_copyout_strings(imgp) 792 struct image_params *imgp; 793 { 794 int argc, envc; 795 char **vectp; 796 char *stringp, *destp; 797 register_t *stack_base; 798 struct ps_strings *arginfo; 799 int szsigcode; 800 801 /* 802 * Calculate string base and vector table pointers. 803 * Also deal with signal trampoline code for this exec type. 804 */ 805 arginfo = (struct ps_strings *)PS_STRINGS; 806 szsigcode = *(imgp->proc->p_sysent->sv_szsigcode); 807 destp = (caddr_t)arginfo - szsigcode - SPARE_USRSPACE - 808 roundup((ARG_MAX - imgp->stringspace), sizeof(char *)); 809 810 /* 811 * install sigcode 812 */ 813 if (szsigcode) 814 copyout(imgp->proc->p_sysent->sv_sigcode, 815 ((caddr_t)arginfo - szsigcode), szsigcode); 816 817 /* 818 * If we have a valid auxargs ptr, prepare some room 819 * on the stack. 820 */ 821 if (imgp->auxargs) { 822 /* 823 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for 824 * lower compatibility. 825 */ 826 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size 827 : (AT_COUNT * 2); 828 /* 829 * The '+ 2' is for the null pointers at the end of each of 830 * the arg and env vector sets,and imgp->auxarg_size is room 831 * for argument of Runtime loader. 832 */ 833 vectp = (char **) (destp - (imgp->argc + imgp->envc + 2 + 834 imgp->auxarg_size) * sizeof(char *)); 835 836 } else 837 /* 838 * The '+ 2' is for the null pointers at the end of each of 839 * the arg and env vector sets 840 */ 841 vectp = (char **) 842 (destp - (imgp->argc + imgp->envc + 2) * sizeof(char *)); 843 844 /* 845 * vectp also becomes our initial stack base 846 */ 847 stack_base = (register_t *)vectp; 848 849 stringp = imgp->stringbase; 850 argc = imgp->argc; 851 envc = imgp->envc; 852 853 /* 854 * Copy out strings - arguments and environment. 855 */ 856 copyout(stringp, destp, ARG_MAX - imgp->stringspace); 857 858 /* 859 * Fill in "ps_strings" struct for ps, w, etc. 860 */ 861 suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp); 862 suword(&arginfo->ps_nargvstr, argc); 863 864 /* 865 * Fill in argument portion of vector table. 866 */ 867 for (; argc > 0; --argc) { 868 suword(vectp++, (long)(intptr_t)destp); 869 while (*stringp++ != 0) 870 destp++; 871 destp++; 872 } 873 874 /* a null vector table pointer separates the argp's from the envp's */ 875 suword(vectp++, 0); 876 877 suword(&arginfo->ps_envstr, (long)(intptr_t)vectp); 878 suword(&arginfo->ps_nenvstr, envc); 879 880 /* 881 * Fill in environment portion of vector table. 882 */ 883 for (; envc > 0; --envc) { 884 suword(vectp++, (long)(intptr_t)destp); 885 while (*stringp++ != 0) 886 destp++; 887 destp++; 888 } 889 890 /* end of vector table is a null pointer */ 891 suword(vectp, 0); 892 893 return (stack_base); 894 } 895 896 /* 897 * Check permissions of file to execute. 898 * Called with imgp->vp locked. 899 * Return 0 for success or error code on failure. 900 */ 901 int 902 exec_check_permissions(imgp) 903 struct image_params *imgp; 904 { 905 struct vnode *vp = imgp->vp; 906 struct vattr *attr = imgp->attr; 907 struct thread *td; 908 int error; 909 910 td = curthread; /* XXXKSE */ 911 /* Get file attributes */ 912 error = VOP_GETATTR(vp, attr, td->td_ucred, td); 913 if (error) 914 return (error); 915 916 /* 917 * 1) Check if file execution is disabled for the filesystem that this 918 * file resides on. 919 * 2) Insure that at least one execute bit is on - otherwise root 920 * will always succeed, and we don't want to happen unless the 921 * file really is executable. 922 * 3) Insure that the file is a regular file. 923 */ 924 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || 925 ((attr->va_mode & 0111) == 0) || 926 (attr->va_type != VREG)) 927 return (EACCES); 928 929 /* 930 * Zero length files can't be exec'd 931 */ 932 if (attr->va_size == 0) 933 return (ENOEXEC); 934 935 /* 936 * Check for execute permission to file based on current credentials. 937 */ 938 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); 939 if (error) 940 return (error); 941 942 /* 943 * Check number of open-for-writes on the file and deny execution 944 * if there are any. 945 */ 946 if (vp->v_writecount) 947 return (ETXTBSY); 948 949 /* 950 * Call filesystem specific open routine (which does nothing in the 951 * general case). 952 */ 953 error = VOP_OPEN(vp, FREAD, td->td_ucred, td); 954 return (error); 955 } 956 957 /* 958 * Exec handler registration 959 */ 960 int 961 exec_register(execsw_arg) 962 const struct execsw *execsw_arg; 963 { 964 const struct execsw **es, **xs, **newexecsw; 965 int count = 2; /* New slot and trailing NULL */ 966 967 if (execsw) 968 for (es = execsw; *es; es++) 969 count++; 970 newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK); 971 if (newexecsw == NULL) 972 return ENOMEM; 973 xs = newexecsw; 974 if (execsw) 975 for (es = execsw; *es; es++) 976 *xs++ = *es; 977 *xs++ = execsw_arg; 978 *xs = NULL; 979 if (execsw) 980 free(execsw, M_TEMP); 981 execsw = newexecsw; 982 return 0; 983 } 984 985 int 986 exec_unregister(execsw_arg) 987 const struct execsw *execsw_arg; 988 { 989 const struct execsw **es, **xs, **newexecsw; 990 int count = 1; 991 992 if (execsw == NULL) 993 panic("unregister with no handlers left?\n"); 994 995 for (es = execsw; *es; es++) { 996 if (*es == execsw_arg) 997 break; 998 } 999 if (*es == NULL) 1000 return ENOENT; 1001 for (es = execsw; *es; es++) 1002 if (*es != execsw_arg) 1003 count++; 1004 newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK); 1005 if (newexecsw == NULL) 1006 return ENOMEM; 1007 xs = newexecsw; 1008 for (es = execsw; *es; es++) 1009 if (*es != execsw_arg) 1010 *xs++ = *es; 1011 *xs = NULL; 1012 if (execsw) 1013 free(execsw, M_TEMP); 1014 execsw = newexecsw; 1015 return 0; 1016 } 1017 1018 int 1019 at_exec(function) 1020 execlist_fn function; 1021 { 1022 struct execlist *ep; 1023 1024 #ifdef INVARIANTS 1025 /* Be noisy if the programmer has lost track of things */ 1026 if (rm_at_exec(function)) 1027 printf("WARNING: exec callout entry (%p) already present\n", 1028 function); 1029 #endif 1030 ep = malloc(sizeof(*ep), M_ATEXEC, M_NOWAIT); 1031 if (ep == NULL) 1032 return (ENOMEM); 1033 ep->function = function; 1034 TAILQ_INSERT_TAIL(&exec_list, ep, next); 1035 return (0); 1036 } 1037 1038 /* 1039 * Scan the exec callout list for the given item and remove it. 1040 * Returns the number of items removed (0 or 1) 1041 */ 1042 int 1043 rm_at_exec(function) 1044 execlist_fn function; 1045 { 1046 struct execlist *ep; 1047 1048 TAILQ_FOREACH(ep, &exec_list, next) { 1049 if (ep->function == function) { 1050 TAILQ_REMOVE(&exec_list, ep, next); 1051 free(ep, M_ATEXEC); 1052 return(1); 1053 } 1054 } 1055 return (0); 1056 } 1057 1058