/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 * $FreeBSD$
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include "opt_compat.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/vmmeter.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>

#ifndef _SYS_SYSPROTO_H_
struct sbrk_args {
	int incr;
};
#endif

static int max_proc_mmap;
SYSCTL_INT(_vm, OID_AUTO, max_proc_mmap, CTLFLAG_RW, &max_proc_mmap, 0, "");

/*
 * Set the maximum number of vm_map_entry structures per process.  Roughly
 * speaking vm_map_entry structures are tiny, so allowing them to eat 1/100
 * of our KVM malloc space still results in generous limits.  We want a
 * default that is good enough to prevent the kernel from running out of
 * resources if attacked from a compromised user account, but generous
 * enough that multi-threaded processes are not unduly inconvenienced.
 */
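/*
 * Illustrative sizing only: assuming a vm_kmem_size of 128 MB and a
 * vm_map_entry of roughly 64 bytes (both vary with platform and
 * configuration), the calculation below caps a process at about
 * 20,000 map entries (128 MB / 64 / 100).
 */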
static void vmmapentry_rsrc_init(void *);
SYSINIT(vmmersrc, SI_SUB_KVM_RSRC, SI_ORDER_FIRST, vmmapentry_rsrc_init, NULL)

static void
vmmapentry_rsrc_init(dummy)
	void *dummy;
{
	max_proc_mmap = vm_kmem_size / sizeof(struct vm_map_entry);
	max_proc_mmap /= 100;
}

/*
 * MPSAFE
 */
/* ARGSUSED */
int
sbrk(td, uap)
	struct thread *td;
	struct sbrk_args *uap;
{
	/* Not yet implemented */
	/* mtx_lock(&Giant); */
	/* mtx_unlock(&Giant); */
	return (EOPNOTSUPP);
}

#ifndef _SYS_SYSPROTO_H_
struct sstk_args {
	int incr;
};
#endif

/*
 * MPSAFE
 */
/* ARGSUSED */
int
sstk(td, uap)
	struct thread *td;
	struct sstk_args *uap;
{
	/* Not yet implemented */
	/* mtx_lock(&Giant); */
	/* mtx_unlock(&Giant); */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
#ifndef _SYS_SYSPROTO_H_
struct getpagesize_args {
	int dummy;
};
#endif

/* ARGSUSED */
int
ogetpagesize(td, uap)
	struct thread *td;
	struct getpagesize_args *uap;
{
	/* MP SAFE */
	td->td_retval[0] = PAGE_SIZE;
	return (0);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */


/*
 * Memory Map (mmap) system call.  Note that the file offset
 * and address are allowed to be NOT page aligned, though if
 * the MAP_FIXED flag is set, both must have the same remainder
 * modulo the PAGE_SIZE (POSIX 1003.1b).  If the address is not
 * page-aligned, the actual mapping starts at trunc_page(addr)
 * and the return value is adjusted up by the page offset.
 *
 * Generally speaking, only character devices which are themselves
 * memory-based, such as a video framebuffer, can be mmap'd.  Otherwise
 * there would be no cache coherency between a descriptor and a VM mapping
 * both to the same character device.
 *
 * Block devices can be mmap'd no matter what they represent.  Cache coherency
 * is maintained as long as you do not write directly to the underlying
 * character device.
 */
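/*
 * Illustrative example only (assumes 4 KB pages): for a non-fixed request
 * such as mmap(NULL, 100, PROT_READ, MAP_PRIVATE, fd, 0x1234), the mapping
 * is established from file offset 0x1000 (trunc_page of the offset), and
 * the address returned to the caller is the chosen page-aligned address
 * plus the 0x234 page offset.
 */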
#ifndef _SYS_SYSPROTO_H_
struct mmap_args {
	void *addr;
	size_t len;
	int prot;
	int flags;
	int fd;
	long pad;
	off_t pos;
};
#endif

/*
 * MPSAFE
 */
int
mmap(td, uap)
	struct thread *td;
	struct mmap_args *uap;
{
	struct file *fp = NULL;
	struct vnode *vp;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t prot, maxprot;
	void *handle;
	int flags, error;
	int disablexworkaround;
	off_t pos;
	struct vmspace *vms = td->td_proc->p_vmspace;
	vm_object_t obj;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;
	flags = uap->flags;
	pos = uap->pos;

	fp = NULL;
	/* make sure mapping fits into numeric range etc */
	if ((ssize_t) uap->len < 0 ||
	    ((flags & MAP_ANON) && uap->fd != -1))
		return (EINVAL);

	if (flags & MAP_STACK) {
		if ((uap->fd != -1) ||
		    ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE)))
			return (EINVAL);
		flags |= MAP_ANON;
		pos = 0;
	}

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;

	/* Adjust size for rounding (on both ends). */
	size += pageoff;			/* low end... */
	size = (vm_size_t) round_page(size);	/* hi end */

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);
		/* Address range must be all in user VM space. */
		if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
			return (EINVAL);
#ifndef __i386__
		if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
			return (EINVAL);
#endif
		if (addr + size < addr)
			return (EINVAL);
	}
	/*
	 * XXX for non-fixed mappings where no hint is provided or
	 * the hint would fall in the potential heap space,
	 * place it after the end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable
	 * location.
	 */
	else if (addr == 0 ||
	    (addr >= round_page((vm_offset_t)vms->vm_taddr) &&
	     addr < round_page((vm_offset_t)vms->vm_daddr + maxdsiz)))
		addr = round_page((vm_offset_t)vms->vm_daddr + maxdsiz);

	mtx_lock(&Giant);	/* syscall marked mp-safe but isn't */
	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.
		 */
		handle = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	} else {
		/*
		 * Mapping file, get fp for validation.  Obtain vnode and make
		 * sure it is of appropriate type.  Don't let the descriptor
		 * disappear on us if we block.
		 */
		if ((error = fget(td, uap->fd, &fp)) != 0)
			goto done;
		if (fp->f_type != DTYPE_VNODE) {
			error = EINVAL;
			goto done;
		}

		/*
		 * POSIX shared-memory objects are defined to have
		 * kernel persistence, and are not defined to support
		 * read(2)/write(2) -- or even open(2).  Thus, we can
		 * use MAP_ASYNC to trade on-disk coherence for speed.
		 * The shm_open(3) library routine turns on the FPOSIXSHM
		 * flag to request this behavior.
		 */
		if (fp->f_flag & FPOSIXSHM)
			flags |= MAP_NOSYNC;
		vp = (struct vnode *) fp->f_data;
		if (vp->v_type != VREG && vp->v_type != VCHR) {
			error = EINVAL;
			goto done;
		}
		if (vp->v_type == VREG) {
			/*
			 * Get the proper underlying object
			 */
			if (VOP_GETVOBJECT(vp, &obj) != 0) {
				error = EINVAL;
				goto done;
			}
			vp = (struct vnode *)obj->handle;
		}
		/*
		 * XXX hack to handle use of /dev/zero to map anon memory (ala
		 * SunOS).
		 */
		if ((vp->v_type == VCHR) &&
		    (vp->v_rdev->si_devsw->d_flags & D_MMAP_ANON)) {
			handle = NULL;
			maxprot = VM_PROT_ALL;
			flags |= MAP_ANON;
			pos = 0;
		} else {
			/*
			 * cdevs do not provide private mappings of any kind.
			 */
			/*
			 * However, for the XIG X server to continue to work,
			 * we should allow the superuser to do it anyway.
			 * We only allow it at securelevel < 1.
			 * (Because the XIG X server writes directly to video
			 * memory via /dev/mem, it should never work at any
			 * other securelevel.)
			 * XXX this will have to go
			 */
			if (securelevel_ge(td->td_ucred, 1))
				disablexworkaround = 1;
			else
				disablexworkaround = suser(td);
			if (vp->v_type == VCHR && disablexworkaround &&
			    (flags & (MAP_PRIVATE|MAP_COPY))) {
				error = EINVAL;
				goto done;
			}
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination?  What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;	/* ??? */
			if (fp->f_flag & FREAD) {
				maxprot |= VM_PROT_READ;
			} else if (prot & PROT_READ) {
				error = EACCES;
				goto done;
			}
			/*
			 * If we are sharing potential changes (either via
			 * MAP_SHARED or via the implicit sharing of character
			 * device mappings), and we are trying to get write
			 * permission although we opened it without asking
			 * for it, bail out.  Check for superuser, only if
			 * we're at securelevel < 1, to allow the XIG X server
			 * to continue to work.
			 */
			if ((flags & MAP_SHARED) != 0 ||
			    (vp->v_type == VCHR && disablexworkaround)) {
				if ((fp->f_flag & FWRITE) != 0) {
					struct vattr va;
					if ((error =
					    VOP_GETATTR(vp, &va,
						td->td_ucred, td))) {
						goto done;
					}
					if ((va.va_flags &
					    (SF_SNAPSHOT|IMMUTABLE|APPEND)) == 0) {
						maxprot |= VM_PROT_WRITE;
					} else if (prot & PROT_WRITE) {
						error = EPERM;
						goto done;
					}
				} else if ((prot & PROT_WRITE) != 0) {
					error = EACCES;
					goto done;
				}
			} else {
				maxprot |= VM_PROT_WRITE;
			}

			handle = (void *)vp;
		}
	}

	/*
	 * Do not allow more than a certain number of vm_map_entry structures
	 * per process.  Scale with the number of rforks sharing the map
	 * to make the limit reasonable for threads.
	 */
	if (max_proc_mmap &&
	    vms->vm_map.nentries >= max_proc_mmap * vms->vm_refcnt) {
		error = ENOMEM;
		goto done;
	}

	mtx_unlock(&Giant);
	error = vm_mmap(&vms->vm_map, &addr, size, prot, maxprot,
	    flags, handle, pos);
	if (error == 0)
		td->td_retval[0] = (register_t) (addr + pageoff);
	goto done2;
done:
	mtx_unlock(&Giant);
done2:
	if (fp)
		fdrop(fp, td);
	return (error);
}

#ifdef COMPAT_43
#ifndef _SYS_SYSPROTO_H_
struct ommap_args {
	caddr_t addr;
	int len;
	int prot;
	int flags;
	int fd;
	long pos;
};
#endif
int
ommap(td, uap)
	struct thread *td;
	struct ommap_args *uap;
{
	struct mmap_args nargs;
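	/*
	 * cvtbsdprot[] translates the old 4.3BSD protection value, in
	 * which exec is bit 0x1, write is 0x2 and read is 0x4, into the
	 * current PROT_* flags.
	 */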
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC | PROT_WRITE,
		PROT_READ,
		PROT_EXEC | PROT_READ,
		PROT_WRITE | PROT_READ,
		PROT_EXEC | PROT_WRITE | PROT_READ,
	};

#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100

	nargs.addr = uap->addr;
	nargs.len = uap->len;
	nargs.prot = cvtbsdprot[uap->prot & 0x7];
	nargs.flags = 0;
	if (uap->flags & OMAP_ANON)
		nargs.flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		nargs.flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		nargs.flags |= MAP_SHARED;
	else
		nargs.flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		nargs.flags |= MAP_FIXED;
	nargs.fd = uap->fd;
	nargs.pos = uap->pos;
	return (mmap(td, &nargs));
}
#endif /* COMPAT_43 */


#ifndef _SYS_SYSPROTO_H_
struct msync_args {
	void *addr;
	int len;
	int flags;
};
#endif
/*
 * MPSAFE
 */
int
msync(td, uap)
	struct thread *td;
	struct msync_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int flags;
	vm_map_t map;
	int rv;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	flags = uap->flags;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	mtx_lock(&Giant);

	map = &td->td_proc->p_vmspace->vm_map;

	/*
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we don't
	 * really keep track of individual mmaps so we approximate by flushing
	 * the range of the map entry containing addr.  This can be incorrect
	 * if the region splits or is coalesced with a neighbor.
	 */
	if (size == 0) {
		vm_map_entry_t entry;

		vm_map_lock_read(map);
		rv = vm_map_lookup_entry(map, addr, &entry);
		vm_map_unlock_read(map);
		if (rv == FALSE) {
			rv = -1;
			goto done2;
		}
		addr = entry->start;
		size = entry->end - entry->start;
	}

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_clean(map, addr, addr + size, (flags & MS_ASYNC) == 0,
	    (flags & MS_INVALIDATE) != 0);

done2:
	mtx_unlock(&Giant);

	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
		return (EINVAL);	/* Sun returns ENOMEM? */
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
}

#ifndef _SYS_SYSPROTO_H_
struct munmap_args {
	void *addr;
	size_t len;
};
#endif
/*
 * MPSAFE
 */
int
munmap(td, uap)
	struct thread *td;
	struct munmap_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_map_t map;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if (size == 0)
		return (0);

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
		return (EINVAL);
#ifndef __i386__
	if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
		return (EINVAL);
#endif
	map = &td->td_proc->p_vmspace->vm_map;
	/*
	 * Make sure entire range is allocated.
	 */
	if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
		return (EINVAL);

	/* returns nothing but KERN_SUCCESS anyway */
	(void) vm_map_remove(map, addr, addr + size);
	return (0);
}

#if 0
void
munmapfd(td, fd)
	struct thread *td;
	int fd;
{
	/*
	 * XXX should unmap any regions mapped to this file
	 */
	FILEDESC_LOCK(td->td_proc->p_fd);
	td->td_proc->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
	FILEDESC_UNLOCK(td->td_proc->p_fd);
}
#endif

#ifndef _SYS_SYSPROTO_H_
struct mprotect_args {
	const void *addr;
	size_t len;
	int prot;
};
#endif
/*
 * MPSAFE
 */
int
mprotect(td, uap)
	struct thread *td;
	struct mprotect_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t prot;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;
#if defined(VM_PROT_READ_IS_EXEC)
	if (prot & VM_PROT_READ)
		prot |= VM_PROT_EXECUTE;
#endif

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_protect(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, prot, FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct minherit_args {
	void *addr;
	size_t len;
	int inherit;
};
#endif
/*
 * MPSAFE
 */
int
minherit(td, uap)
	struct thread *td;
	struct minherit_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_inherit_t inherit;

	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	inherit = uap->inherit;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_inherit(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct madvise_args {
	void *addr;
	size_t len;
	int behav;
};
#endif

/*
 * MPSAFE
 */
/* ARGSUSED */
int
madvise(td, uap)
	struct thread *td;
	struct madvise_args *uap;
{
	vm_offset_t start, end;

	/*
	 * Check for illegal behavior
	 */
	if (uap->behav < 0 || uap->behav > MADV_CORE)
		return (EINVAL);
	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 &&
	    ((vm_offset_t) uap->addr + uap->len) > VM_MAXUSER_ADDRESS)
		return (EINVAL);
#ifndef __i386__
	if (VM_MIN_ADDRESS > 0 && uap->addr < VM_MIN_ADDRESS)
		return (EINVAL);
#endif
	if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr)
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	start = trunc_page((vm_offset_t) uap->addr);
	end = round_page((vm_offset_t) uap->addr + uap->len);

	if (vm_map_madvise(&td->td_proc->p_vmspace->vm_map, start, end,
	    uap->behav))
		return (EINVAL);
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mincore_args {
	const void *addr;
	size_t len;
	char *vec;
};
#endif

/*
 * MPSAFE
 */
/* ARGSUSED */
int
mincore(td, uap)
	struct thread *td;
	struct mincore_args *uap;
{
	vm_offset_t addr, first_addr;
	vm_offset_t end, cend;
	pmap_t pmap;
	vm_map_t map;
	char *vec;
	int error = 0;
	int vecindex, lastvecindex;
	vm_map_entry_t current;
	vm_map_entry_t entry;
	int mincoreinfo;
	unsigned int timestamp;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page((vm_offset_t) uap->addr);
	end = addr + (vm_size_t)round_page(uap->len);
	if (VM_MAXUSER_ADDRESS > 0 && end > VM_MAXUSER_ADDRESS)
		return (EINVAL);
	if (end < addr)
		return (EINVAL);

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	mtx_lock(&Giant);
	map = &td->td_proc->p_vmspace->vm_map;
	pmap = vmspace_pmap(td->td_proc->p_vmspace);

	vm_map_lock_read(map);
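	/*
	 * The scan below drops the map lock around each subyte() store,
	 * since the store may fault and need the map.  map->timestamp is
	 * sampled at RestartScan and rechecked whenever the lock is
	 * retaken; if the map changed in the meantime, the scan is
	 * restarted from the current address.
	 */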
RestartScan:
	timestamp = map->timestamp;

	if (!vm_map_lookup_entry(map, addr, &entry))
		entry = entry->next;

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current process's address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for (current = entry;
	    (current != &map->header) && (current->start < end);
	    current = current->next) {

		/*
		 * ignore submaps (for now) or null objects
		 */
		if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) ||
		    current->object.vm_object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		/*
		 * scan this entry one page at a time
		 */
		while (addr < cend) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 */
			mincoreinfo = pmap_mincore(pmap, addr);
			if (!mincoreinfo) {
				vm_pindex_t pindex;
				vm_ooffset_t offset;
				vm_page_t m;
				/*
				 * calculate the page index into the object
				 */
				offset = current->offset + (addr - current->start);
				pindex = OFF_TO_IDX(offset);
				m = vm_page_lookup(current->object.vm_object,
				    pindex);
				/*
				 * if the page is resident, then gather
				 * information about it.
				 */
				if (m) {
					mincoreinfo = MINCORE_INCORE;
					if (m->dirty ||
					    pmap_is_modified(m))
						mincoreinfo |= MINCORE_MODIFIED_OTHER;
					if ((m->flags & PG_REFERENCED) ||
					    pmap_ts_referenced(m)) {
						vm_page_flag_set(m, PG_REFERENCED);
						mincoreinfo |= MINCORE_REFERENCED_OTHER;
					}
				}
			}

			/*
			 * subyte may page fault.  In case it needs to modify
			 * the map, we release the lock.
			 */
			vm_map_unlock_read(map);

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = OFF_TO_IDX(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make sure
			 * that the byte vector is zeroed for those skipped
			 * entries.
			 */
			while ((lastvecindex + 1) < vecindex) {
				error = subyte(vec + lastvecindex, 0);
				if (error) {
					error = EFAULT;
					goto done2;
				}
				++lastvecindex;
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte(vec + vecindex, mincoreinfo);
			if (error) {
				error = EFAULT;
				goto done2;
			}

			/*
			 * If the map has changed, due to the subyte, the previous
			 * output may be invalid.
			 */
			vm_map_lock_read(map);
			if (timestamp != map->timestamp)
				goto RestartScan;

			lastvecindex = vecindex;
			addr += PAGE_SIZE;
		}
	}

	/*
	 * subyte may page fault.  In case it needs to modify
	 * the map, we release the lock.
	 */
	vm_map_unlock_read(map);

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = OFF_TO_IDX(end - first_addr);
	while ((lastvecindex + 1) < vecindex) {
		error = subyte(vec + lastvecindex, 0);
		if (error) {
			error = EFAULT;
			goto done2;
		}
		++lastvecindex;
	}

	/*
	 * If the map has changed, due to the subyte, the previous
	 * output may be invalid.
	 */
	vm_map_lock_read(map);
	if (timestamp != map->timestamp)
		goto RestartScan;
	vm_map_unlock_read(map);
done2:
	mtx_unlock(&Giant);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct mlock_args {
	const void *addr;
	size_t len;
};
#endif
/*
 * MPSAFE
 */
int
mlock(td, uap)
	struct thread *td;
	struct mlock_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int error;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disable wrap around */
	if (addr + size < addr)
		return (EINVAL);

	if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);

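	/*
	 * Where the pmap layer provides pmap_wired_count(), the
	 * per-process RLIMIT_MEMLOCK limit is enforced; otherwise
	 * mlock() is restricted to the superuser.
	 */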
#ifdef pmap_wired_count
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&td->td_proc->p_vmspace->vm_map))) >
	    td->td_proc->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (ENOMEM);
#else
	error = suser(td);
	if (error)
		return (error);
#endif

	error = vm_map_wire(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, TRUE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

#ifndef _SYS_SYSPROTO_H_
struct mlockall_args {
	int how;
};
#endif

/*
 * MPSAFE
 */
int
mlockall(td, uap)
	struct thread *td;
	struct mlockall_args *uap;
{
	/* mtx_lock(&Giant); */
	/* mtx_unlock(&Giant); */
	return 0;
}

#ifndef _SYS_SYSPROTO_H_
struct munlockall_args {
	int how;
};
#endif

/*
 * MPSAFE
 */
int
munlockall(td, uap)
	struct thread *td;
	struct munlockall_args *uap;
{
	/* mtx_lock(&Giant); */
	/* mtx_unlock(&Giant); */
	return 0;
}

#ifndef _SYS_SYSPROTO_H_
struct munlock_args {
	const void *addr;
	size_t len;
};
#endif
/*
 * MPSAFE
 */
int
munlock(td, uap)
	struct thread *td;
	struct munlock_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int error;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disable wrap around */
	if (addr + size < addr)
		return (EINVAL);

#ifndef pmap_wired_count
	error = suser(td);
	if (error)
		return (error);
#endif

	error = vm_map_unwire(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, TRUE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * vm_mmap()
 *
 * MPSAFE
 *
 * Internal version of mmap.  Currently used by mmap, exec, and sys5
 * shared memory.  Handle is either a vnode pointer or NULL for MAP_ANON.
 */
int
vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
	vm_prot_t maxprot, int flags,
	void *handle,
	vm_ooffset_t foff)
{
	boolean_t fitit;
	vm_object_t object;
	struct vnode *vp = NULL;
	objtype_t type;
	int rv = KERN_SUCCESS;
	vm_ooffset_t objsize;
	int docow;
	struct thread *td = curthread;

	if (size == 0)
		return (0);

	objsize = size = round_page(size);

	if (td->td_proc->p_vmspace->vm_map.size + size >
	    td->td_proc->p_rlimit[RLIMIT_VMEM].rlim_cur) {
		return (ENOMEM);
	}

	/*
	 * We currently can only deal with page aligned file offsets.
	 * The check is here rather than in the syscall because the
	 * kernel calls this function internally for other mmapping
	 * operations (such as in exec) and non-aligned offsets will
	 * cause pmap inconsistencies...so we want to be sure to
	 * disallow this in all cases.
	 */
	if (foff & PAGE_MASK)
		return (EINVAL);

	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		if (*addr != trunc_page(*addr))
			return (EINVAL);
		fitit = FALSE;
		(void) vm_map_remove(map, *addr, *addr + size);
	}

	/*
	 * Lookup/allocate object.
	 */
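	/*
	 * For file-backed mappings, handle is the vnode: character
	 * devices are backed by an OBJT_DEVICE object keyed on the
	 * dev_t, while regular files get an OBJT_VNODE object whose
	 * size is taken from the vnode's attributes.
	 */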
	if (flags & MAP_ANON) {
		type = OBJT_DEFAULT;
		/*
		 * Unnamed anonymous regions always start at 0.
		 */
		if (handle == 0)
			foff = 0;
	} else {
		vp = (struct vnode *) handle;
		mtx_lock(&Giant);
		if (vp->v_type == VCHR) {
			type = OBJT_DEVICE;
			handle = (void *)(intptr_t)vp->v_rdev;
		} else {
			struct vattr vat;
			int error;

			error = VOP_GETATTR(vp, &vat, td->td_ucred, td);
			if (error) {
				mtx_unlock(&Giant);
				return (error);
			}
			objsize = round_page(vat.va_size);
			type = OBJT_VNODE;
			/*
			 * if it is a regular file without any references
			 * we do not need to sync it.
			 */
			if (vp->v_type == VREG && vat.va_nlink == 0) {
				flags |= MAP_NOSYNC;
			}
		}
		mtx_unlock(&Giant);
	}

	if (handle == NULL) {
		object = NULL;
		docow = 0;
	} else {
		object = vm_pager_allocate(type,
		    handle, objsize, prot, foff);
		if (object == NULL) {
			return (type == OBJT_DEVICE ? EINVAL : ENOMEM);
		}
		docow = MAP_PREFAULT_PARTIAL;
	}

	/*
	 * Force device mappings to be shared.
	 */
	if (type == OBJT_DEVICE || type == OBJT_PHYS) {
		flags &= ~(MAP_PRIVATE|MAP_COPY);
		flags |= MAP_SHARED;
	}

	if ((flags & (MAP_ANON|MAP_SHARED)) == 0)
		docow |= MAP_COPY_ON_WRITE;
	if (flags & MAP_NOSYNC)
		docow |= MAP_DISABLE_SYNCER;
	if (flags & MAP_NOCORE)
		docow |= MAP_DISABLE_COREDUMP;

#if defined(VM_PROT_READ_IS_EXEC)
	if (prot & VM_PROT_READ)
		prot |= VM_PROT_EXECUTE;

	if (maxprot & VM_PROT_READ)
		maxprot |= VM_PROT_EXECUTE;
#endif

	if (fitit)
		*addr = pmap_addr_hint(object, *addr, size);

	if (flags & MAP_STACK)
		rv = vm_map_stack(map, *addr, size, prot,
		    maxprot, docow);
	else
		rv = vm_map_find(map, object, foff, addr, size, fitit,
		    prot, maxprot, docow);

	if (rv != KERN_SUCCESS) {
		/*
		 * Lose the object reference.  Will destroy the
		 * object if it's an unnamed anonymous mapping
		 * or named anonymous without other references.
		 */
		vm_object_deallocate(object);
	} else if (flags & MAP_SHARED) {
		/*
		 * Shared memory is also shared with children.
		 */
		rv = vm_map_inherit(map, *addr, *addr + size, VM_INHERIT_SHARE);
		if (rv != KERN_SUCCESS)
			(void) vm_map_remove(map, *addr, *addr + size);
	}
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}