/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_mac.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mac.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/vmmeter.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>

#ifndef _SYS_SYSPROTO_H_
struct sbrk_args {
	int incr;
};
#endif

static int max_proc_mmap;
SYSCTL_INT(_vm, OID_AUTO, max_proc_mmap, CTLFLAG_RW, &max_proc_mmap, 0, "");
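
/*
 * Example: the integer above is exported as the read/write sysctl
 * "vm.max_proc_mmap".  A minimal userland sketch for inspecting it might
 * look like the following; the program itself is hypothetical and is shown
 * only to illustrate how the knob is reached.
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int limit;
 *		size_t len = sizeof(limit);
 *
 *		if (sysctlbyname("vm.max_proc_mmap", &limit, &len,
 *		    NULL, 0) == -1)
 *			return (1);
 *		printf("vm.max_proc_mmap = %d\n", limit);
 *		return (0);
 *	}
 */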

/*
 * Set the maximum number of vm_map_entry structures per process.  Roughly
 * speaking vm_map_entry structures are tiny, so allowing them to eat 1/100
 * of our KVM malloc space still results in generous limits.  We want a
 * default that is good enough to prevent the kernel from running out of
 * resources if attacked from a compromised user account, but generous
 * enough that multi-threaded processes are not unduly inconvenienced.
 */
static void vmmapentry_rsrc_init(void *);
SYSINIT(vmmersrc, SI_SUB_KVM_RSRC, SI_ORDER_FIRST, vmmapentry_rsrc_init, NULL)

static void
vmmapentry_rsrc_init(dummy)
	void *dummy;
{
	max_proc_mmap = vm_kmem_size / sizeof(struct vm_map_entry);
	max_proc_mmap /= 100;
}

static int vm_mmap_vnode(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
    int *, struct vnode *, vm_ooffset_t, vm_object_t *);

/*
 * MPSAFE
 */
/* ARGSUSED */
int
sbrk(td, uap)
	struct thread *td;
	struct sbrk_args *uap;
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#ifndef _SYS_SYSPROTO_H_
struct sstk_args {
	int incr;
};
#endif

/*
 * MPSAFE
 */
/* ARGSUSED */
int
sstk(td, uap)
	struct thread *td;
	struct sstk_args *uap;
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43)
#ifndef _SYS_SYSPROTO_H_
struct getpagesize_args {
	int dummy;
};
#endif

/* ARGSUSED */
int
ogetpagesize(td, uap)
	struct thread *td;
	struct getpagesize_args *uap;
{
	/* MP SAFE */
	td->td_retval[0] = PAGE_SIZE;
	return (0);
}
#endif /* COMPAT_43 */


/*
 * Memory Map (mmap) system call.  Note that the file offset
 * and address are allowed to be NOT page aligned, though if
 * the MAP_FIXED flag is set, both must have the same remainder
 * modulo the PAGE_SIZE (POSIX 1003.1b).  If the address is not
 * page-aligned, the actual mapping starts at trunc_page(addr)
 * and the return value is adjusted up by the page offset.
 *
 * Generally speaking, only character devices which are themselves
 * memory-based, such as a video framebuffer, can be mmap'd.  Otherwise
 * there would be no cache coherency between a descriptor and a VM mapping
 * both to the same character device.
 *
 * Block devices can be mmap'd no matter what they represent.  Cache coherency
 * is maintained as long as you do not write directly to the underlying
 * character device.
 */
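
/*
 * Example: the offset rules described above can be exercised from userland
 * with a sketch like the one below.  The program, the "datafile" name and
 * the 100-byte offset are hypothetical and shown only to illustrate the
 * trunc_page()/page-offset adjustment: without MAP_FIXED an unaligned
 * offset is accepted, the mapping itself starts on a page boundary, and
 * the pointer returned by mmap() already refers to byte 100 of the file.
 *
 *	#include <sys/mman.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int fd;
 *		char *p;
 *
 *		fd = open("datafile", O_RDONLY);
 *		if (fd == -1)
 *			return (1);
 *		p = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE, fd, 100);
 *		if (p == MAP_FAILED)
 *			return (1);
 *		printf("first mapped byte (file offset 100): %c\n", p[0]);
 *		munmap(p, 4096);
 *		return (0);
 *	}
 */
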
#ifndef _SYS_SYSPROTO_H_
struct mmap_args {
	void *addr;
	size_t len;
	int prot;
	int flags;
	int fd;
	long pad;
	off_t pos;
};
#endif

/*
 * MPSAFE
 */
int
mmap(td, uap)
	struct thread *td;
	struct mmap_args *uap;
{
	struct file *fp;
	struct vnode *vp;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t prot, maxprot;
	void *handle;
	int flags, error;
	off_t pos;
	struct vmspace *vms = td->td_proc->p_vmspace;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;
	flags = uap->flags;
	pos = uap->pos;

	fp = NULL;
	/* Make sure the mapping fits into the numeric range, etc. */
	if ((ssize_t) uap->len < 0 ||
	    ((flags & MAP_ANON) && uap->fd != -1))
		return (EINVAL);

	if (flags & MAP_STACK) {
		if ((uap->fd != -1) ||
		    ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE)))
			return (EINVAL);
		flags |= MAP_ANON;
		pos = 0;
	}

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;

	/* Adjust size for rounding (on both ends). */
	size += pageoff;			/* low end... */
	size = (vm_size_t) round_page(size);	/* hi end */

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);
		/* Address range must be all in user VM space. */
		if (addr < vm_map_min(&vms->vm_map) ||
		    addr + size > vm_map_max(&vms->vm_map))
			return (EINVAL);
		if (addr + size < addr)
			return (EINVAL);
	} else {
		/*
		 * XXX for non-fixed mappings where no hint is provided or
		 * the hint would fall in the potential heap space,
		 * place it after the end of the largest possible heap.
		 *
		 * There should really be a pmap call to determine a reasonable
		 * location.
		 */
		PROC_LOCK(td->td_proc);
		if (addr == 0 ||
		    (addr >= round_page((vm_offset_t)vms->vm_taddr) &&
		    addr < round_page((vm_offset_t)vms->vm_daddr +
		    lim_max(td->td_proc, RLIMIT_DATA))))
			addr = round_page((vm_offset_t)vms->vm_daddr +
			    lim_max(td->td_proc, RLIMIT_DATA));
		PROC_UNLOCK(td->td_proc);
	}
	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.
		 */
		handle = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	} else {
		/*
		 * Mapping a file; get fp for validation.  Obtain the vnode
		 * and make sure it is of the appropriate type.  Don't let
		 * the descriptor disappear on us if we block.
		 */
		if ((error = fget(td, uap->fd, &fp)) != 0)
			goto done;
		if (fp->f_type != DTYPE_VNODE) {
			error = EINVAL;
			goto done;
		}
		/*
		 * POSIX shared-memory objects are defined to have
		 * kernel persistence, and are not defined to support
		 * read(2)/write(2) -- or even open(2).  Thus, we can
		 * use MAP_NOSYNC to trade on-disk coherence for speed.
		 * The shm_open(3) library routine turns on the FPOSIXSHM
		 * flag to request this behavior.
		 */
		if (fp->f_flag & FPOSIXSHM)
			flags |= MAP_NOSYNC;
		vp = fp->f_vnode;
		/*
		 * Ensure that file and memory protections are
		 * compatible.  Note that we only worry about
		 * writability if mapping is shared; in this case,
		 * current and max prot are dictated by the open file.
		 * XXX use the vnode instead?  Problem is: what
		 * credentials do we use for determination?  What if
		 * proc does a setuid?
		 */
		if (vp->v_mount != NULL && vp->v_mount->mnt_flag & MNT_NOEXEC)
			maxprot = VM_PROT_NONE;
		else
			maxprot = VM_PROT_EXECUTE;
		if (fp->f_flag & FREAD) {
			maxprot |= VM_PROT_READ;
		} else if (prot & PROT_READ) {
			error = EACCES;
			goto done;
		}
		/*
		 * If we are sharing potential changes (either via
		 * MAP_SHARED or via the implicit sharing of character
		 * device mappings), and we are trying to get write
		 * permission although we opened it without asking
		 * for it, bail out.
		 */
		if ((flags & MAP_SHARED) != 0) {
			if ((fp->f_flag & FWRITE) != 0) {
				maxprot |= VM_PROT_WRITE;
			} else if ((prot & PROT_WRITE) != 0) {
				error = EACCES;
				goto done;
			}
		} else if (vp->v_type != VCHR || (fp->f_flag & FWRITE) != 0) {
			maxprot |= VM_PROT_WRITE;
		}
		handle = (void *)vp;
	}

	/*
	 * Do not allow more than a certain number of vm_map_entry structures
	 * per process.  Scale with the number of rforks sharing the map
	 * to make the limit reasonable for threads.
	 */
	if (max_proc_mmap &&
	    vms->vm_map.nentries >= max_proc_mmap * vms->vm_refcnt) {
		error = ENOMEM;
		goto done;
	}

	error = vm_mmap(&vms->vm_map, &addr, size, prot, maxprot,
	    flags, handle, pos);
	if (error == 0)
		td->td_retval[0] = (register_t) (addr + pageoff);
done:
	if (fp)
		fdrop(fp, td);

	return (error);
}

#ifdef COMPAT_43
#ifndef _SYS_SYSPROTO_H_
struct ommap_args {
	caddr_t addr;
	int len;
	int prot;
	int flags;
	int fd;
	long pos;
};
#endif
int
ommap(td, uap)
	struct thread *td;
	struct ommap_args *uap;
{
	struct mmap_args nargs;
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC | PROT_WRITE,
		PROT_READ,
		PROT_EXEC | PROT_READ,
		PROT_WRITE | PROT_READ,
		PROT_EXEC | PROT_WRITE | PROT_READ,
	};

#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100

	nargs.addr = uap->addr;
	nargs.len = uap->len;
	nargs.prot = cvtbsdprot[uap->prot & 0x7];
	nargs.flags = 0;
	if (uap->flags & OMAP_ANON)
		nargs.flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		nargs.flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		nargs.flags |= MAP_SHARED;
	else
		nargs.flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		nargs.flags |= MAP_FIXED;
	nargs.fd = uap->fd;
	nargs.pos = uap->pos;
	return (mmap(td, &nargs));
}
#endif /* COMPAT_43 */


#ifndef _SYS_SYSPROTO_H_
struct msync_args {
	void *addr;
	int len;
	int flags;
};
#endif
/*
 * MPSAFE
 */
int
msync(td, uap)
	struct thread *td;
	struct msync_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int flags;
	vm_map_t map;
	int rv;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	flags = uap->flags;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	map = &td->td_proc->p_vmspace->vm_map;

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_sync(map, addr, addr + size, (flags & MS_ASYNC) == 0,
	    (flags & MS_INVALIDATE) != 0);
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
		return (EINVAL);	/* Sun returns ENOMEM? */
	case KERN_INVALID_ARGUMENT:
		return (EBUSY);
	default:
		return (EINVAL);
	}
}
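
/*
 * Example: a hypothetical userland fragment that flushes a modified shared
 * mapping back to its file.  Per the check above, MS_ASYNC and MS_INVALIDATE
 * cannot be combined; MS_SYNC (or a flags value of 0) waits for the
 * write-back, while MS_ASYNC only starts it.
 *
 *	#include <sys/types.h>
 *	#include <sys/mman.h>
 *	#include <err.h>
 *
 *	void
 *	flush_region(void *p, size_t len)
 *	{
 *		if (msync(p, len, MS_SYNC) == -1)
 *			err(1, "msync");
 *	}
 */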

#ifndef _SYS_SYSPROTO_H_
struct munmap_args {
	void *addr;
	size_t len;
};
#endif
/*
 * MPSAFE
 */
int
munmap(td, uap)
	struct thread *td;
	struct munmap_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_map_t map;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	if (size == 0)
		return (EINVAL);

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	if (addr < vm_map_min(map) || addr + size > vm_map_max(map))
		return (EINVAL);
	vm_map_lock(map);
	/*
	 * Make sure entire range is allocated.
	 */
	if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE)) {
		vm_map_unlock(map);
		return (EINVAL);
	}
	/* returns nothing but KERN_SUCCESS anyway */
	vm_map_delete(map, addr, addr + size);
	vm_map_unlock(map);
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mprotect_args {
	const void *addr;
	size_t len;
	int prot;
};
#endif
/*
 * MPSAFE
 */
int
mprotect(td, uap)
	struct thread *td;
	struct mprotect_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t prot;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;
#if defined(VM_PROT_READ_IS_EXEC)
	if (prot & VM_PROT_READ)
		prot |= VM_PROT_EXECUTE;
#endif

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_protect(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, prot, FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct minherit_args {
	void *addr;
	size_t len;
	int inherit;
};
#endif
/*
 * MPSAFE
 */
int
minherit(td, uap)
	struct thread *td;
	struct minherit_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_inherit_t inherit;

	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	inherit = uap->inherit;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_inherit(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct madvise_args {
	void *addr;
	size_t len;
	int behav;
};
#endif

/*
 * MPSAFE
 */
/* ARGSUSED */
int
madvise(td, uap)
	struct thread *td;
	struct madvise_args *uap;
{
	vm_offset_t start, end;
	vm_map_t map;
	struct proc *p;
	int error;

	/*
	 * Check for our special case, advising the swap pager we are
	 * "immortal."
	 */
	if (uap->behav == MADV_PROTECT) {
		error = suser(td);
		if (error == 0) {
			p = td->td_proc;
			PROC_LOCK(p);
			p->p_flag |= P_PROTECTED;
			PROC_UNLOCK(p);
		}
		return (error);
	}
	/*
	 * Check for illegal behavior
	 */
	if (uap->behav < 0 || uap->behav > MADV_CORE)
		return (EINVAL);
	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	if ((vm_offset_t)uap->addr < vm_map_min(map) ||
	    (vm_offset_t)uap->addr + uap->len > vm_map_max(map))
		return (EINVAL);
	if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr)
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	start = trunc_page((vm_offset_t) uap->addr);
	end = round_page((vm_offset_t) uap->addr + uap->len);

	if (vm_map_madvise(map, start, end, uap->behav))
		return (EINVAL);
	return (0);
}
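
/*
 * Example: a hypothetical userland fragment applying the advice handled
 * above.  Ordinary advice values (MADV_NORMAL through MADV_CORE) need no
 * privilege; MADV_PROTECT, as checked above, is limited to the superuser
 * and marks the whole process rather than an address range.
 *
 *	#include <sys/types.h>
 *	#include <sys/mman.h>
 *	#include <err.h>
 *
 *	void
 *	prefetch_hint(void *buf, size_t len)
 *	{
 *		if (madvise(buf, len, MADV_WILLNEED) == -1)
 *			warn("madvise");
 *	}
 */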

#ifndef _SYS_SYSPROTO_H_
struct mincore_args {
	const void *addr;
	size_t len;
	char *vec;
};
#endif

/*
 * MPSAFE
 */
/* ARGSUSED */
int
mincore(td, uap)
	struct thread *td;
	struct mincore_args *uap;
{
	vm_offset_t addr, first_addr;
	vm_offset_t end, cend;
	pmap_t pmap;
	vm_map_t map;
	char *vec;
	int error = 0;
	int vecindex, lastvecindex;
	vm_map_entry_t current;
	vm_map_entry_t entry;
	int mincoreinfo;
	unsigned int timestamp;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page((vm_offset_t) uap->addr);
	end = addr + (vm_size_t)round_page(uap->len);
	map = &td->td_proc->p_vmspace->vm_map;
	if (end > vm_map_max(map) || end < addr)
		return (EINVAL);

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	pmap = vmspace_pmap(td->td_proc->p_vmspace);

	vm_map_lock_read(map);
RestartScan:
	timestamp = map->timestamp;

	if (!vm_map_lookup_entry(map, addr, &entry))
		entry = entry->next;

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current process's address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for (current = entry;
	    (current != &map->header) && (current->start < end);
	    current = current->next) {

		/*
		 * ignore submaps (for now) or null objects
		 */
		if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) ||
		    current->object.vm_object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		/*
		 * scan this entry one page at a time
		 */
		while (addr < cend) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 */
			mincoreinfo = pmap_mincore(pmap, addr);
			if (!mincoreinfo) {
				vm_pindex_t pindex;
				vm_ooffset_t offset;
				vm_page_t m;
				/*
				 * calculate the page index into the object
				 */
				offset = current->offset + (addr - current->start);
				pindex = OFF_TO_IDX(offset);
				VM_OBJECT_LOCK(current->object.vm_object);
				m = vm_page_lookup(current->object.vm_object,
				    pindex);
				/*
				 * if the page is resident, then gather
				 * information about it.
				 */
				if (m != NULL && m->valid != 0) {
					mincoreinfo = MINCORE_INCORE;
					vm_page_lock_queues();
					if (m->dirty ||
					    pmap_is_modified(m))
						mincoreinfo |= MINCORE_MODIFIED_OTHER;
					if ((m->flags & PG_REFERENCED) ||
					    pmap_ts_referenced(m)) {
						vm_page_flag_set(m, PG_REFERENCED);
						mincoreinfo |= MINCORE_REFERENCED_OTHER;
					}
					vm_page_unlock_queues();
				}
				VM_OBJECT_UNLOCK(current->object.vm_object);
			}

			/*
			 * subyte may page fault.  In case it needs to modify
			 * the map, we release the lock.
			 */
			vm_map_unlock_read(map);

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = OFF_TO_IDX(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make sure
			 * that the byte vector is zeroed for those skipped
			 * entries.
			 */
			while ((lastvecindex + 1) < vecindex) {
				error = subyte(vec + lastvecindex, 0);
				if (error) {
					error = EFAULT;
					goto done2;
				}
				++lastvecindex;
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte(vec + vecindex, mincoreinfo);
			if (error) {
				error = EFAULT;
				goto done2;
			}

			/*
			 * If the map has changed, due to the subyte, the
			 * previous output may be invalid.
			 */
			vm_map_lock_read(map);
			if (timestamp != map->timestamp)
				goto RestartScan;

			lastvecindex = vecindex;
			addr += PAGE_SIZE;
		}
	}

	/*
	 * subyte may page fault.  In case it needs to modify
	 * the map, we release the lock.
	 */
	vm_map_unlock_read(map);

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = OFF_TO_IDX(end - first_addr);
	while ((lastvecindex + 1) < vecindex) {
		error = subyte(vec + lastvecindex, 0);
		if (error) {
			error = EFAULT;
			goto done2;
		}
		++lastvecindex;
	}

	/*
	 * If the map has changed, due to the subyte, the previous
	 * output may be invalid.
	 */
	vm_map_lock_read(map);
	if (timestamp != map->timestamp)
		goto RestartScan;
	vm_map_unlock_read(map);
done2:
	return (error);
}
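
/*
 * Example: a hypothetical userland helper driving the routine above.  The
 * vector receives one byte per page of the region, with MINCORE_INCORE set
 * for resident pages and the MODIFIED/REFERENCED bits filled in from the
 * pmap and vm_page state gathered above.
 *
 *	#include <sys/types.h>
 *	#include <sys/mman.h>
 *	#include <stdlib.h>
 *	#include <unistd.h>
 *
 *	int
 *	count_resident(void *base, size_t len)
 *	{
 *		size_t i, npages = (len + getpagesize() - 1) / getpagesize();
 *		char *vec;
 *		int resident = 0;
 *
 *		if ((vec = malloc(npages)) == NULL)
 *			return (-1);
 *		if (mincore(base, len, vec) == -1) {
 *			free(vec);
 *			return (-1);
 *		}
 *		for (i = 0; i < npages; i++)
 *			if (vec[i] & MINCORE_INCORE)
 *				resident++;
 *		free(vec);
 *		return (resident);
 *	}
 */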

#ifndef _SYS_SYSPROTO_H_
struct mlock_args {
	const void *addr;
	size_t len;
};
#endif
/*
 * MPSAFE
 */
int
mlock(td, uap)
	struct thread *td;
	struct mlock_args *uap;
{
	struct proc *proc;
	vm_offset_t addr, end, last, start;
	vm_size_t npages, size;
	int error;

	error = suser(td);
	if (error)
		return (error);
	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	npages = atop(end - start);
	if (npages > vm_page_max_wired)
		return (ENOMEM);
	proc = td->td_proc;
	PROC_LOCK(proc);
	if (ptoa(npages +
	    pmap_wired_count(vm_map_pmap(&proc->p_vmspace->vm_map))) >
	    lim_cur(proc, RLIMIT_MEMLOCK)) {
		PROC_UNLOCK(proc);
		return (ENOMEM);
	}
	PROC_UNLOCK(proc);
	if (npages + cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);
	error = vm_map_wire(&proc->p_vmspace->vm_map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

#ifndef _SYS_SYSPROTO_H_
struct mlockall_args {
	int how;
};
#endif

/*
 * MPSAFE
 */
int
mlockall(td, uap)
	struct thread *td;
	struct mlockall_args *uap;
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = 0;

	if ((uap->how == 0) || ((uap->how & ~(MCL_CURRENT|MCL_FUTURE)) != 0))
		return (EINVAL);

#if 0
	/*
	 * If wiring all pages in the process would cause it to exceed
	 * a hard resource limit, return ENOMEM.
	 */
	PROC_LOCK(td->td_proc);
	if (map->size - ptoa(pmap_wired_count(vm_map_pmap(map))) >
	    lim_cur(td->td_proc, RLIMIT_MEMLOCK)) {
		PROC_UNLOCK(td->td_proc);
		return (ENOMEM);
	}
	PROC_UNLOCK(td->td_proc);
#else
	error = suser(td);
	if (error)
		return (error);
#endif

	if (uap->how & MCL_FUTURE) {
		vm_map_lock(map);
		vm_map_modflags(map, MAP_WIREFUTURE, 0);
		vm_map_unlock(map);
		error = 0;
	}

	if (uap->how & MCL_CURRENT) {
		/*
		 * P1003.1-2001 mandates that all currently mapped pages
		 * will be memory resident and locked (wired) upon return
		 * from mlockall().  vm_map_wire() will wire pages, by
		 * calling vm_fault_wire() for each page in the region.
		 */
		error = vm_map_wire(map, vm_map_min(map), vm_map_max(map),
		    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
		error = (error == KERN_SUCCESS ? 0 : EAGAIN);
	}

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct munlockall_args {
	register_t dummy;
};
#endif

/*
 * MPSAFE
 */
int
munlockall(td, uap)
	struct thread *td;
	struct munlockall_args *uap;
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = suser(td);
	if (error)
		return (error);

	/* Clear the MAP_WIREFUTURE flag from this vm_map. */
	vm_map_lock(map);
	vm_map_modflags(map, 0, MAP_WIREFUTURE);
	vm_map_unlock(map);

	/* Forcibly unwire all pages. */
	error = vm_map_unwire(map, vm_map_min(map), vm_map_max(map),
	    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);

	return (error);
}
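
/*
 * Example: a hypothetical userland fragment that wires its working set.
 * As implemented above, both mlock() and mlockall() currently require
 * superuser privilege, mlock() is additionally bounded by RLIMIT_MEMLOCK
 * and vm_page_max_wired, and MCL_FUTURE causes later mappings to be wired
 * as they are created.
 *
 *	#include <sys/types.h>
 *	#include <sys/mman.h>
 *	#include <err.h>
 *
 *	void
 *	pin_buffer(void *buf, size_t len)
 *	{
 *		if (mlock(buf, len) == -1)
 *			err(1, "mlock");
 *	}
 *
 *	void
 *	pin_everything(void)
 *	{
 *		if (mlockall(MCL_CURRENT | MCL_FUTURE) == -1)
 *			err(1, "mlockall");
 *	}
 */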

#ifndef _SYS_SYSPROTO_H_
struct munlock_args {
	const void *addr;
	size_t len;
};
#endif
/*
 * MPSAFE
 */
int
munlock(td, uap)
	struct thread *td;
	struct munlock_args *uap;
{
	vm_offset_t addr, end, last, start;
	vm_size_t size;
	int error;

	error = suser(td);
	if (error)
		return (error);
	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	error = vm_map_unwire(&td->td_proc->p_vmspace->vm_map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * vm_mmap_vnode()
 *
 * MPSAFE
 *
 * Helper function for vm_mmap.  Perform the sanity checks specific to
 * mmap operations on vnodes.
 */
int
vm_mmap_vnode(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct vnode *vp, vm_ooffset_t foff, vm_object_t *objp)
{
	struct vattr va;
	void *handle;
	vm_object_t obj;
	int error, flags, type;

	mtx_lock(&Giant);
	if ((error = vget(vp, LK_EXCLUSIVE, td)) != 0) {
		mtx_unlock(&Giant);
		return (error);
	}
	flags = *flagsp;
	if (vp->v_type == VREG) {
		/*
		 * Get the proper underlying object
		 */
		if (VOP_GETVOBJECT(vp, &obj) != 0) {
			error = EINVAL;
			goto done;
		}
		if (obj->handle != vp) {
			vput(vp);
			vp = (struct vnode *)obj->handle;
			vget(vp, LK_EXCLUSIVE, td);
		}
		type = OBJT_VNODE;
		handle = vp;
	} else if (vp->v_type == VCHR) {
		type = OBJT_DEVICE;
		handle = vp->v_rdev;

		/* XXX: lacks a thread reference on the device */
		if (vp->v_rdev->si_devsw->d_flags & D_MMAP_ANON) {
			*maxprotp = VM_PROT_ALL;
			*flagsp |= MAP_ANON;
			error = 0;
			goto done;
		}
		/*
		 * cdevs do not provide private mappings of any kind.
		 */
		if ((*maxprotp & VM_PROT_WRITE) == 0 &&
		    (prot & PROT_WRITE) != 0) {
			error = EACCES;
			goto done;
		}
		if (flags & (MAP_PRIVATE|MAP_COPY)) {
			error = EINVAL;
			goto done;
		}
		/*
		 * Force device mappings to be shared.
		 */
		flags |= MAP_SHARED;
	} else {
		error = EINVAL;
		goto done;
	}
	if ((error = VOP_GETATTR(vp, &va, td->td_ucred, td))) {
		goto done;
	}
	if ((flags & MAP_SHARED) != 0) {
		if ((va.va_flags & (SF_SNAPSHOT|IMMUTABLE|APPEND)) != 0) {
			if (prot & PROT_WRITE) {
				error = EPERM;
				goto done;
			}
			*maxprotp &= ~VM_PROT_WRITE;
		}
#ifdef MAC
		error = mac_check_vnode_mmap(td->td_ucred, vp, prot);
		if (error != 0)
			goto done;
#endif
	}
	/*
	 * If it is a regular file without any references
	 * we do not need to sync it.
	 * Adjust the object size to be the size of the actual file.
	 */
	if (vp->v_type == VREG) {
		objsize = round_page(va.va_size);
		if (va.va_nlink == 0)
			flags |= MAP_NOSYNC;
	}
	obj = vm_pager_allocate(type, handle, objsize, prot, foff);
	if (obj == NULL) {
		error = (type == OBJT_DEVICE ? EINVAL : ENOMEM);
		goto done;
	}
	*objp = obj;
	*flagsp = flags;
done:
	vput(vp);
	mtx_unlock(&Giant);
	return (error);
}
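
/*
 * Example: the D_MMAP_ANON path above is what lets a device such as
 * /dev/zero behave like anonymous memory when mapped (maximum protection,
 * MAP_ANON semantics, no private-mapping restriction).  A hypothetical
 * userland fragment relying on that behavior:
 *
 *	#include <sys/types.h>
 *	#include <sys/mman.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <err.h>
 *
 *	void *
 *	zero_map(size_t len)
 *	{
 *		int fd;
 *		void *p;
 *
 *		fd = open("/dev/zero", O_RDWR);
 *		if (fd == -1)
 *			err(1, "open");
 *		p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *		    MAP_SHARED, fd, 0);
 *		if (p == MAP_FAILED)
 *			err(1, "mmap");
 *		close(fd);
 *		return (p);
 *	}
 */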

/*
 * vm_mmap()
 *
 * MPSAFE
 *
 * Internal version of mmap.  Currently used by mmap, exec, and sys5
 * shared memory.  Handle is either a vnode pointer or NULL for MAP_ANON.
 */
int
vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
	vm_prot_t maxprot, int flags,
	void *handle,
	vm_ooffset_t foff)
{
	boolean_t fitit;
	vm_object_t object;
	int rv = KERN_SUCCESS;
	vm_ooffset_t objsize;
	int docow, error;
	struct thread *td = curthread;

	if (size == 0)
		return (0);

	objsize = size = round_page(size);

	PROC_LOCK(td->td_proc);
	if (td->td_proc->p_vmspace->vm_map.size + size >
	    lim_cur(td->td_proc, RLIMIT_VMEM)) {
		PROC_UNLOCK(td->td_proc);
		return (ENOMEM);
	}
	PROC_UNLOCK(td->td_proc);

	/*
	 * We currently can only deal with page aligned file offsets.
	 * The check is here rather than in the syscall because the
	 * kernel calls this function internally for other mmapping
	 * operations (such as in exec) and non-aligned offsets will
	 * cause pmap inconsistencies...so we want to be sure to
	 * disallow this in all cases.
	 */
	if (foff & PAGE_MASK)
		return (EINVAL);

	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		if (*addr != trunc_page(*addr))
			return (EINVAL);
		fitit = FALSE;
		(void) vm_map_remove(map, *addr, *addr + size);
	}
	/*
	 * Lookup/allocate object.
	 */
	if (handle != NULL) {
		error = vm_mmap_vnode(td, size, prot, &maxprot, &flags,
		    handle, foff, &object);
		if (error) {
			return (error);
		}
	}
	if (flags & MAP_ANON) {
		object = NULL;
		docow = 0;
		/*
		 * Unnamed anonymous regions always start at 0.
		 */
		if (handle == 0)
			foff = 0;
	} else {
		docow = MAP_PREFAULT_PARTIAL;
	}

	if ((flags & (MAP_ANON|MAP_SHARED)) == 0)
		docow |= MAP_COPY_ON_WRITE;
	if (flags & MAP_NOSYNC)
		docow |= MAP_DISABLE_SYNCER;
	if (flags & MAP_NOCORE)
		docow |= MAP_DISABLE_COREDUMP;

#if defined(VM_PROT_READ_IS_EXEC)
	if (prot & VM_PROT_READ)
		prot |= VM_PROT_EXECUTE;

	if (maxprot & VM_PROT_READ)
		maxprot |= VM_PROT_EXECUTE;
#endif

	if (fitit)
		*addr = pmap_addr_hint(object, *addr, size);

	if (flags & MAP_STACK)
		rv = vm_map_stack(map, *addr, size, prot, maxprot,
		    docow | MAP_STACK_GROWS_DOWN);
	else
		rv = vm_map_find(map, object, foff, addr, size, fitit,
		    prot, maxprot, docow);

	if (rv != KERN_SUCCESS) {
		/*
		 * Lose the object reference.  Will destroy the
		 * object if it's an unnamed anonymous mapping
		 * or named anonymous without other references.
		 */
		vm_object_deallocate(object);
	} else if (flags & MAP_SHARED) {
		/*
		 * Shared memory is also shared with children.
		 */
		rv = vm_map_inherit(map, *addr, *addr + size, VM_INHERIT_SHARE);
		if (rv != KERN_SUCCESS)
			(void) vm_map_remove(map, *addr, *addr + size);
	}

	/*
	 * If the process has requested that all future mappings
	 * be wired, then heed this.
	 */
	if ((rv == KERN_SUCCESS) && (map->flags & MAP_WIREFUTURE))
		vm_map_wire(map, *addr, *addr + size,
		    VM_MAP_WIRE_USER|VM_MAP_WIRE_NOHOLES);

	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}
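
/*
 * Example: a hypothetical in-kernel caller of vm_mmap() above, mapping an
 * anonymous shared region into a process's map, much as the SysV shared
 * memory code does.  The helper name and its use here are illustrative
 * only; the argument order follows the definition above.
 *
 *	static int
 *	map_anon_region(struct proc *p, vm_offset_t *addrp, vm_size_t len)
 *	{
 *
 *		return (vm_mmap(&p->p_vmspace->vm_map, addrp,
 *		    round_page(len), VM_PROT_READ | VM_PROT_WRITE,
 *		    VM_PROT_ALL, MAP_ANON | MAP_SHARED, NULL, 0));
 *	}
 */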