/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 * $FreeBSD$
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include "opt_rlimit.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/conf.h>
#include <sys/vmmeter.h>

#include <miscfs/specfs/specdev.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <vm/vm_inherit.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>

#ifndef _SYS_SYSPROTO_H_
struct sbrk_args {
	int incr;
};
#endif

/* ARGSUSED */
int
sbrk(p, uap, retval)
	struct proc *p;
	struct sbrk_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#ifndef _SYS_SYSPROTO_H_
struct sstk_args {
	int incr;
};
#endif

/* ARGSUSED */
int
sstk(p, uap, retval)
	struct proc *p;
	struct sstk_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
#ifndef _SYS_SYSPROTO_H_
struct getpagesize_args {
	int dummy;
};
#endif

/* ARGSUSED */
int
ogetpagesize(p, uap, retval)
	struct proc *p;
	struct getpagesize_args *uap;
	int *retval;
{

	*retval = PAGE_SIZE;
	return (0);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */

#ifndef _SYS_SYSPROTO_H_
struct mmap_args {
	caddr_t addr;
	size_t len;
	int prot;
	int flags;
	int fd;
	long pad;
	off_t pos;
};
#endif

int
mmap(p, uap, retval)
	struct proc *p;
	register struct mmap_args *uap;
	int *retval;
{
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct vnode *vp;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t prot, maxprot;
	caddr_t handle;
	int flags, error;

	prot = uap->prot & VM_PROT_ALL;
	flags = uap->flags;
	/*
	 * Address (if FIXED) must be page aligned.  Size is implicitly
	 * rounded to a page boundary.
	 */
	addr = (vm_offset_t) uap->addr;
	if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) ||
	    (ssize_t) uap->len < 0 || ((flags & MAP_ANON) && uap->fd != -1))
		return (EINVAL);

	/*
	 * Round page if not already disallowed by above test
	 * XXX: Is there any point in the MAP_FIXED align requirement above?
	 */
	size = uap->len;
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
			return (EINVAL);
#ifndef i386
		if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
			return (EINVAL);
#endif
		if (addr + size < addr)
			return (EINVAL);
	}
	/*
	 * XXX if no hint provided for a non-fixed mapping place it after the
	 * end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable
	 * location.
	 */
	if (addr == 0 && (flags & MAP_FIXED) == 0)
		addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.
		 */
		handle = NULL;
		maxprot = VM_PROT_ALL;
	} else {
		/*
		 * Mapping file, get fp for validation.  Obtain vnode and make
		 * sure it is of appropriate type.
		 */
		if (((unsigned) uap->fd) >= fdp->fd_nfiles ||
		    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
			return (EBADF);
		if (fp->f_type != DTYPE_VNODE)
			return (EINVAL);
		vp = (struct vnode *) fp->f_data;
		if (vp->v_type != VREG && vp->v_type != VCHR)
			return (EINVAL);
		/*
		 * XXX hack to handle use of /dev/zero to map anon memory (ala
		 * SunOS).
		 */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			handle = NULL;
			maxprot = VM_PROT_ALL;
			flags |= MAP_ANON;
		} else {
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination?  What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;	/* ??? */
			if (fp->f_flag & FREAD)
				maxprot |= VM_PROT_READ;
			else if (prot & PROT_READ)
				return (EACCES);
			if (flags & MAP_SHARED) {
				if (fp->f_flag & FWRITE)
					maxprot |= VM_PROT_WRITE;
				else if (prot & PROT_WRITE)
					return (EACCES);
			} else
				maxprot |= VM_PROT_WRITE;
			handle = (caddr_t) vp;
		}
	}
	error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, handle, uap->pos);
	if (error == 0)
		*retval = (int) addr;
	return (error);
}

#ifdef COMPAT_43
#ifndef _SYS_SYSPROTO_H_
struct ommap_args {
	caddr_t addr;
	int len;
	int prot;
	int flags;
	int fd;
	long pos;
};
#endif
int
ommap(p, uap, retval)
	struct proc *p;
	register struct ommap_args *uap;
	int *retval;
{
	struct mmap_args nargs;
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC | PROT_WRITE,
		PROT_READ,
		PROT_EXEC | PROT_READ,
		PROT_WRITE | PROT_READ,
		PROT_EXEC | PROT_WRITE | PROT_READ,
	};

#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100
#define	OMAP_INHERIT	0x0800

	nargs.addr = uap->addr;
	nargs.len = uap->len;
	nargs.prot = cvtbsdprot[uap->prot & 0x7];
	nargs.flags = 0;
	if (uap->flags & OMAP_ANON)
		nargs.flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		nargs.flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		nargs.flags |= MAP_SHARED;
	else
		nargs.flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		nargs.flags |= MAP_FIXED;
	if (uap->flags & OMAP_INHERIT)
		nargs.flags |= MAP_INHERIT;
	nargs.fd = uap->fd;
	nargs.pos = uap->pos;
	return (mmap(p, &nargs, retval));
}
#endif /* COMPAT_43 */


#ifndef _SYS_SYSPROTO_H_
struct msync_args {
	caddr_t addr;
	int len;
	int flags;
};
#endif
int
msync(p, uap, retval)
	struct proc *p;
	struct msync_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int flags;
	vm_map_t map;
	int rv;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
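	/*
	 * The address/length pair is rounded out to page boundaries below,
	 * the same idiom used by mmap(), munmap(), mprotect(), minherit(),
	 * mlock() and munlock().  A hedged, illustrative example (assuming a
	 * 4K page size, i.e. PAGE_MASK == 0xfff):
	 *
	 *	addr = 0x12345, len = 0x100
	 *	pageoff = 0x12345 & 0xfff            -> 0x345
	 *	addr   -= pageoff                    -> 0x12000
	 *	size    = round_page(0x100 + 0x345)  -> 0x1000
	 *
	 * i.e. the operation covers every whole page touched by the request.
	 */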
	flags = uap->flags;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	map = &p->p_vmspace->vm_map;

	/*
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we don't
	 * really keep track of individual mmaps so we approximate by flushing
	 * the range of the map entry containing addr.  This can be incorrect
	 * if the region splits or is coalesced with a neighbor.
	 */
	if (size == 0) {
		vm_map_entry_t entry;

		vm_map_lock_read(map);
		rv = vm_map_lookup_entry(map, addr, &entry);
		vm_map_unlock_read(map);
		if (rv == FALSE)
			return (EINVAL);
		addr = entry->start;
		size = entry->end - entry->start;
	}

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_clean(map, addr, addr + size, (flags & MS_ASYNC) == 0,
	    (flags & MS_INVALIDATE) != 0);

	switch (rv) {
	case KERN_SUCCESS:
		break;
	case KERN_INVALID_ADDRESS:
		return (EINVAL);	/* Sun returns ENOMEM? */
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}

	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct munmap_args {
	caddr_t addr;
	size_t len;
};
#endif
int
munmap(p, uap, retval)
	register struct proc *p;
	register struct munmap_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_map_t map;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if (size == 0)
		return (0);

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
		return (EINVAL);
#ifndef i386
	if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
		return (EINVAL);
#endif
	if (addr + size < addr)
		return (EINVAL);
	map = &p->p_vmspace->vm_map;
	/*
	 * Make sure entire range is allocated.
	 */
	if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
		return (EINVAL);
	/* returns nothing but KERN_SUCCESS anyway */
	(void) vm_map_remove(map, addr, addr + size);
	return (0);
}

void
munmapfd(p, fd)
	struct proc *p;
	int fd;
{
	/*
	 * XXX should unmap any regions mapped to this file
	 */
	p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
}

#ifndef _SYS_SYSPROTO_H_
struct mprotect_args {
	caddr_t addr;
	size_t len;
	int prot;
};
#endif
int
mprotect(p, uap, retval)
	struct proc *p;
	struct mprotect_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	register vm_prot_t prot;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;
#if defined(VM_PROT_READ_IS_EXEC)
	if (prot & VM_PROT_READ)
		prot |= VM_PROT_EXECUTE;
#endif

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot,
	    FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct minherit_args {
	caddr_t addr;
	size_t len;
	int inherit;
};
#endif
int
minherit(p, uap, retval)
	struct proc *p;
	struct minherit_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	register vm_inherit_t inherit;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	inherit = uap->inherit;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_inherit(&p->p_vmspace->vm_map, addr, addr + size,
	    inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct madvise_args {
	caddr_t addr;
	size_t len;
	int behav;
};
#endif

/* ARGSUSED */
int
madvise(p, uap, retval)
	struct proc *p;
	struct madvise_args *uap;
	int *retval;
{
	vm_map_t map;
	pmap_t pmap;
	vm_offset_t start, end;

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 &&
	    ((vm_offset_t) uap->addr + uap->len) > VM_MAXUSER_ADDRESS)
		return (EINVAL);
#ifndef i386
	if (VM_MIN_ADDRESS > 0 && uap->addr < VM_MIN_ADDRESS)
		return (EINVAL);
#endif
	if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr)
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	start = trunc_page((vm_offset_t) uap->addr);
	end = round_page((vm_offset_t) uap->addr + uap->len);

	map = &p->p_vmspace->vm_map;
	pmap = &p->p_vmspace->vm_pmap;

	vm_map_madvise(map, pmap, start, end, uap->behav);

	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mincore_args {
	caddr_t addr;
	size_t len;
	char *vec;
};
#endif

/* ARGSUSED */
int
mincore(p, uap, retval)
	struct proc *p;
	struct mincore_args *uap;
	int *retval;
{
	vm_offset_t addr, first_addr;
	vm_offset_t end, cend;
	pmap_t pmap;
	vm_map_t map;
	char *vec;
	int error;
	int vecindex, lastvecindex;
	register vm_map_entry_t current;
	vm_map_entry_t entry;
	int mincoreinfo;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page((vm_offset_t) uap->addr);
	end = addr + (vm_size_t) round_page(uap->len);
	if (VM_MAXUSER_ADDRESS > 0 && end > VM_MAXUSER_ADDRESS)
		return (EINVAL);
	if (end < addr)
		return (EINVAL);

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	map = &p->p_vmspace->vm_map;
	pmap = &p->p_vmspace->vm_pmap;

	vm_map_lock(map);

	/*
	 * Not needed here
	 */
#if 0
	VM_MAP_RANGE_CHECK(map, addr, end);
#endif

	if (!vm_map_lookup_entry(map, addr, &entry))
		entry = entry->next;

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current process's address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for (current = entry;
	    (current != &map->header) && (current->start < end);
	    current = current->next) {

		/*
		 * ignore submaps (for now) or null objects
		 */
		if ((current->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) ||
		    current->object.vm_object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		/*
		 * scan this entry one page at a time
		 */
		while (addr < cend) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 */
			mincoreinfo = pmap_mincore(pmap, addr);
			if (!mincoreinfo) {
				vm_pindex_t pindex;
				vm_ooffset_t offset;
				vm_page_t m;

				/*
				 * calculate the page index into the object
				 */
				offset = current->offset + (addr - current->start);
				pindex = OFF_TO_IDX(offset);
				m = vm_page_lookup(current->object.vm_object,
				    pindex);
				/*
				 * if the page is resident, then gather
				 * information about it.
				 */
				if (m) {
					mincoreinfo = MINCORE_INCORE;
					if (m->dirty ||
					    pmap_is_modified(VM_PAGE_TO_PHYS(m)))
						mincoreinfo |= MINCORE_MODIFIED_OTHER;
					if ((m->flags & PG_REFERENCED) ||
					    pmap_ts_referenced(VM_PAGE_TO_PHYS(m))) {
						m->flags |= PG_REFERENCED;
						mincoreinfo |= MINCORE_REFERENCED_OTHER;
					}
				}
			}

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = OFF_TO_IDX(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make
			 * sure that the byte vector is zeroed for those
			 * skipped entries.
			 */
			while ((lastvecindex + 1) < vecindex) {
				error = subyte(vec + lastvecindex, 0);
				if (error) {
					vm_map_unlock(map);
					return (EFAULT);
				}
				++lastvecindex;
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte(vec + vecindex, mincoreinfo);
			if (error) {
				vm_map_unlock(map);
				return (EFAULT);
			}
			lastvecindex = vecindex;
			addr += PAGE_SIZE;
		}
	}

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = OFF_TO_IDX(end - first_addr);
	while ((lastvecindex + 1) < vecindex) {
		error = subyte(vec + lastvecindex, 0);
		if (error) {
			vm_map_unlock(map);
			return (EFAULT);
		}
		++lastvecindex;
	}

	vm_map_unlock(map);
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mlock_args {
	caddr_t addr;
	size_t len;
};
#endif
int
mlock(p, uap, retval)
	struct proc *p;
	struct mlock_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int error;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disable wrap around */
	if (addr + size < addr)
		return (EINVAL);

	if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);

#ifdef pmap_wired_count
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);
#else
	error = suser(p->p_ucred, &p->p_acflag);
	if (error)
		return (error);
#endif

	error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr,
	    addr + size, FALSE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

#ifndef _SYS_SYSPROTO_H_
struct munlock_args {
	caddr_t addr;
	size_t len;
};
#endif
int
munlock(p, uap, retval)
	struct proc *p;
	struct munlock_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int error;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disable wrap around */
	if (addr + size < addr)
		return (EINVAL);

#ifndef pmap_wired_count
	error = suser(p->p_ucred, &p->p_acflag);
	if (error)
		return (error);
#endif

	error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr,
	    addr + size, TRUE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * Internal version of mmap.
 * Currently used by mmap, exec, and sys5 shared memory.
 * Handle is either a vnode pointer or NULL for MAP_ANON.
 */
int
vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
	register vm_map_t map;
	register vm_offset_t *addr;
	register vm_size_t size;
	vm_prot_t prot, maxprot;
	register int flags;
	caddr_t handle;		/* XXX should be vp */
	vm_ooffset_t foff;
{
	boolean_t fitit;
	vm_object_t object;
	struct vnode *vp = NULL;
	objtype_t type;
	int rv = KERN_SUCCESS;
	vm_ooffset_t objsize;
	int docow;
	struct proc *p = curproc;

	if (size == 0)
		return (0);

	objsize = size = round_page(size);

	/*
	 * We currently can only deal with page aligned file offsets.
	 * The check is here rather than in the syscall because the
	 * kernel calls this function internally for other mmapping
	 * operations (such as in exec) and non-aligned offsets will
	 * cause pmap inconsistencies... so we want to be sure to
	 * disallow this in all cases.
	 */
	if (foff & PAGE_MASK)
		return (EINVAL);

	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		if (*addr != trunc_page(*addr))
			return (EINVAL);
		fitit = FALSE;
		(void) vm_map_remove(map, *addr, *addr + size);
	}

	/*
	 * Lookup/allocate object.
	 */
	if (flags & MAP_ANON) {
		type = OBJT_DEFAULT;
		/*
		 * Unnamed anonymous regions always start at 0.
		 */
		if (handle == 0)
			foff = 0;
	} else {
		vp = (struct vnode *) handle;
		if (vp->v_type == VCHR) {
			type = OBJT_DEVICE;
			handle = (caddr_t) vp->v_rdev;
		} else {
			struct vattr vat;
			int error;

			error = VOP_GETATTR(vp, &vat, p->p_ucred, p);
			if (error)
				return (error);
			objsize = round_page(vat.va_size);
			type = OBJT_VNODE;
		}
	}

	if (handle == NULL) {
		object = NULL;
	} else {
		object = vm_pager_allocate(type, handle, OFF_TO_IDX(objsize),
		    prot, foff);
		if (object == NULL)
			return (type == OBJT_DEVICE ? EINVAL : ENOMEM);
	}

	/*
	 * Force device mappings to be shared.
	 */
	if (type == OBJT_DEVICE) {
		flags &= ~(MAP_PRIVATE|MAP_COPY);
		flags |= MAP_SHARED;
	}

	docow = 0;
	if ((flags & (MAP_ANON|MAP_SHARED)) == 0) {
		docow = MAP_COPY_ON_WRITE | MAP_COPY_NEEDED;
	}

#if defined(VM_PROT_READ_IS_EXEC)
	if (prot & VM_PROT_READ)
		prot |= VM_PROT_EXECUTE;

	if (maxprot & VM_PROT_READ)
		maxprot |= VM_PROT_EXECUTE;
#endif

	rv = vm_map_find(map, object, foff, addr, size, fitit,
	    prot, maxprot, docow);

	if (rv != KERN_SUCCESS) {
		/*
		 * Lose the object reference.  Will destroy the
		 * object if it's an unnamed anonymous mapping
		 * or named anonymous without other references.
		 */
		vm_object_deallocate(object);
		goto out;
	}

	/*
	 * "Pre-fault" resident pages.
	 */
	if ((type == OBJT_VNODE) && (map->pmap != NULL) && (object != NULL)) {
		pmap_object_init_pt(map->pmap, *addr,
		    object, (vm_pindex_t) OFF_TO_IDX(foff), size, 1);
	}

	/*
	 * Shared memory is also shared with children.
	 */
	if (flags & (MAP_SHARED|MAP_INHERIT)) {
		rv = vm_map_inherit(map, *addr, *addr + size, VM_INHERIT_SHARE);
		if (rv != KERN_SUCCESS) {
			(void) vm_map_remove(map, *addr, *addr + size);
			goto out;
		}
	}
out:
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}
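
/*
 * Illustrative userland use of the mmap/msync/munmap interfaces implemented
 * above.  This is only a hedged sketch and is kept entirely inside this
 * comment, so it is never compiled into the kernel.  The file name
 * "example.dat" is an assumption made purely for the example, and msync() is
 * called with flags 0, which the implementation above treats as a
 * synchronous clean (MS_ASYNC not set).
 *
 *	#include <sys/types.h>
 *	#include <sys/mman.h>
 *	#include <sys/stat.h>
 *	#include <err.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		struct stat st;
 *		char *p;
 *		int fd;
 *
 *		fd = open("example.dat", O_RDWR);
 *		if (fd < 0 || fstat(fd, &st) < 0)
 *			err(1, "example.dat");
 *
 *		p = mmap(NULL, st.st_size, PROT_READ | PROT_WRITE,
 *		    MAP_SHARED, fd, (off_t)0);
 *		if (p == MAP_FAILED)
 *			err(1, "mmap");
 *
 *		p[0] = 'x';
 *		if (msync(p, st.st_size, 0) < 0)
 *			err(1, "msync");
 *		if (munmap(p, st.st_size) < 0)
 *			err(1, "munmap");
 *		close(fd);
 *		return (0);
 *	}
 */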