/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 * $Id: vm_mmap.c,v 1.56 1996/12/28 22:40:44 dyson Exp $
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include "opt_rlimit.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/conf.h>
#include <sys/vmmeter.h>

#include <miscfs/specfs/specdev.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <vm/vm_inherit.h>
#include <vm/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>

#ifndef _SYS_SYSPROTO_H_
struct sbrk_args {
	int incr;
};
#endif

/* ARGSUSED */
int
sbrk(p, uap, retval)
	struct proc *p;
	struct sbrk_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#ifndef _SYS_SYSPROTO_H_
struct sstk_args {
	int incr;
};
#endif

/* ARGSUSED */
int
sstk(p, uap, retval)
	struct proc *p;
	struct sstk_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
#ifndef _SYS_SYSPROTO_H_
struct getpagesize_args {
	int dummy;
};
#endif

/* ARGSUSED */
int
ogetpagesize(p, uap, retval)
	struct proc *p;
	struct getpagesize_args *uap;
	int *retval;
{

	*retval = PAGE_SIZE;
	return (0);
}
#endif				/* COMPAT_43 || COMPAT_SUNOS */

#ifndef _SYS_SYSPROTO_H_
struct mmap_args {
	caddr_t addr;
	size_t len;
	int prot;
	int flags;
	int fd;
	long pad;
	off_t pos;
};
#endif

int
mmap(p, uap, retval)
	struct proc *p;
	register struct mmap_args *uap;
	int *retval;
{
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct vnode *vp;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t prot, maxprot;
	caddr_t handle;
	int flags, error;

	prot = uap->prot & VM_PROT_ALL;
	flags = uap->flags;
	/*
	 * Address (if FIXED) must be page aligned. Size is implicitly rounded
	 * to a page boundary.
	 */
	addr = (vm_offset_t) uap->addr;
	if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) ||
	    (ssize_t) uap->len < 0 || ((flags & MAP_ANON) && uap->fd != -1))
		return (EINVAL);

	/*
	 * Round page if not already disallowed by above test
	 * XXX: Is there any point in the MAP_FIXED align requirement above?
	 */
	size = uap->len;
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
			return (EINVAL);
#ifndef i386
		if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
			return (EINVAL);
#endif
		if (addr + size < addr)
			return (EINVAL);
	}
	/*
	 * XXX if no hint provided for a non-fixed mapping place it after the
	 * end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable location.
	 */
	if (addr == 0 && (flags & MAP_FIXED) == 0)
		addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.
		 */
		handle = NULL;
		maxprot = VM_PROT_ALL;
	} else {
		/*
		 * Mapping file, get fp for validation. Obtain vnode and make
		 * sure it is of appropriate type.
		 */
		if (((unsigned) uap->fd) >= fdp->fd_nfiles ||
		    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
			return (EBADF);
		if (fp->f_type != DTYPE_VNODE)
			return (EINVAL);
		vp = (struct vnode *) fp->f_data;
		if (vp->v_type != VREG && vp->v_type != VCHR)
			return (EINVAL);
		/*
		 * XXX hack to handle use of /dev/zero to map anon memory (ala
		 * SunOS).
		 */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			handle = NULL;
			maxprot = VM_PROT_ALL;
			flags |= MAP_ANON;
		} else {
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination? What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;	/* ??? */
			if (fp->f_flag & FREAD)
				maxprot |= VM_PROT_READ;
			else if (prot & PROT_READ)
				return (EACCES);
			if (flags & MAP_SHARED) {
				if (fp->f_flag & FWRITE)
					maxprot |= VM_PROT_WRITE;
				else if (prot & PROT_WRITE)
					return (EACCES);
			} else
				maxprot |= VM_PROT_WRITE;
			handle = (caddr_t) vp;
		}
	}
	error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, handle, uap->pos);
	if (error == 0)
		*retval = (int) addr;
	return (error);
}

#ifdef COMPAT_43
#ifndef _SYS_SYSPROTO_H_
struct ommap_args {
	caddr_t addr;
	int len;
	int prot;
	int flags;
	int fd;
	long pos;
};
#endif
int
ommap(p, uap, retval)
	struct proc *p;
	register struct ommap_args *uap;
	int *retval;
{
	struct mmap_args nargs;
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC | PROT_WRITE,
		PROT_READ,
		PROT_EXEC | PROT_READ,
		PROT_WRITE | PROT_READ,
		PROT_EXEC | PROT_WRITE | PROT_READ,
	};

#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100
#define	OMAP_INHERIT	0x0800

	nargs.addr = uap->addr;
	nargs.len = uap->len;
	nargs.prot = cvtbsdprot[uap->prot & 0x7];
	nargs.flags = 0;
	if (uap->flags & OMAP_ANON)
		nargs.flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		nargs.flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		nargs.flags |= MAP_SHARED;
	else
		nargs.flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		nargs.flags |= MAP_FIXED;
	if (uap->flags & OMAP_INHERIT)
		nargs.flags |= MAP_INHERIT;
	nargs.fd = uap->fd;
	nargs.pos = uap->pos;
	return (mmap(p, &nargs, retval));
}
#endif				/* COMPAT_43 */

#ifndef _SYS_SYSPROTO_H_
struct msync_args {
	caddr_t addr;
	int len;
	int flags;
};
#endif
int
msync(p, uap, retval)
	struct proc *p;
	struct msync_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int flags;
	vm_map_t map;
	int rv;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	flags = uap->flags;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	map = &p->p_vmspace->vm_map;

	/*
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we don't
	 * really keep track of individual mmaps so we approximate by flushing
	 * the range of the map entry containing addr. This can be incorrect
	 * if the region splits or is coalesced with a neighbor.
	 */
	if (size == 0) {
		vm_map_entry_t entry;

		vm_map_lock_read(map);
		rv = vm_map_lookup_entry(map, addr, &entry);
		vm_map_unlock_read(map);
		if (rv == FALSE)
			return (EINVAL);
		addr = entry->start;
		size = entry->end - entry->start;
	}

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_clean(map, addr, addr + size, (flags & MS_ASYNC) == 0,
	    (flags & MS_INVALIDATE) != 0);

	switch (rv) {
	case KERN_SUCCESS:
		break;
	case KERN_INVALID_ADDRESS:
		return (EINVAL);	/* Sun returns ENOMEM? */
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}

	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct munmap_args {
	caddr_t addr;
	size_t len;
};
#endif
int
munmap(p, uap, retval)
	register struct proc *p;
	register struct munmap_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_map_t map;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if (size == 0)
		return (0);

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
		return (EINVAL);
#ifndef i386
	if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
		return (EINVAL);
#endif
	if (addr + size < addr)
		return (EINVAL);
	map = &p->p_vmspace->vm_map;
	/*
	 * Make sure entire range is allocated.
	 */
	if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
		return (EINVAL);
	/* returns nothing but KERN_SUCCESS anyway */
	(void) vm_map_remove(map, addr, addr + size);
	return (0);
}

void
munmapfd(p, fd)
	struct proc *p;
	int fd;
{
	/*
	 * XXX should unmap any regions mapped to this file
	 */
	p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
}

#ifndef _SYS_SYSPROTO_H_
struct mprotect_args {
	caddr_t addr;
	size_t len;
	int prot;
};
#endif
int
mprotect(p, uap, retval)
	struct proc *p;
	struct mprotect_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	register vm_prot_t prot;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;
#if defined(VM_PROT_READ_IS_EXEC)
	if (prot & VM_PROT_READ)
		prot |= VM_PROT_EXECUTE;
#endif

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot,
	    FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct minherit_args {
	caddr_t addr;
	size_t len;
	int inherit;
};
#endif
int
minherit(p, uap, retval)
	struct proc *p;
	struct minherit_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	register vm_inherit_t inherit;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	inherit = uap->inherit;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_inherit(&p->p_vmspace->vm_map, addr, addr + size,
	    inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct madvise_args {
	caddr_t addr;
	size_t len;
	int behav;
};
#endif

/* ARGSUSED */
int
madvise(p, uap, retval)
	struct proc *p;
	struct madvise_args *uap;
	int *retval;
{
	vm_map_t map;
	pmap_t pmap;
	vm_offset_t start, end;

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 &&
	    ((vm_offset_t) uap->addr + uap->len) > VM_MAXUSER_ADDRESS)
		return (EINVAL);
#ifndef i386
	if (VM_MIN_ADDRESS > 0 && uap->addr < VM_MIN_ADDRESS)
		return (EINVAL);
#endif
	if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr)
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	start = trunc_page((vm_offset_t) uap->addr);
	end = round_page((vm_offset_t) uap->addr + uap->len);

	map = &p->p_vmspace->vm_map;
	pmap = &p->p_vmspace->vm_pmap;

	vm_map_madvise(map, pmap, start, end, uap->behav);

	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mincore_args {
	caddr_t addr;
	size_t len;
	char *vec;
};
#endif

/* ARGSUSED */
int
mincore(p, uap, retval)
	struct proc *p;
	struct mincore_args *uap;
	int *retval;
{
	vm_offset_t addr, first_addr;
	vm_offset_t end, cend;
	pmap_t pmap;
	vm_map_t map;
	char *vec;
	int error;
	int vecindex, lastvecindex;
	register vm_map_entry_t current;
	vm_map_entry_t entry;
	int mincoreinfo;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page((vm_offset_t) uap->addr);
	end = addr + (vm_size_t) round_page(uap->len);
	if (VM_MAXUSER_ADDRESS > 0 && end > VM_MAXUSER_ADDRESS)
		return (EINVAL);
	if (end < addr)
		return (EINVAL);

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	map = &p->p_vmspace->vm_map;
	pmap = &p->p_vmspace->vm_pmap;

	vm_map_lock(map);

	/*
	 * Not needed here
	 */
#if 0
	VM_MAP_RANGE_CHECK(map, addr, end);
#endif

	if (!vm_map_lookup_entry(map, addr, &entry))
		entry = entry->next;

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current processes address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for (current = entry;
	    (current != &map->header) && (current->start < end);
	    current = current->next) {

		/*
		 * ignore submaps (for now) or null objects
		 */
		if (current->is_a_map || current->is_sub_map ||
		    current->object.vm_object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		/*
		 * scan this entry one page at a time
		 */
		while (addr < cend) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 */
			mincoreinfo = pmap_mincore(pmap, addr);
			if (!mincoreinfo) {
				vm_pindex_t pindex;
				vm_ooffset_t offset;
				vm_page_t m;

				/*
				 * calculate the page index into the object
				 */
				offset = current->offset + (addr - current->start);
				pindex = OFF_TO_IDX(offset);
				m = vm_page_lookup(current->object.vm_object,
				    pindex);
				/*
				 * if the page is resident, then gather information about
				 * it.
				 */
				if (m) {
					mincoreinfo = MINCORE_INCORE;
					if (m->dirty ||
					    pmap_is_modified(VM_PAGE_TO_PHYS(m)))
						mincoreinfo |= MINCORE_MODIFIED_OTHER;
					if ((m->flags & PG_REFERENCED) ||
					    pmap_is_referenced(VM_PAGE_TO_PHYS(m)))
						mincoreinfo |= MINCORE_REFERENCED_OTHER;
				}
			}

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = OFF_TO_IDX(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make sure that
			 * the byte vector is zeroed for those skipped entries.
			 */
			while ((lastvecindex + 1) < vecindex) {
				error = subyte(vec + lastvecindex, 0);
				if (error) {
					vm_map_unlock(map);
					return (EFAULT);
				}
				++lastvecindex;
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte(vec + vecindex, mincoreinfo);
			if (error) {
				vm_map_unlock(map);
				return (EFAULT);
			}
			lastvecindex = vecindex;
			addr += PAGE_SIZE;
		}
	}

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = OFF_TO_IDX(end - first_addr);
	while ((lastvecindex + 1) < vecindex) {
		error = subyte(vec + lastvecindex, 0);
		if (error) {
			vm_map_unlock(map);
			return (EFAULT);
		}
		++lastvecindex;
	}

	vm_map_unlock(map);
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mlock_args {
	caddr_t addr;
	size_t len;
};
#endif
int
mlock(p, uap, retval)
	struct proc *p;
	struct mlock_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int error;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disable wrap around */
	if (addr + size < addr)
		return (EINVAL);

	if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);

#ifdef pmap_wired_count
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);
#else
	error = suser(p->p_ucred, &p->p_acflag);
	if (error)
		return (error);
#endif

	error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr, addr + size, FALSE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

#ifndef _SYS_SYSPROTO_H_
struct munlock_args {
	caddr_t addr;
	size_t len;
};
#endif
int
munlock(p, uap, retval)
	struct proc *p;
	struct munlock_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int error;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disable wrap around */
	if (addr + size < addr)
		return (EINVAL);

#ifndef pmap_wired_count
	error = suser(p->p_ucred, &p->p_acflag);
	if (error)
		return (error);
#endif

	error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr, addr + size, TRUE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * Internal version of mmap.
 * Currently used by mmap, exec, and sys5 shared memory.
 * Handle is either a vnode pointer or NULL for MAP_ANON.
 */
int
vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
	register vm_map_t map;
	register vm_offset_t *addr;
	register vm_size_t size;
	vm_prot_t prot, maxprot;
	register int flags;
	caddr_t handle;		/* XXX should be vp */
	vm_ooffset_t foff;
{
	boolean_t fitit;
	vm_object_t object;
	struct vnode *vp = NULL;
	objtype_t type;
	int rv = KERN_SUCCESS;
	vm_ooffset_t objsize;
	int docow;
	struct proc *p = curproc;

	if (size == 0)
		return (0);

	objsize = size = round_page(size);

	/*
	 * We currently can only deal with page aligned file offsets.
	 * The check is here rather than in the syscall because the
	 * kernel calls this function internally for other mmaping
	 * operations (such as in exec) and non-aligned offsets will
	 * cause pmap inconsistencies...so we want to be sure to
	 * disallow this in all cases.
	 */
	if (foff & PAGE_MASK)
		return (EINVAL);

	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		if (*addr != trunc_page(*addr))
			return (EINVAL);
		fitit = FALSE;
		(void) vm_map_remove(map, *addr, *addr + size);
	}

	/*
	 * Lookup/allocate object.
	 */
	if (flags & MAP_ANON) {
		type = OBJT_DEFAULT;
		/*
		 * Unnamed anonymous regions always start at 0.
		 */
		if (handle == 0)
			foff = 0;
	} else {
		vp = (struct vnode *) handle;
		if (vp->v_type == VCHR) {
			type = OBJT_DEVICE;
			handle = (caddr_t) vp->v_rdev;
		} else {
			struct vattr vat;
			int error;

			error = VOP_GETATTR(vp, &vat, p->p_ucred, p);
			if (error)
				return (error);
			objsize = round_page(vat.va_size);
			type = OBJT_VNODE;
		}
	}

	if (handle == NULL) {
		object = NULL;
	} else {
		object = vm_pager_allocate(type, handle, OFF_TO_IDX(objsize), prot, foff);
		if (object == NULL)
			return (type == OBJT_DEVICE ? EINVAL : ENOMEM);
	}

	/*
	 * Force device mappings to be shared.
	 */
	if (type == OBJT_DEVICE) {
		flags &= ~(MAP_PRIVATE|MAP_COPY);
		flags |= MAP_SHARED;
	}

	docow = 0;
	if ((flags & (MAP_ANON|MAP_SHARED)) == 0) {
		docow = MAP_COPY_ON_WRITE | MAP_COPY_NEEDED;
	}

#if defined(VM_PROT_READ_IS_EXEC)
	if (prot & VM_PROT_READ)
		prot |= VM_PROT_EXECUTE;

	if (maxprot & VM_PROT_READ)
		maxprot |= VM_PROT_EXECUTE;
#endif

	rv = vm_map_find(map, object, foff, addr, size, fitit,
	    prot, maxprot, docow);

	if (rv != KERN_SUCCESS) {
		/*
		 * Lose the object reference. Will destroy the
		 * object if it's an unnamed anonymous mapping
		 * or named anonymous without other references.
		 */
		vm_object_deallocate(object);
		goto out;
	}

	/*
	 * "Pre-fault" resident pages.
	 */
	if ((type == OBJT_VNODE) && (map->pmap != NULL) && (object != NULL)) {
		pmap_object_init_pt(map->pmap, *addr,
		    object, (vm_pindex_t) OFF_TO_IDX(foff), size, 1);
	}

	/*
	 * Shared memory is also shared with children.
	 */
	if (flags & (MAP_SHARED|MAP_INHERIT)) {
		rv = vm_map_inherit(map, *addr, *addr + size, VM_INHERIT_SHARE);
		if (rv != KERN_SUCCESS) {
			(void) vm_map_remove(map, *addr, *addr + size);
			goto out;
		}
	}
out:
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}