/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 * $Id: vm_mmap.c,v 1.63 1997/03/23 03:37:53 bde Exp $
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include "opt_rlimit.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/conf.h>
#include <sys/vmmeter.h>

#include <miscfs/specfs/specdev.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <vm/vm_inherit.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>

#ifndef _SYS_SYSPROTO_H_
struct sbrk_args {
	int incr;
};
#endif

/* ARGSUSED */
int
sbrk(p, uap, retval)
	struct proc *p;
	struct sbrk_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#ifndef _SYS_SYSPROTO_H_
struct sstk_args {
	int incr;
};
#endif

/* ARGSUSED */
int
sstk(p, uap, retval)
	struct proc *p;
	struct sstk_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
#ifndef _SYS_SYSPROTO_H_
struct getpagesize_args {
	int dummy;
};
#endif

/* ARGSUSED */
int
ogetpagesize(p, uap, retval)
	struct proc *p;
	struct getpagesize_args *uap;
	int *retval;
{

	*retval = PAGE_SIZE;
	return (0);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */

#ifndef _SYS_SYSPROTO_H_
struct mmap_args {
	caddr_t addr;
	size_t len;
	int prot;
	int flags;
	int fd;
	long pad;
	off_t pos;
};
#endif

int
mmap(p, uap, retval)
	struct proc *p;
	register struct mmap_args *uap;
	int *retval;
{
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct vnode *vp;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t prot, maxprot;
	caddr_t handle;
	int flags, error;

	prot = uap->prot & VM_PROT_ALL;
	flags = uap->flags;
	/*
	 * Address (if FIXED) must be page aligned. Size is implicitly rounded
	 * to a page boundary.
	 */
	addr = (vm_offset_t) uap->addr;
	if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) ||
	    (ssize_t) uap->len < 0 || ((flags & MAP_ANON) && uap->fd != -1))
		return (EINVAL);

	/*
	 * Round page if not already disallowed by above test
	 * XXX: Is there any point in the MAP_FIXED align requirement above?
	 */
	size = uap->len;
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

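	/*
	 * Illustrative note (not from the original source): with the usual
	 * 4K pages (PAGE_SIZE == 0x1000, PAGE_MASK == 0xfff), a request of
	 * addr = 0x10234, len = 0x100 is widened to whole pages by the code
	 * above:
	 *
	 *	pageoff = 0x234, addr -> 0x10000,
	 *	size = 0x100 + 0x234 = 0x334, round_page(0x334) = 0x1000
	 *
	 * so the mapping covers exactly the one page containing the request.
	 */
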
	/*
	 * Check for illegal addresses. Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
			return (EINVAL);
#ifndef i386
		if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
			return (EINVAL);
#endif
		if (addr + size < addr)
			return (EINVAL);
	}
	/*
	 * XXX if no hint provided for a non-fixed mapping place it after the
	 * end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable location.
	 */
	if (addr == 0 && (flags & MAP_FIXED) == 0)
		addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.
		 */
		handle = NULL;
		maxprot = VM_PROT_ALL;
	} else {
		/*
		 * Mapping file, get fp for validation. Obtain vnode and make
		 * sure it is of appropriate type.
		 */
		if (((unsigned) uap->fd) >= fdp->fd_nfiles ||
		    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
			return (EBADF);
		if (fp->f_type != DTYPE_VNODE)
			return (EINVAL);
		vp = (struct vnode *) fp->f_data;
		if (vp->v_type != VREG && vp->v_type != VCHR)
			return (EINVAL);
		/*
		 * XXX hack to handle use of /dev/zero to map anon memory (ala
		 * SunOS).
		 */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			handle = NULL;
			maxprot = VM_PROT_ALL;
			flags |= MAP_ANON;
		} else {
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination?  What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;	/* ??? */
			if (fp->f_flag & FREAD)
				maxprot |= VM_PROT_READ;
			else if (prot & PROT_READ)
				return (EACCES);
			if (flags & MAP_SHARED) {
				if (fp->f_flag & FWRITE)
					maxprot |= VM_PROT_WRITE;
				else if (prot & PROT_WRITE)
					return (EACCES);
			} else
				maxprot |= VM_PROT_WRITE;
			handle = (caddr_t) vp;
		}
	}
	error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, handle, uap->pos);
	if (error == 0)
		*retval = (int) addr;
	return (error);
}

#ifdef COMPAT_43
#ifndef _SYS_SYSPROTO_H_
struct ommap_args {
	caddr_t addr;
	int len;
	int prot;
	int flags;
	int fd;
	long pos;
};
#endif
int
ommap(p, uap, retval)
	struct proc *p;
	register struct ommap_args *uap;
	int *retval;
{
	struct mmap_args nargs;
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC | PROT_WRITE,
		PROT_READ,
		PROT_EXEC | PROT_READ,
		PROT_WRITE | PROT_READ,
		PROT_EXEC | PROT_WRITE | PROT_READ,
	};

#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100
#define	OMAP_INHERIT	0x0800

	nargs.addr = uap->addr;
	nargs.len = uap->len;
	nargs.prot = cvtbsdprot[uap->prot & 0x7];
	nargs.flags = 0;
	if (uap->flags & OMAP_ANON)
		nargs.flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		nargs.flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		nargs.flags |= MAP_SHARED;
	else
		nargs.flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		nargs.flags |= MAP_FIXED;
	if (uap->flags & OMAP_INHERIT)
		nargs.flags |= MAP_INHERIT;
	nargs.fd = uap->fd;
	nargs.pos = uap->pos;
	return (mmap(p, &nargs, retval));
}
#endif /* COMPAT_43 */
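
/*
 * Illustrative note (not from the original source): the cvtbsdprot[] table
 * in ommap() above treats the old 4.3BSD protection value as a 3-bit index
 * with 01 = execute, 02 = write and 04 = read, so an old prot of 06 becomes
 * PROT_WRITE | PROT_READ, and an old flags word of OMAP_ANON | OMAP_FIXED
 * becomes MAP_ANON | MAP_PRIVATE | MAP_FIXED before the request is handed
 * to the new mmap() above.
 */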

#ifndef _SYS_SYSPROTO_H_
struct msync_args {
	caddr_t addr;
	int len;
	int flags;
};
#endif
int
msync(p, uap, retval)
	struct proc *p;
	struct msync_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int flags;
	vm_map_t map;
	int rv;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	flags = uap->flags;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	map = &p->p_vmspace->vm_map;

	/*
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we don't
	 * really keep track of individual mmaps so we approximate by flushing
	 * the range of the map entry containing addr. This can be incorrect
	 * if the region splits or is coalesced with a neighbor.
	 */
	if (size == 0) {
		vm_map_entry_t entry;

		vm_map_lock_read(map);
		rv = vm_map_lookup_entry(map, addr, &entry);
		vm_map_unlock_read(map);
		if (rv == FALSE)
			return (EINVAL);
		addr = entry->start;
		size = entry->end - entry->start;
	}

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_clean(map, addr, addr + size, (flags & MS_ASYNC) == 0,
	    (flags & MS_INVALIDATE) != 0);

	switch (rv) {
	case KERN_SUCCESS:
		break;
	case KERN_INVALID_ADDRESS:
		return (EINVAL);	/* Sun returns ENOMEM? */
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}

	return (0);
}
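
/*
 * Illustrative note (not from the original source): given the checks in
 * msync() above, a call such as
 *
 *	msync(addr, 0, 0);
 *
 * synchronously flushes the entire map entry containing addr (the zero-size
 * case described in the XXX comment), while a call passing both MS_ASYNC and
 * MS_INVALIDATE in the same flags word is rejected with EINVAL.
 */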

#ifndef _SYS_SYSPROTO_H_
struct munmap_args {
	caddr_t addr;
	size_t len;
};
#endif
int
munmap(p, uap, retval)
	register struct proc *p;
	register struct munmap_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_map_t map;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if (size == 0)
		return (0);

	/*
	 * Check for illegal addresses. Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
		return (EINVAL);
#ifndef i386
	if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
		return (EINVAL);
#endif
	if (addr + size < addr)
		return (EINVAL);
	map = &p->p_vmspace->vm_map;
	/*
	 * Make sure entire range is allocated.
	 */
	if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
		return (EINVAL);
	/* returns nothing but KERN_SUCCESS anyway */
	(void) vm_map_remove(map, addr, addr + size);
	return (0);
}

void
munmapfd(p, fd)
	struct proc *p;
	int fd;
{
	/*
	 * XXX should unmap any regions mapped to this file
	 */
	p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
}

#ifndef _SYS_SYSPROTO_H_
struct mprotect_args {
	caddr_t addr;
	size_t len;
	int prot;
};
#endif
int
mprotect(p, uap, retval)
	struct proc *p;
	struct mprotect_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	register vm_prot_t prot;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;
#if defined(VM_PROT_READ_IS_EXEC)
	if (prot & VM_PROT_READ)
		prot |= VM_PROT_EXECUTE;
#endif

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot,
	    FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct minherit_args {
	caddr_t addr;
	size_t len;
	int inherit;
};
#endif
int
minherit(p, uap, retval)
	struct proc *p;
	struct minherit_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	register vm_inherit_t inherit;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	inherit = uap->inherit;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_inherit(&p->p_vmspace->vm_map, addr, addr + size,
	    inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}
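
/*
 * Illustrative note (not from the original source): on architectures that
 * define VM_PROT_READ_IS_EXEC (read permission implies execute permission),
 * the mprotect() path above silently widens a PROT_READ request to
 * PROT_READ | PROT_EXEC, so
 *
 *	mprotect(addr, len, PROT_READ);
 *
 * leaves the pages executable there.  A request that exceeds the map entry's
 * maximum protection comes back as KERN_PROTECTION_FAILURE and is reported
 * to the caller as EACCES.
 */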

#ifndef _SYS_SYSPROTO_H_
struct madvise_args {
	caddr_t addr;
	size_t len;
	int behav;
};
#endif

/* ARGSUSED */
int
madvise(p, uap, retval)
	struct proc *p;
	struct madvise_args *uap;
	int *retval;
{
	vm_map_t map;
	pmap_t pmap;
	vm_offset_t start, end;
	/*
	 * Check for illegal addresses. Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 &&
	    ((vm_offset_t) uap->addr + uap->len) > VM_MAXUSER_ADDRESS)
		return (EINVAL);
#ifndef i386
	if (VM_MIN_ADDRESS > 0 && uap->addr < VM_MIN_ADDRESS)
		return (EINVAL);
#endif
	if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr)
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	start = trunc_page((vm_offset_t) uap->addr);
	end = round_page((vm_offset_t) uap->addr + uap->len);

	map = &p->p_vmspace->vm_map;
	pmap = &p->p_vmspace->vm_pmap;

	vm_map_madvise(map, pmap, start, end, uap->behav);

	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mincore_args {
	caddr_t addr;
	size_t len;
	char *vec;
};
#endif

/* ARGSUSED */
int
mincore(p, uap, retval)
	struct proc *p;
	struct mincore_args *uap;
	int *retval;
{
	vm_offset_t addr, first_addr;
	vm_offset_t end, cend;
	pmap_t pmap;
	vm_map_t map;
	char *vec;
	int error;
	int vecindex, lastvecindex;
	register vm_map_entry_t current;
	vm_map_entry_t entry;
	int mincoreinfo;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page((vm_offset_t) uap->addr);
	end = addr + (vm_size_t) round_page(uap->len);
	if (VM_MAXUSER_ADDRESS > 0 && end > VM_MAXUSER_ADDRESS)
		return (EINVAL);
	if (end < addr)
		return (EINVAL);

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	map = &p->p_vmspace->vm_map;
	pmap = &p->p_vmspace->vm_pmap;

	vm_map_lock(map);

	/*
	 * Not needed here
	 */
#if 0
	VM_MAP_RANGE_CHECK(map, addr, end);
#endif

	if (!vm_map_lookup_entry(map, addr, &entry))
		entry = entry->next;

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current processes address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for (current = entry;
	    (current != &map->header) && (current->start < end);
	    current = current->next) {

		/*
		 * ignore submaps (for now) or null objects
		 */
		if ((current->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) ||
		    current->object.vm_object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		/*
		 * scan this entry one page at a time
		 */
		while (addr < cend) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 */
			mincoreinfo = pmap_mincore(pmap, addr);
			if (!mincoreinfo) {
				vm_pindex_t pindex;
				vm_ooffset_t offset;
				vm_page_t m;
				/*
				 * calculate the page index into the object
				 */
				offset = current->offset + (addr - current->start);
				pindex = OFF_TO_IDX(offset);
				m = vm_page_lookup(current->object.vm_object,
				    pindex);
				/*
				 * if the page is resident, then gather information about
				 * it.
				 */
				if (m) {
					mincoreinfo = MINCORE_INCORE;
					if (m->dirty ||
					    pmap_is_modified(VM_PAGE_TO_PHYS(m)))
						mincoreinfo |= MINCORE_MODIFIED_OTHER;
					if ((m->flags & PG_REFERENCED) ||
					    pmap_ts_referenced(VM_PAGE_TO_PHYS(m))) {
						m->flags |= PG_REFERENCED;
						mincoreinfo |= MINCORE_REFERENCED_OTHER;
					}
				}
			}

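			/*
			 * Illustrative note (not from the original source):
			 * at this point mincoreinfo holds either the pmap's
			 * answer or the value assembled above from the VM
			 * page; e.g. a resident page dirtied by some other
			 * mapping reports MINCORE_INCORE |
			 * MINCORE_MODIFIED_OTHER, and a page that is not
			 * resident at all reports 0.
			 */
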
			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = OFF_TO_IDX(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make sure that
			 * the byte vector is zeroed for those skipped entries.
			 */
			while ((lastvecindex + 1) < vecindex) {
				++lastvecindex;
				error = subyte(vec + lastvecindex, 0);
				if (error) {
					vm_map_unlock(map);
					return (EFAULT);
				}
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte(vec + vecindex, mincoreinfo);
			if (error) {
				vm_map_unlock(map);
				return (EFAULT);
			}
			lastvecindex = vecindex;
			addr += PAGE_SIZE;
		}
	}

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = OFF_TO_IDX(end - first_addr);
	while ((lastvecindex + 1) < vecindex) {
		++lastvecindex;
		error = subyte(vec + lastvecindex, 0);
		if (error) {
			vm_map_unlock(map);
			return (EFAULT);
		}
	}

	vm_map_unlock(map);
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mlock_args {
	caddr_t addr;
	size_t len;
};
#endif
int
mlock(p, uap, retval)
	struct proc *p;
	struct mlock_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int error;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disable wrap around */
	if (addr + size < addr)
		return (EINVAL);

	if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);

#ifdef pmap_wired_count
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (ENOMEM);
#else
	error = suser(p->p_ucred, &p->p_acflag);
	if (error)
		return (error);
#endif

	error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr, addr + size, FALSE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

#ifndef _SYS_SYSPROTO_H_
struct mlockall_args {
	int how;
};
#endif

int
mlockall(p, uap, retval)
	struct proc *p;
	struct mlockall_args *uap;
	int *retval;
{
	return 0;
}

#ifndef _SYS_SYSPROTO_H_
struct munlockall_args {
	int how;
};
#endif

int
munlockall(p, uap, retval)
	struct proc *p;
	struct munlockall_args *uap;
	int *retval;
{
	return 0;
}

#ifndef _SYS_SYSPROTO_H_
struct munlock_args {
	caddr_t addr;
	size_t len;
};
#endif
int
munlock(p, uap, retval)
	struct proc *p;
	struct munlock_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int error;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disable wrap around */
	if (addr + size < addr)
		return (EINVAL);

#ifndef pmap_wired_count
	error = suser(p->p_ucred, &p->p_acflag);
	if (error)
		return (error);
#endif

	error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr, addr + size, TRUE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

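/*
 * Illustrative note (not from the original source): the global limit checked
 * in mlock() above is expressed in pages, so with 4K pages locking a 1 MB
 * range accounts for atop(0x100000) = 256 pages; the request fails with
 * EAGAIN if adding them to cnt.v_wire_count would exceed vm_page_max_wired,
 * and (where pmap_wired_count is available) with ENOMEM if the process would
 * exceed its RLIMIT_MEMLOCK resource limit.
 */
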
/*
 * Internal version of mmap.
 * Currently used by mmap, exec, and sys5 shared memory.
 * Handle is either a vnode pointer or NULL for MAP_ANON.
 */
int
vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
	register vm_map_t map;
	register vm_offset_t *addr;
	register vm_size_t size;
	vm_prot_t prot, maxprot;
	register int flags;
	caddr_t handle;		/* XXX should be vp */
	vm_ooffset_t foff;
{
	boolean_t fitit;
	vm_object_t object;
	struct vnode *vp = NULL;
	objtype_t type;
	int rv = KERN_SUCCESS;
	vm_ooffset_t objsize;
	int docow;
	struct proc *p = curproc;

	if (size == 0)
		return (0);

	objsize = size = round_page(size);

	/*
	 * We currently can only deal with page aligned file offsets.
	 * The check is here rather than in the syscall because the
	 * kernel calls this function internally for other mmaping
	 * operations (such as in exec) and non-aligned offsets will
	 * cause pmap inconsistencies...so we want to be sure to
	 * disallow this in all cases.
	 */
	if (foff & PAGE_MASK)
		return (EINVAL);

	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		if (*addr != trunc_page(*addr))
			return (EINVAL);
		fitit = FALSE;
		(void) vm_map_remove(map, *addr, *addr + size);
	}

	/*
	 * Lookup/allocate object.
	 */
	if (flags & MAP_ANON) {
		type = OBJT_DEFAULT;
		/*
		 * Unnamed anonymous regions always start at 0.
		 */
		if (handle == 0)
			foff = 0;
	} else {
		vp = (struct vnode *) handle;
		if (vp->v_type == VCHR) {
			type = OBJT_DEVICE;
			handle = (caddr_t) vp->v_rdev;
		} else {
			struct vattr vat;
			int error;

			error = VOP_GETATTR(vp, &vat, p->p_ucred, p);
			if (error)
				return (error);
			objsize = round_page(vat.va_size);
			type = OBJT_VNODE;
		}
	}

	if (handle == NULL) {
		object = NULL;
	} else {
		object = vm_pager_allocate(type, handle, OFF_TO_IDX(objsize), prot, foff);
		if (object == NULL)
			return (type == OBJT_DEVICE ? EINVAL : ENOMEM);
	}

	/*
	 * Force device mappings to be shared.
	 */
	if (type == OBJT_DEVICE) {
		flags &= ~(MAP_PRIVATE|MAP_COPY);
		flags |= MAP_SHARED;
	}

	docow = 0;
	if ((flags & (MAP_ANON|MAP_SHARED)) == 0) {
		docow = MAP_COPY_ON_WRITE | MAP_COPY_NEEDED;
	}

#if defined(VM_PROT_READ_IS_EXEC)
	if (prot & VM_PROT_READ)
		prot |= VM_PROT_EXECUTE;

	if (maxprot & VM_PROT_READ)
		maxprot |= VM_PROT_EXECUTE;
#endif

	rv = vm_map_find(map, object, foff, addr, size, fitit,
	    prot, maxprot, docow);

	if (rv != KERN_SUCCESS) {
		/*
		 * Lose the object reference. Will destroy the
		 * object if it's an unnamed anonymous mapping
		 * or named anonymous without other references.
		 */
		vm_object_deallocate(object);
		goto out;
	}

	/*
	 * "Pre-fault" resident pages.
	 */
	if ((type == OBJT_VNODE) && (map->pmap != NULL) && (object != NULL)) {
		pmap_object_init_pt(map->pmap, *addr,
		    object, (vm_pindex_t) OFF_TO_IDX(foff), size, 1);
	}

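	/*
	 * Illustrative note (not from the original source): the docow setup
	 * above means a MAP_PRIVATE file mapping is entered with
	 * MAP_COPY_ON_WRITE | MAP_COPY_NEEDED, so the first write to a page
	 * faults in a private copy, while MAP_ANON and MAP_SHARED mappings
	 * are entered with docow == 0 and modify the backing object directly.
	 */
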
	/*
	 * Shared memory is also shared with children.
	 */
	if (flags & (MAP_SHARED|MAP_INHERIT)) {
		rv = vm_map_inherit(map, *addr, *addr + size, VM_INHERIT_SHARE);
		if (rv != KERN_SUCCESS) {
			(void) vm_map_remove(map, *addr, *addr + size);
			goto out;
		}
	}
out:
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}
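
/*
 * Illustrative example (not part of the original source): a minimal userland
 * sketch of the interface implemented in this file, assuming the standard
 * libc wrappers and <sys/mman.h>; the file name is arbitrary.
 *
 *	int fd = open("/etc/motd", O_RDONLY);
 *	size_t len = 2 * getpagesize();
 *	caddr_t p = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, (off_t)0);
 *	if (p == (caddr_t)-1)
 *		err(1, "mmap");
 *	msync(p, len, MS_INVALIDATE);
 *	munmap(p, len);
 *	close(fd);
 */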