/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 * $Id: vm_mmap.c,v 1.49 1996/07/30 03:08:12 dyson Exp $
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/conf.h>
#include <sys/vmmeter.h>

#include <miscfs/specfs/specdev.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <vm/vm_inherit.h>
#include <vm/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>

#ifndef _SYS_SYSPROTO_H_
struct sbrk_args {
	int incr;
};
#endif

/* ARGSUSED */
int
sbrk(p, uap, retval)
	struct proc *p;
	struct sbrk_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#ifndef _SYS_SYSPROTO_H_
struct sstk_args {
	int incr;
};
#endif

/* ARGSUSED */
int
sstk(p, uap, retval)
	struct proc *p;
	struct sstk_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
#ifndef _SYS_SYSPROTO_H_
struct getpagesize_args {
	int dummy;
};
#endif

/* ARGSUSED */
int
ogetpagesize(p, uap, retval)
	struct proc *p;
	struct getpagesize_args *uap;
	int *retval;
{

	*retval = PAGE_SIZE;
	return (0);
}
#endif	/* COMPAT_43 || COMPAT_SUNOS */

#ifndef _SYS_SYSPROTO_H_
struct mmap_args {
	caddr_t addr;
	size_t len;
	int prot;
	int flags;
	int fd;
	long pad;
	off_t pos;
};
#endif

int
mmap(p, uap, retval)
	struct proc *p;
	register struct mmap_args *uap;
	int *retval;
{
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct vnode *vp;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t prot, maxprot;
	caddr_t handle;
	int flags, error;

	prot = uap->prot & VM_PROT_ALL;
	flags = uap->flags;
	/*
	 * Address (if FIXED) must be page aligned. Size is implicitly rounded
	 * to a page boundary.
	 */
	addr = (vm_offset_t) uap->addr;
	if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) ||
	    (ssize_t) uap->len < 0 || ((flags & MAP_ANON) && uap->fd != -1))
		return (EINVAL);

	/*
	 * Round page if not already disallowed by above test
	 * XXX: Is there any point in the MAP_FIXED align requirement above?
	 */
	size = uap->len;
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
			return (EINVAL);
#ifndef i386
		if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
			return (EINVAL);
#endif
		if (addr + size < addr)
			return (EINVAL);
	}
	/*
	 * XXX if no hint provided for a non-fixed mapping place it after the
	 * end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable location.
	 */
	if (addr == 0 && (flags & MAP_FIXED) == 0)
		addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.
		 */
		handle = NULL;
		maxprot = VM_PROT_ALL;
	} else {
		/*
		 * Mapping file, get fp for validation. Obtain vnode and make
		 * sure it is of appropriate type.
		 */
		if (((unsigned) uap->fd) >= fdp->fd_nfiles ||
		    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
			return (EBADF);
		if (fp->f_type != DTYPE_VNODE)
			return (EINVAL);
		vp = (struct vnode *) fp->f_data;
		if (vp->v_type != VREG && vp->v_type != VCHR)
			return (EINVAL);
		/*
		 * XXX hack to handle use of /dev/zero to map anon memory (ala
		 * SunOS).
		 */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			handle = NULL;
			maxprot = VM_PROT_ALL;
			flags |= MAP_ANON;
		} else {
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination? What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;	/* ??? */
			if (fp->f_flag & FREAD)
				maxprot |= VM_PROT_READ;
			else if (prot & PROT_READ)
				return (EACCES);
			if (flags & MAP_SHARED) {
				if (fp->f_flag & FWRITE)
					maxprot |= VM_PROT_WRITE;
				else if (prot & PROT_WRITE)
					return (EACCES);
			} else
				maxprot |= VM_PROT_WRITE;
			handle = (caddr_t) vp;
		}
	}
	error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, handle, uap->pos);
	if (error == 0)
		*retval = (int) addr;
	return (error);
}

#ifdef COMPAT_43
#ifndef _SYS_SYSPROTO_H_
struct ommap_args {
	caddr_t addr;
	int len;
	int prot;
	int flags;
	int fd;
	long pos;
};
#endif
int
ommap(p, uap, retval)
	struct proc *p;
	register struct ommap_args *uap;
	int *retval;
{
	struct mmap_args nargs;
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC | PROT_WRITE,
		PROT_READ,
		PROT_EXEC | PROT_READ,
		PROT_WRITE | PROT_READ,
		PROT_EXEC | PROT_WRITE | PROT_READ,
	};

#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100
#define	OMAP_INHERIT	0x0800

	nargs.addr = uap->addr;
	nargs.len = uap->len;
	nargs.prot = cvtbsdprot[uap->prot & 0x7];
	nargs.flags = 0;
	if (uap->flags & OMAP_ANON)
		nargs.flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		nargs.flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		nargs.flags |= MAP_SHARED;
	else
		nargs.flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		nargs.flags |= MAP_FIXED;
	if (uap->flags & OMAP_INHERIT)
		nargs.flags |= MAP_INHERIT;
	nargs.fd = uap->fd;
	nargs.pos = uap->pos;
	return (mmap(p, &nargs, retval));
}
#endif	/* COMPAT_43 */

#ifndef _SYS_SYSPROTO_H_
struct msync_args {
	caddr_t addr;
	int len;
	int flags;
};
#endif
int
msync(p, uap, retval)
	struct proc *p;
	struct msync_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int flags;
	vm_map_t map;
	int rv;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	flags = uap->flags;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	map = &p->p_vmspace->vm_map;

	/*
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we don't
	 * really keep track of individual mmaps so we approximate by flushing
	 * the range of the map entry containing addr. This can be incorrect
	 * if the region splits or is coalesced with a neighbor.
	 */
	if (size == 0) {
		vm_map_entry_t entry;

		vm_map_lock_read(map);
		rv = vm_map_lookup_entry(map, addr, &entry);
		vm_map_unlock_read(map);
		if (rv == FALSE)
			return (EINVAL);
		addr = entry->start;
		size = entry->end - entry->start;
	}

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_clean(map, addr, addr + size, (flags & MS_ASYNC) == 0,
	    (flags & MS_INVALIDATE) != 0);

	switch (rv) {
	case KERN_SUCCESS:
		break;
	case KERN_INVALID_ADDRESS:
		return (EINVAL);	/* Sun returns ENOMEM? */
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}

	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct munmap_args {
	caddr_t addr;
	size_t len;
};
#endif
int
munmap(p, uap, retval)
	register struct proc *p;
	register struct munmap_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_map_t map;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if (size == 0)
		return (0);

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
		return (EINVAL);
#ifndef i386
	if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
		return (EINVAL);
#endif
	if (addr + size < addr)
		return (EINVAL);
	map = &p->p_vmspace->vm_map;
	/*
	 * Make sure entire range is allocated.
	 */
	if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
		return (EINVAL);
	/* returns nothing but KERN_SUCCESS anyway */
	(void) vm_map_remove(map, addr, addr + size);
	return (0);
}

void
munmapfd(p, fd)
	struct proc *p;
	int fd;
{
	/*
	 * XXX should unmap any regions mapped to this file
	 */
	p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
}

#ifndef _SYS_SYSPROTO_H_
struct mprotect_args {
	caddr_t addr;
	size_t len;
	int prot;
};
#endif
int
mprotect(p, uap, retval)
	struct proc *p;
	struct mprotect_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	register vm_prot_t prot;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot,
	    FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct minherit_args {
	caddr_t addr;
	size_t len;
	int inherit;
};
#endif
int
minherit(p, uap, retval)
	struct proc *p;
	struct minherit_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	register vm_inherit_t inherit;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	inherit = uap->inherit;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_inherit(&p->p_vmspace->vm_map, addr, addr + size,
	    inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct madvise_args {
	caddr_t addr;
	size_t len;
	int behav;
};
#endif

/* ARGSUSED */
int
madvise(p, uap, retval)
	struct proc *p;
	struct madvise_args *uap;
	int *retval;
{
	vm_map_t map;
	pmap_t pmap;
	vm_offset_t start, end;

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 &&
	    ((vm_offset_t) uap->addr + uap->len) > VM_MAXUSER_ADDRESS)
		return (EINVAL);
#ifndef i386
	if (VM_MIN_ADDRESS > 0 && uap->addr < VM_MIN_ADDRESS)
		return (EINVAL);
#endif
	if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr)
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	start = trunc_page((vm_offset_t) uap->addr);
	end = round_page((vm_offset_t) uap->addr + uap->len);

	map = &p->p_vmspace->vm_map;
	pmap = &p->p_vmspace->vm_pmap;

	vm_map_madvise(map, pmap, start, end, uap->behav);

	/* Not yet implemented */
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mincore_args {
	caddr_t addr;
	size_t len;
	char *vec;
};
#endif

/* ARGSUSED */
int
mincore(p, uap, retval)
	struct proc *p;
	struct mincore_args *uap;
	int *retval;
{
	vm_offset_t addr, first_addr;
	vm_offset_t end, cend;
	pmap_t pmap;
	vm_map_t map;
	char *vec;
	int error;
	int vecindex, lastvecindex;
	register vm_map_entry_t current;
	vm_map_entry_t entry;
	int mincoreinfo;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page((vm_offset_t) uap->addr);
	end = addr + (vm_size_t) round_page(uap->len);
	if (VM_MAXUSER_ADDRESS > 0 && end > VM_MAXUSER_ADDRESS)
		return (EINVAL);
	if (end < addr)
		return (EINVAL);

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	map = &p->p_vmspace->vm_map;
	pmap = &p->p_vmspace->vm_pmap;

	vm_map_lock(map);

	/*
	 * Not needed here
	 */
#if 0
	VM_MAP_RANGE_CHECK(map, addr, end);
#endif

	if (!vm_map_lookup_entry(map, addr, &entry))
		entry = entry->next;

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current processes address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for (current = entry;
	    (current != &map->header) && (current->start < end);
	    current = current->next) {

		/*
		 * ignore submaps (for now) or null objects
		 */
		if (current->is_a_map || current->is_sub_map ||
		    current->object.vm_object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		/*
		 * scan this entry one page at a time
		 */
		while (addr < cend) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 */
			mincoreinfo = pmap_mincore(pmap, addr);
			if (!mincoreinfo) {
				vm_pindex_t pindex;
				vm_ooffset_t offset;
				vm_page_t m;

				/*
				 * calculate the page index into the object
				 */
				offset = current->offset + (addr - current->start);
				pindex = OFF_TO_IDX(offset);
				m = vm_page_lookup(current->object.vm_object,
				    pindex);
				/*
				 * if the page is resident, then gather
				 * information about it.
				 */
				if (m) {
					mincoreinfo = MINCORE_INCORE;
					if (m->dirty ||
					    pmap_is_modified(VM_PAGE_TO_PHYS(m)))
						mincoreinfo |= MINCORE_MODIFIED_OTHER;
					if ((m->flags & PG_REFERENCED) ||
					    pmap_is_referenced(VM_PAGE_TO_PHYS(m)))
						mincoreinfo |= MINCORE_REFERENCED_OTHER;
				}
			}

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = OFF_TO_IDX(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make sure that
			 * the byte vector is zeroed for those skipped entries.
			 */
			while ((lastvecindex + 1) < vecindex) {
				error = subyte(vec + lastvecindex, 0);
				if (error) {
					vm_map_unlock(map);
					return (EFAULT);
				}
				++lastvecindex;
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte(vec + vecindex, mincoreinfo);
			if (error) {
				vm_map_unlock(map);
				return (EFAULT);
			}
			lastvecindex = vecindex;
			addr += PAGE_SIZE;
		}
	}

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = OFF_TO_IDX(end - first_addr);
	while ((lastvecindex + 1) < vecindex) {
		error = subyte(vec + lastvecindex, 0);
		if (error) {
			vm_map_unlock(map);
			return (EFAULT);
		}
		++lastvecindex;
	}

	vm_map_unlock(map);
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mlock_args {
	caddr_t addr;
	size_t len;
};
#endif
int
mlock(p, uap, retval)
	struct proc *p;
	struct mlock_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int error;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disable wrap around */
	if (addr + size < addr)
		return (EINVAL);

	if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);

#ifdef pmap_wired_count
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);
#else
	error = suser(p->p_ucred, &p->p_acflag);
	if (error)
		return (error);
#endif

	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr + size, FALSE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

#ifndef _SYS_SYSPROTO_H_
struct munlock_args {
	caddr_t addr;
	size_t len;
};
#endif
int
munlock(p, uap, retval)
	struct proc *p;
	struct munlock_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int error;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disable wrap around */
	if (addr + size < addr)
		return (EINVAL);

#ifndef pmap_wired_count
	error = suser(p->p_ucred, &p->p_acflag);
	if (error)
		return (error);
#endif

	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr + size, TRUE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * Internal version of mmap.
 * Currently used by mmap, exec, and sys5 shared memory.
 * Handle is either a vnode pointer or NULL for MAP_ANON.
 */
int
vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
	register vm_map_t map;
	register vm_offset_t *addr;
	register vm_size_t size;
	vm_prot_t prot, maxprot;
	register int flags;
	caddr_t handle;		/* XXX should be vp */
	vm_ooffset_t foff;
{
	boolean_t fitit;
	vm_object_t object, object2;
	struct vnode *vp = NULL;
	objtype_t type;
	int rv = KERN_SUCCESS;
	vm_ooffset_t objsize;
	int docow;
	struct proc *p = curproc;

	if (size == 0)
		return (0);

	objsize = size = round_page(size);

	/*
	 * We currently can only deal with page aligned file offsets.
	 * The check is here rather than in the syscall because the
	 * kernel calls this function internally for other mmaping
	 * operations (such as in exec) and non-aligned offsets will
	 * cause pmap inconsistencies...so we want to be sure to
	 * disallow this in all cases.
	 */
	if (foff & PAGE_MASK)
		return (EINVAL);

	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		if (*addr != trunc_page(*addr))
			return (EINVAL);
		fitit = FALSE;
		(void) vm_map_remove(map, *addr, *addr + size);
	}

	/*
	 * Lookup/allocate object.
	 */
	if (flags & MAP_ANON) {
		type = OBJT_SWAP;
		/*
		 * Unnamed anonymous regions always start at 0.
		 */
		if (handle == 0)
			foff = 0;
	} else {
		vp = (struct vnode *) handle;
		if (vp->v_type == VCHR) {
			type = OBJT_DEVICE;
			handle = (caddr_t) vp->v_rdev;
		} else {
			struct vattr vat;
			int error;

			error = VOP_GETATTR(vp, &vat, p->p_ucred, p);
			if (error)
				return (error);
			objsize = round_page(vat.va_size);
			type = OBJT_VNODE;
		}
	}
	object = vm_pager_allocate(type, handle, OFF_TO_IDX(objsize), prot, foff);
	if (object == NULL)
		return (type == OBJT_DEVICE ? EINVAL : ENOMEM);

	/*
	 * Force device mappings to be shared.
	 */
	if (type == OBJT_DEVICE) {
		flags &= ~(MAP_PRIVATE|MAP_COPY);
		flags |= MAP_SHARED;
	}

	object2 = NULL;
	docow = 0;
	if ((flags & (MAP_ANON|MAP_SHARED)) == 0) {
		docow = MAP_COPY_ON_WRITE;
		if (objsize < size) {
			object2 = vm_object_allocate(OBJT_DEFAULT,
			    OFF_TO_IDX(size - (foff & ~PAGE_MASK)));
			object2->backing_object = object;
			object2->backing_object_offset = foff;
			TAILQ_INSERT_TAIL(&object->shadow_head,
			    object2, shadow_list);
			++object->shadow_count;
		} else {
			docow |= MAP_COPY_NEEDED;
		}
	}

	if (object2)
		rv = vm_map_find(map, object2, 0, addr, size, fitit,
		    prot, maxprot, docow);
	else
		rv = vm_map_find(map, object, foff, addr, size, fitit,
		    prot, maxprot, docow);

	if (rv != KERN_SUCCESS) {
		/*
		 * Lose the object reference. Will destroy the
		 * object if it's an unnamed anonymous mapping
		 * or named anonymous without other references.
		 */
		if (object2)
			vm_object_deallocate(object2);
		else
			vm_object_deallocate(object);
		goto out;
	}

	/*
	 * "Pre-fault" resident pages.
	 */
	if ((type == OBJT_VNODE) && (map->pmap != NULL)) {
		pmap_object_init_pt(map->pmap, *addr,
		    object, (vm_pindex_t) OFF_TO_IDX(foff), size, 1);
	}

	/*
	 * Shared memory is also shared with children.
	 */
	if (flags & (MAP_SHARED|MAP_INHERIT)) {
		rv = vm_map_inherit(map, *addr, *addr + size, VM_INHERIT_SHARE);
		if (rv != KERN_SUCCESS) {
			(void) vm_map_remove(map, *addr, *addr + size);
			goto out;
		}
	}
out:
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}