/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 * $Id: vm_mmap.c,v 1.100 1999/06/05 18:21:53 alc Exp $
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include "opt_compat.h"
#include "opt_rlimit.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <vm/vm_inherit.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>

#ifndef _SYS_SYSPROTO_H_
struct sbrk_args {
        int incr;
};
#endif

/* ARGSUSED */
int
sbrk(p, uap)
        struct proc *p;
        struct sbrk_args *uap;
{

        /* Not yet implemented */
        return (EOPNOTSUPP);
}

#ifndef _SYS_SYSPROTO_H_
struct sstk_args {
        int incr;
};
#endif

/* ARGSUSED */
int
sstk(p, uap)
        struct proc *p;
        struct sstk_args *uap;
{

        /* Not yet implemented */
        return (EOPNOTSUPP);
}

#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
#ifndef _SYS_SYSPROTO_H_
struct getpagesize_args {
        int dummy;
};
#endif

/* ARGSUSED */
int
ogetpagesize(p, uap)
        struct proc *p;
        struct getpagesize_args *uap;
{

        p->p_retval[0] = PAGE_SIZE;
        return (0);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */


/*
 * Memory Map (mmap) system call.  Note that the file offset
 * and address are allowed to be NOT page aligned, though if
 * the MAP_FIXED flag is set, both must have the same remainder
 * modulo the PAGE_SIZE (POSIX 1003.1b).  If the address is not
 * page-aligned, the actual mapping starts at trunc_page(addr)
 * and the return value is adjusted up by the page offset.
 */
#ifndef _SYS_SYSPROTO_H_
struct mmap_args {
        void *addr;
        size_t len;
        int prot;
        int flags;
        int fd;
        long pad;
        off_t pos;
};
#endif

int
mmap(p, uap)
        struct proc *p;
        register struct mmap_args *uap;
{
        register struct filedesc *fdp = p->p_fd;
        register struct file *fp;
        struct vnode *vp;
        vm_offset_t addr;
        vm_size_t size, pageoff;
        vm_prot_t prot, maxprot;
        void *handle;
        int flags, error;
        int disablexworkaround;
        off_t pos;

        addr = (vm_offset_t) uap->addr;
        size = uap->len;
        prot = uap->prot & VM_PROT_ALL;
        flags = uap->flags;
        pos = uap->pos;

        /* make sure mapping fits into numeric range etc */
        if ((ssize_t) uap->len < 0 ||
            ((flags & MAP_ANON) && uap->fd != -1))
                return (EINVAL);

        if (flags & MAP_STACK) {
                if ((uap->fd != -1) ||
                    ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE)))
                        return (EINVAL);
                flags |= MAP_ANON;
                pos = 0;
        }

        /*
         * Align the file position to a page boundary,
         * and save its page offset component.
         */
        pageoff = (pos & PAGE_MASK);
        pos -= pageoff;

        /* Adjust size for rounding (on both ends). */
        size += pageoff;                        /* low end... */
        size = (vm_size_t) round_page(size);    /* hi end */

        /*
         * Check for illegal addresses.  Watch out for address wrap... Note
         * that VM_*_ADDRESS are not constants due to casts (argh).
         */
        if (flags & MAP_FIXED) {
                /*
                 * The specified address must have the same remainder
                 * as the file offset taken modulo PAGE_SIZE, so it
                 * should be aligned after adjustment by pageoff.
                 */
                addr -= pageoff;
                if (addr & PAGE_MASK)
                        return (EINVAL);
                /* Address range must be all in user VM space. */
                if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
                        return (EINVAL);
#ifndef i386
                if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
                        return (EINVAL);
#endif
                if (addr + size < addr)
                        return (EINVAL);
        }
        /*
         * XXX for non-fixed mappings where no hint is provided or
         * the hint would fall in the potential heap space,
         * place it after the end of the largest possible heap.
         *
         * There should really be a pmap call to determine a reasonable
         * location.
         */
        else if (addr == 0 ||
            (addr >= round_page((vm_offset_t)p->p_vmspace->vm_taddr) &&
             addr < round_page((vm_offset_t)p->p_vmspace->vm_daddr + MAXDSIZ)))
                addr = round_page((vm_offset_t)p->p_vmspace->vm_daddr + MAXDSIZ);

        if (flags & MAP_ANON) {
                /*
                 * Mapping blank space is trivial.
                 */
                handle = NULL;
                maxprot = VM_PROT_ALL;
                pos = 0;
        } else {
                /*
                 * Mapping file, get fp for validation. Obtain vnode and make
                 * sure it is of appropriate type.
                 */
                if (((unsigned) uap->fd) >= fdp->fd_nfiles ||
                    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
                        return (EBADF);
                if (fp->f_type != DTYPE_VNODE)
                        return (EINVAL);
                vp = (struct vnode *) fp->f_data;
                if (vp->v_type != VREG && vp->v_type != VCHR)
                        return (EINVAL);
                /*
                 * XXX hack to handle use of /dev/zero to map anon memory (ala
                 * SunOS).
                 */
                if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
                        handle = NULL;
                        maxprot = VM_PROT_ALL;
                        flags |= MAP_ANON;
                        pos = 0;
                } else {
                        /*
                         * cdevs do not provide private mappings of any kind.
                         */
                        /*
                         * However, for the XIG X server to continue to work,
                         * we should allow the superuser to do it anyway.
                         * We only allow it at securelevel < 1.
                         * (Because the XIG X server writes directly to video
                         * memory via /dev/mem, it should never work at any
                         * other securelevel.)
                         * XXX this will have to go
                         */
                        if (securelevel >= 1)
                                disablexworkaround = 1;
                        else
                                disablexworkaround = suser(p);
                        if (vp->v_type == VCHR && disablexworkaround &&
                            (flags & (MAP_PRIVATE|MAP_COPY)))
                                return (EINVAL);
                        /*
                         * Ensure that file and memory protections are
                         * compatible.  Note that we only worry about
                         * writability if mapping is shared; in this case,
                         * current and max prot are dictated by the open file.
                         * XXX use the vnode instead?  Problem is: what
                         * credentials do we use for determination?  What if
                         * proc does a setuid?
                         */
                        maxprot = VM_PROT_EXECUTE;      /* ??? */
                        if (fp->f_flag & FREAD)
                                maxprot |= VM_PROT_READ;
                        else if (prot & PROT_READ)
                                return (EACCES);
                        /*
                         * If we are sharing potential changes (either via
                         * MAP_SHARED or via the implicit sharing of character
                         * device mappings), and we are trying to get write
                         * permission although we opened it without asking
                         * for it, bail out.  Check for superuser, only if
                         * we're at securelevel < 1, to allow the XIG X server
                         * to continue to work.
                         */

                        if ((flags & MAP_SHARED) != 0 ||
                            (vp->v_type == VCHR && disablexworkaround)) {
                                if ((fp->f_flag & FWRITE) != 0) {
                                        struct vattr va;
                                        if ((error =
                                            VOP_GETATTR(vp, &va,
                                                p->p_ucred, p)))
                                                return (error);
                                        if ((va.va_flags &
                                            (IMMUTABLE|APPEND)) == 0)
                                                maxprot |= VM_PROT_WRITE;
                                        else if (prot & PROT_WRITE)
                                                return (EPERM);
                                } else if ((prot & PROT_WRITE) != 0)
                                        return (EACCES);
                        } else
                                maxprot |= VM_PROT_WRITE;

                        handle = (void *)vp;
                }
        }
        error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
            flags, handle, pos);
        if (error == 0)
                p->p_retval[0] = (register_t) (addr + pageoff);
        return (error);
}

#ifdef COMPAT_43
#ifndef _SYS_SYSPROTO_H_
struct ommap_args {
        caddr_t addr;
        int len;
        int prot;
        int flags;
        int fd;
        long pos;
};
#endif
int
ommap(p, uap)
        struct proc *p;
        register struct ommap_args *uap;
{
        struct mmap_args nargs;
        static const char cvtbsdprot[8] = {
                0,
                PROT_EXEC,
                PROT_WRITE,
                PROT_EXEC | PROT_WRITE,
                PROT_READ,
                PROT_EXEC | PROT_READ,
                PROT_WRITE | PROT_READ,
                PROT_EXEC | PROT_WRITE | PROT_READ,
        };

#define OMAP_ANON       0x0002
#define OMAP_COPY       0x0020
#define OMAP_SHARED     0x0010
#define OMAP_FIXED      0x0100
#define OMAP_INHERIT    0x0800

        nargs.addr = uap->addr;
        nargs.len = uap->len;
        nargs.prot = cvtbsdprot[uap->prot & 0x7];
        nargs.flags = 0;
        if (uap->flags & OMAP_ANON)
                nargs.flags |= MAP_ANON;
        if (uap->flags & OMAP_COPY)
                nargs.flags |= MAP_COPY;
        if (uap->flags & OMAP_SHARED)
                nargs.flags |= MAP_SHARED;
        else
                nargs.flags |= MAP_PRIVATE;
        if (uap->flags & OMAP_FIXED)
                nargs.flags |= MAP_FIXED;
        if (uap->flags & OMAP_INHERIT)
                nargs.flags |= MAP_INHERIT;
        nargs.fd = uap->fd;
        nargs.pos = uap->pos;
        return (mmap(p, &nargs));
}
#endif /* COMPAT_43 */


#ifndef _SYS_SYSPROTO_H_
struct msync_args {
        void *addr;
        int len;
        int flags;
};
#endif
int
msync(p, uap)
        struct proc *p;
        struct msync_args *uap;
{
        vm_offset_t addr;
        vm_size_t size, pageoff;
        int flags;
        vm_map_t map;
        int rv;

        addr = (vm_offset_t) uap->addr;
        size = uap->len;
        flags = uap->flags;

        pageoff = (addr & PAGE_MASK);
        addr -= pageoff;
        size += pageoff;
        size = (vm_size_t) round_page(size);
        if (addr + size < addr)
                return (EINVAL);

        if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
                return (EINVAL);

        map = &p->p_vmspace->vm_map;

        /*
         * XXX Gak!  If size is zero we are supposed to sync "all modified
         * pages with the region containing addr".  Unfortunately, we don't
         * really keep track of individual mmaps so we approximate by flushing
         * the range of the map entry containing addr.  This can be incorrect
         * if the region splits or is coalesced with a neighbor.
         */
        if (size == 0) {
                vm_map_entry_t entry;

                vm_map_lock_read(map);
                rv = vm_map_lookup_entry(map, addr, &entry);
                vm_map_unlock_read(map);
                if (rv == FALSE)
                        return (EINVAL);
                addr = entry->start;
                size = entry->end - entry->start;
        }

        /*
         * Clean the pages and interpret the return value.
         */
        rv = vm_map_clean(map, addr, addr + size, (flags & MS_ASYNC) == 0,
            (flags & MS_INVALIDATE) != 0);

        switch (rv) {
        case KERN_SUCCESS:
                break;
        case KERN_INVALID_ADDRESS:
                return (EINVAL);        /* Sun returns ENOMEM? */
        case KERN_FAILURE:
                return (EIO);
        default:
                return (EINVAL);
        }

        return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct munmap_args {
        void *addr;
        size_t len;
};
#endif
int
munmap(p, uap)
        register struct proc *p;
        register struct munmap_args *uap;
{
        vm_offset_t addr;
        vm_size_t size, pageoff;
        vm_map_t map;

        addr = (vm_offset_t) uap->addr;
        size = uap->len;

        pageoff = (addr & PAGE_MASK);
        addr -= pageoff;
        size += pageoff;
        size = (vm_size_t) round_page(size);
        if (addr + size < addr)
                return (EINVAL);

        if (size == 0)
                return (0);

        /*
         * Check for illegal addresses.  Watch out for address wrap... Note
         * that VM_*_ADDRESS are not constants due to casts (argh).
         */
        if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
                return (EINVAL);
#ifndef i386
        if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
                return (EINVAL);
#endif
        map = &p->p_vmspace->vm_map;
        /*
         * Make sure entire range is allocated.
         */
        if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
                return (EINVAL);
        /* returns nothing but KERN_SUCCESS anyway */
        (void) vm_map_remove(map, addr, addr + size);
        return (0);
}

void
munmapfd(p, fd)
        struct proc *p;
        int fd;
{
        /*
         * XXX should unmap any regions mapped to this file
         */
        p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
}

#ifndef _SYS_SYSPROTO_H_
struct mprotect_args {
        const void *addr;
        size_t len;
        int prot;
};
#endif
int
mprotect(p, uap)
        struct proc *p;
        struct mprotect_args *uap;
{
        vm_offset_t addr;
        vm_size_t size, pageoff;
        register vm_prot_t prot;

        addr = (vm_offset_t) uap->addr;
        size = uap->len;
        prot = uap->prot & VM_PROT_ALL;
#if defined(VM_PROT_READ_IS_EXEC)
        if (prot & VM_PROT_READ)
                prot |= VM_PROT_EXECUTE;
#endif

        pageoff = (addr & PAGE_MASK);
        addr -= pageoff;
        size += pageoff;
        size = (vm_size_t) round_page(size);
        if (addr + size < addr)
                return (EINVAL);

        switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot,
            FALSE)) {
        case KERN_SUCCESS:
                return (0);
        case KERN_PROTECTION_FAILURE:
                return (EACCES);
        }
        return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct minherit_args {
        void *addr;
        size_t len;
        int inherit;
};
#endif
int
minherit(p, uap)
        struct proc *p;
        struct minherit_args *uap;
{
        vm_offset_t addr;
        vm_size_t size, pageoff;
        register vm_inherit_t inherit;

        addr = (vm_offset_t) uap->addr;
        size = uap->len;
        inherit = uap->inherit;

        pageoff = (addr & PAGE_MASK);
        addr -= pageoff;
        size += pageoff;
        size = (vm_size_t) round_page(size);
        if (addr + size < addr)
                return (EINVAL);

        switch (vm_map_inherit(&p->p_vmspace->vm_map, addr, addr + size,
            inherit)) {
        case KERN_SUCCESS:
                return (0);
        case KERN_PROTECTION_FAILURE:
                return (EACCES);
        }
        return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct madvise_args {
        void *addr;
        size_t len;
        int behav;
};
#endif

/* ARGSUSED */
int
madvise(p, uap)
        struct proc *p;
        struct madvise_args *uap;
{
        vm_offset_t start, end;
        /*
         * Check for illegal addresses.  Watch out for address wrap... Note
         * that VM_*_ADDRESS are not constants due to casts (argh).
         */
        if (VM_MAXUSER_ADDRESS > 0 &&
            ((vm_offset_t) uap->addr + uap->len) > VM_MAXUSER_ADDRESS)
                return (EINVAL);
#ifndef i386
        if (VM_MIN_ADDRESS > 0 && uap->addr < VM_MIN_ADDRESS)
                return (EINVAL);
#endif
        if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr)
                return (EINVAL);

        /*
         * Since this routine is only advisory, we default to conservative
         * behavior.
         */
        start = trunc_page((vm_offset_t) uap->addr);
        end = round_page((vm_offset_t) uap->addr + uap->len);

        vm_map_madvise(&p->p_vmspace->vm_map, start, end, uap->behav);

        return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mincore_args {
        const void *addr;
        size_t len;
        char *vec;
};
#endif

/* ARGSUSED */
int
mincore(p, uap)
        struct proc *p;
        struct mincore_args *uap;
{
        vm_offset_t addr, first_addr;
        vm_offset_t end, cend;
        pmap_t pmap;
        vm_map_t map;
        char *vec;
        int error;
        int vecindex, lastvecindex;
        register vm_map_entry_t current;
        vm_map_entry_t entry;
        int mincoreinfo;
        unsigned int timestamp;

        /*
         * Make sure that the addresses presented are valid for user
         * mode.
         */
        first_addr = addr = trunc_page((vm_offset_t) uap->addr);
        end = addr + (vm_size_t)round_page(uap->len);
        if (VM_MAXUSER_ADDRESS > 0 && end > VM_MAXUSER_ADDRESS)
                return (EINVAL);
        if (end < addr)
                return (EINVAL);

        /*
         * Address of byte vector
         */
        vec = uap->vec;

        map = &p->p_vmspace->vm_map;
        pmap = vmspace_pmap(p->p_vmspace);

        vm_map_lock_read(map);
RestartScan:
        timestamp = map->timestamp;

        if (!vm_map_lookup_entry(map, addr, &entry))
                entry = entry->next;

        /*
         * Do this on a map entry basis so that if the pages are not
         * in the current process's address space, we can easily look
         * up the pages elsewhere.
         */
        lastvecindex = -1;
        for (current = entry;
            (current != &map->header) && (current->start < end);
            current = current->next) {

                /*
                 * ignore submaps (for now) or null objects
                 */
                if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) ||
                    current->object.vm_object == NULL)
                        continue;

                /*
                 * limit this scan to the current map entry and the
                 * limits for the mincore call
                 */
                if (addr < current->start)
                        addr = current->start;
                cend = current->end;
                if (cend > end)
                        cend = end;

                /*
                 * scan this entry one page at a time
                 */
                while (addr < cend) {
                        /*
                         * Check pmap first, it is likely faster, also
                         * it can provide info as to whether we are the
                         * one referencing or modifying the page.
                         */
                        mincoreinfo = pmap_mincore(pmap, addr);
                        if (!mincoreinfo) {
                                vm_pindex_t pindex;
                                vm_ooffset_t offset;
                                vm_page_t m;
                                /*
                                 * calculate the page index into the object
                                 */
                                offset = current->offset + (addr - current->start);
                                pindex = OFF_TO_IDX(offset);
                                m = vm_page_lookup(current->object.vm_object,
                                    pindex);
                                /*
                                 * if the page is resident, then gather information about
                                 * it.
                                 */
                                if (m) {
                                        mincoreinfo = MINCORE_INCORE;
                                        if (m->dirty ||
                                            pmap_is_modified(VM_PAGE_TO_PHYS(m)))
                                                mincoreinfo |= MINCORE_MODIFIED_OTHER;
                                        if ((m->flags & PG_REFERENCED) ||
                                            pmap_ts_referenced(VM_PAGE_TO_PHYS(m))) {
                                                vm_page_flag_set(m, PG_REFERENCED);
                                                mincoreinfo |= MINCORE_REFERENCED_OTHER;
                                        }
                                }
                        }

                        /*
                         * subyte may page fault.  In case it needs to modify
                         * the map, we release the lock.
                         */
                        vm_map_unlock_read(map);

                        /*
                         * calculate index into user supplied byte vector
                         */
                        vecindex = OFF_TO_IDX(addr - first_addr);

                        /*
                         * If we have skipped map entries, we need to make sure that
                         * the byte vector is zeroed for those skipped entries.
                         */
                        while ((lastvecindex + 1) < vecindex) {
                                ++lastvecindex;
                                error = subyte(vec + lastvecindex, 0);
                                if (error) {
                                        return (EFAULT);
                                }
                        }

                        /*
                         * Pass the page information to the user
                         */
                        error = subyte(vec + vecindex, mincoreinfo);
                        if (error) {
                                return (EFAULT);
                        }

                        /*
                         * If the map has changed, due to the subyte, the previous
                         * output may be invalid.
                         */
                        vm_map_lock_read(map);
                        if (timestamp != map->timestamp)
                                goto RestartScan;

                        lastvecindex = vecindex;
                        addr += PAGE_SIZE;
                }
        }

        /*
         * subyte may page fault.  In case it needs to modify
         * the map, we release the lock.
         */
        vm_map_unlock_read(map);

        /*
         * Zero the last entries in the byte vector.
         */
        vecindex = OFF_TO_IDX(end - first_addr);
        while ((lastvecindex + 1) < vecindex) {
                ++lastvecindex;
                error = subyte(vec + lastvecindex, 0);
                if (error) {
                        return (EFAULT);
                }
        }

        /*
         * If the map has changed, due to the subyte, the previous
         * output may be invalid.
         */
        vm_map_lock_read(map);
        if (timestamp != map->timestamp)
                goto RestartScan;
        vm_map_unlock_read(map);

        return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mlock_args {
        const void *addr;
        size_t len;
};
#endif
int
mlock(p, uap)
        struct proc *p;
        struct mlock_args *uap;
{
        vm_offset_t addr;
        vm_size_t size, pageoff;
        int error;

        addr = (vm_offset_t) uap->addr;
        size = uap->len;

        pageoff = (addr & PAGE_MASK);
        addr -= pageoff;
        size += pageoff;
        size = (vm_size_t) round_page(size);

        /* disable wrap around */
        if (addr + size < addr)
                return (EINVAL);

        if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
                return (EAGAIN);

#ifdef pmap_wired_count
        if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
            p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
                return (ENOMEM);
#else
        error = suser(p);
        if (error)
                return (error);
#endif

        error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr, addr + size, FALSE);
        return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

#ifndef _SYS_SYSPROTO_H_
struct mlockall_args {
        int how;
};
#endif

int
mlockall(p, uap)
        struct proc *p;
        struct mlockall_args *uap;
{
        return 0;
}

#ifndef _SYS_SYSPROTO_H_
struct munlockall_args {
        int how;
};
#endif

int
munlockall(p, uap)
        struct proc *p;
        struct munlockall_args *uap;
{
        return 0;
}

#ifndef _SYS_SYSPROTO_H_
struct munlock_args {
        const void *addr;
        size_t len;
};
#endif
int
munlock(p, uap)
        struct proc *p;
        struct munlock_args *uap;
{
        vm_offset_t addr;
        vm_size_t size, pageoff;
        int error;

        addr = (vm_offset_t) uap->addr;
        size = uap->len;

        pageoff = (addr & PAGE_MASK);
        addr -= pageoff;
        size += pageoff;
        size = (vm_size_t) round_page(size);

        /* disable wrap around */
        if (addr + size < addr)
                return (EINVAL);

#ifndef pmap_wired_count
        error = suser(p);
        if (error)
                return (error);
#endif

        error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr, addr + size, TRUE);
        return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * Internal version of mmap.
 * Currently used by mmap, exec, and sys5 shared memory.
 * Handle is either a vnode pointer or NULL for MAP_ANON.
 */
int
vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags,
    void *handle,
    vm_ooffset_t foff)
{
        boolean_t fitit;
        vm_object_t object;
        struct vnode *vp = NULL;
        objtype_t type;
        int rv = KERN_SUCCESS;
        vm_ooffset_t objsize;
        int docow;
        struct proc *p = curproc;

        if (size == 0)
                return (0);

        objsize = size = round_page(size);

        /*
         * We currently can only deal with page aligned file offsets.
         * The check is here rather than in the syscall because the
         * kernel calls this function internally for other mmaping
         * operations (such as in exec) and non-aligned offsets will
         * cause pmap inconsistencies...so we want to be sure to
         * disallow this in all cases.
         */
        if (foff & PAGE_MASK)
                return (EINVAL);

        if ((flags & MAP_FIXED) == 0) {
                fitit = TRUE;
                *addr = round_page(*addr);
        } else {
                if (*addr != trunc_page(*addr))
                        return (EINVAL);
                fitit = FALSE;
                (void) vm_map_remove(map, *addr, *addr + size);
        }

        /*
         * Lookup/allocate object.
         */
        if (flags & MAP_ANON) {
                type = OBJT_DEFAULT;
                /*
                 * Unnamed anonymous regions always start at 0.
                 */
                if (handle == 0)
                        foff = 0;
        } else {
                vp = (struct vnode *) handle;
                if (vp->v_type == VCHR) {
                        type = OBJT_DEVICE;
                        handle = (void *)(intptr_t)vp->v_rdev;
                } else {
                        struct vattr vat;
                        int error;

                        error = VOP_GETATTR(vp, &vat, p->p_ucred, p);
                        if (error)
                                return (error);
                        objsize = round_page(vat.va_size);
                        type = OBJT_VNODE;
                }
        }

        if (handle == NULL) {
                object = NULL;
                docow = 0;
        } else {
                object = vm_pager_allocate(type,
                    handle, objsize, prot, foff);
                if (object == NULL)
                        return (type == OBJT_DEVICE ? EINVAL : ENOMEM);
                docow = MAP_PREFAULT_PARTIAL;
        }

        /*
         * Force device mappings to be shared.
         */
        if (type == OBJT_DEVICE) {
                flags &= ~(MAP_PRIVATE|MAP_COPY);
                flags |= MAP_SHARED;
        }

        if ((flags & (MAP_ANON|MAP_SHARED)) == 0) {
                docow |= MAP_COPY_ON_WRITE;
        }

#if defined(VM_PROT_READ_IS_EXEC)
        if (prot & VM_PROT_READ)
                prot |= VM_PROT_EXECUTE;

        if (maxprot & VM_PROT_READ)
                maxprot |= VM_PROT_EXECUTE;
#endif

        if (fitit) {
                *addr = pmap_addr_hint(object, *addr, size);
        }

        if (flags & MAP_STACK)
                rv = vm_map_stack(map, *addr, size, prot,
                    maxprot, docow);
        else
                rv = vm_map_find(map, object, foff, addr, size, fitit,
                    prot, maxprot, docow);

        if (rv != KERN_SUCCESS) {
                /*
                 * Lose the object reference.  Will destroy the
                 * object if it's an unnamed anonymous mapping
                 * or named anonymous without other references.
                 */
                vm_object_deallocate(object);
                goto out;
        }

        /*
         * Shared memory is also shared with children.
         */
        if (flags & (MAP_SHARED|MAP_INHERIT)) {
                rv = vm_map_inherit(map, *addr, *addr + size, VM_INHERIT_SHARE);
                if (rv != KERN_SUCCESS) {
                        (void) vm_map_remove(map, *addr, *addr + size);
                        goto out;
                }
        }
out:
        switch (rv) {
        case KERN_SUCCESS:
                return (0);
        case KERN_INVALID_ADDRESS:
        case KERN_NO_SPACE:
                return (ENOMEM);
        case KERN_PROTECTION_FAILURE:
                return (EACCES);
        default:
                return (EINVAL);
        }
}