/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 * $FreeBSD$
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include "opt_compat.h"
#include "opt_rlimit.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>

#ifndef _SYS_SYSPROTO_H_
struct sbrk_args {
	int incr;
};
#endif

/* ARGSUSED */
int
sbrk(p, uap)
	struct proc *p;
	struct sbrk_args *uap;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#ifndef _SYS_SYSPROTO_H_
struct sstk_args {
	int incr;
};
#endif

/* ARGSUSED */
int
sstk(p, uap)
	struct proc *p;
	struct sstk_args *uap;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
#ifndef _SYS_SYSPROTO_H_
struct getpagesize_args {
	int dummy;
};
#endif

/* ARGSUSED */
int
ogetpagesize(p, uap)
	struct proc *p;
	struct getpagesize_args *uap;
{

	p->p_retval[0] = PAGE_SIZE;
	return (0);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */


/*
 * Memory Map (mmap) system call.  Note that the file offset
 * and address are allowed to be NOT page aligned, though if
 * the MAP_FIXED flag is set, both must have the same remainder
 * modulo the PAGE_SIZE (POSIX 1003.1b).  If the address is not
 * page-aligned, the actual mapping starts at trunc_page(addr)
 * and the return value is adjusted up by the page offset.
 *
 * Generally speaking, only character devices which are themselves
 * memory-based, such as a video framebuffer, can be mmap'd.  Otherwise
 * there would be no cache coherency between a descriptor and a VM mapping
 * both to the same character device.
 *
 * Block devices can be mmap'd no matter what they represent.  Cache coherency
 * is maintained as long as you do not write directly to the underlying
 * character device.
 */
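
/*
 * Illustrative userland sketch of the offset handling described above (the
 * descriptor "fd" and the length "len" are assumed; error handling is
 * minimal).  Because the returned address is already adjusted up by the page
 * offset, the caller addresses the requested byte directly:
 *
 *	off_t off = 100;			// deliberately not page aligned
 *	char *p = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, off);
 *	if (p == MAP_FAILED)
 *		err(1, "mmap");
 *	// p[0] is byte "off" of the file; the kernel mapped from
 *	// trunc_page(off) and returned addr + (off & PAGE_MASK).
 */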

#ifndef _SYS_SYSPROTO_H_
struct mmap_args {
	void *addr;
	size_t len;
	int prot;
	int flags;
	int fd;
	long pad;
	off_t pos;
};
#endif

int
mmap(p, uap)
	struct proc *p;
	register struct mmap_args *uap;
{
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct vnode *vp;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t prot, maxprot;
	void *handle;
	int flags, error;
	int disablexworkaround;
	off_t pos;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;
	flags = uap->flags;
	pos = uap->pos;

	/* make sure mapping fits into numeric range etc */
	if ((ssize_t) uap->len < 0 ||
	    ((flags & MAP_ANON) && uap->fd != -1))
		return (EINVAL);

	if (flags & MAP_STACK) {
		if ((uap->fd != -1) ||
		    ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE)))
			return (EINVAL);
		flags |= MAP_ANON;
		pos = 0;
	}

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;

	/* Adjust size for rounding (on both ends). */
	size += pageoff;			/* low end... */
	size = (vm_size_t) round_page(size);	/* hi end */

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);
		/* Address range must be all in user VM space. */
		if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
			return (EINVAL);
#ifndef i386
		if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
			return (EINVAL);
#endif
		if (addr + size < addr)
			return (EINVAL);
	}
	/*
	 * XXX for non-fixed mappings where no hint is provided or
	 * the hint would fall in the potential heap space,
	 * place it after the end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable
	 * location.
	 */
	else if (addr == 0 ||
	    (addr >= round_page((vm_offset_t)p->p_vmspace->vm_taddr) &&
	     addr < round_page((vm_offset_t)p->p_vmspace->vm_daddr + MAXDSIZ)))
		addr = round_page((vm_offset_t)p->p_vmspace->vm_daddr + MAXDSIZ);

	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.
		 */
		handle = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	} else {
		/*
		 * Mapping file, get fp for validation. Obtain vnode and make
		 * sure it is of appropriate type.
		 */
		if (((unsigned) uap->fd) >= fdp->fd_nfiles ||
		    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
			return (EBADF);
		if (fp->f_type != DTYPE_VNODE)
			return (EINVAL);
		vp = (struct vnode *) fp->f_data;
		if (vp->v_type != VREG && vp->v_type != VCHR)
			return (EINVAL);
		/*
		 * XXX hack to handle use of /dev/zero to map anon memory (ala
		 * SunOS).
		 */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			handle = NULL;
			maxprot = VM_PROT_ALL;
			flags |= MAP_ANON;
			pos = 0;
		} else {
			/*
			 * cdevs do not provide private mappings of any kind.
			 */
			/*
			 * However, for XIG X server to continue to work,
			 * we should allow the superuser to do it anyway.
			 * We only allow it at securelevel < 1.
			 * (Because the XIG X server writes directly to video
			 * memory via /dev/mem, it should never work at any
			 * other securelevel.)
			 * XXX this will have to go
			 */
			if (securelevel >= 1)
				disablexworkaround = 1;
			else
				disablexworkaround = suser(p);
			if (vp->v_type == VCHR && disablexworkaround &&
			    (flags & (MAP_PRIVATE|MAP_COPY)))
				return (EINVAL);
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination? What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;	/* ??? */
			if (fp->f_flag & FREAD)
				maxprot |= VM_PROT_READ;
			else if (prot & PROT_READ)
				return (EACCES);
			/*
			 * If we are sharing potential changes (either via
			 * MAP_SHARED or via the implicit sharing of character
			 * device mappings), and we are trying to get write
			 * permission although we opened it without asking
			 * for it, bail out.  Check for superuser, only if
			 * we're at securelevel < 1, to allow the XIG X server
			 * to continue to work.
			 * (An illustrative sketch follows this function.)
			 */

			if ((flags & MAP_SHARED) != 0 ||
			    (vp->v_type == VCHR && disablexworkaround)) {
				if ((fp->f_flag & FWRITE) != 0) {
					struct vattr va;
					if ((error =
					    VOP_GETATTR(vp, &va,
						p->p_ucred, p)))
						return (error);
					if ((va.va_flags &
					    (IMMUTABLE|APPEND)) == 0)
						maxprot |= VM_PROT_WRITE;
					else if (prot & PROT_WRITE)
						return (EPERM);
				} else if ((prot & PROT_WRITE) != 0)
					return (EACCES);
			} else
				maxprot |= VM_PROT_WRITE;

			handle = (void *)vp;
		}
	}
	error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, handle, pos);
	if (error == 0)
		p->p_retval[0] = (register_t) (addr + pageoff);
	return (error);
}
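
/*
 * Illustrative userland sketch of the protection checks above ("len" and
 * error handling are assumed): a descriptor opened without write access
 * cannot be mapped PROT_WRITE with MAP_SHARED, but a MAP_PRIVATE mapping
 * may still be written because the changes go to anonymous copy-on-write
 * pages rather than back to the file.
 *
 *	int fd = open("data", O_RDONLY);
 *	void *shared = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *	    MAP_SHARED, fd, 0);		// fails with EACCES
 *	void *private = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *	    MAP_PRIVATE, fd, 0);	// succeeds; writes are not
 *					// carried back to "data"
 */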

#ifdef COMPAT_43
#ifndef _SYS_SYSPROTO_H_
struct ommap_args {
	caddr_t addr;
	int len;
	int prot;
	int flags;
	int fd;
	long pos;
};
#endif
int
ommap(p, uap)
	struct proc *p;
	register struct ommap_args *uap;
{
	struct mmap_args nargs;
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC | PROT_WRITE,
		PROT_READ,
		PROT_EXEC | PROT_READ,
		PROT_WRITE | PROT_READ,
		PROT_EXEC | PROT_WRITE | PROT_READ,
	};

#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100
#define	OMAP_INHERIT	0x0800

	nargs.addr = uap->addr;
	nargs.len = uap->len;
	nargs.prot = cvtbsdprot[uap->prot & 0x7];
	nargs.flags = 0;
	if (uap->flags & OMAP_ANON)
		nargs.flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		nargs.flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		nargs.flags |= MAP_SHARED;
	else
		nargs.flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		nargs.flags |= MAP_FIXED;
	if (uap->flags & OMAP_INHERIT)
		nargs.flags |= MAP_INHERIT;
	nargs.fd = uap->fd;
	nargs.pos = uap->pos;
	return (mmap(p, &nargs));
}
#endif				/* COMPAT_43 */


#ifndef _SYS_SYSPROTO_H_
struct msync_args {
	void *addr;
	int len;
	int flags;
};
#endif
int
msync(p, uap)
	struct proc *p;
	struct msync_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int flags;
	vm_map_t map;
	int rv;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	flags = uap->flags;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return(EINVAL);

	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	map = &p->p_vmspace->vm_map;

	/*
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages within the region containing addr".  Unfortunately, we don't
	 * really keep track of individual mmaps so we approximate by flushing
	 * the range of the map entry containing addr.  This can be incorrect
	 * if the region splits or is coalesced with a neighbor.
	 */
	if (size == 0) {
		vm_map_entry_t entry;

		vm_map_lock_read(map);
		rv = vm_map_lookup_entry(map, addr, &entry);
		vm_map_unlock_read(map);
		if (rv == FALSE)
			return (EINVAL);
		addr = entry->start;
		size = entry->end - entry->start;
	}

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_clean(map, addr, addr + size, (flags & MS_ASYNC) == 0,
	    (flags & MS_INVALIDATE) != 0);

	switch (rv) {
	case KERN_SUCCESS:
		break;
	case KERN_INVALID_ADDRESS:
		return (EINVAL);	/* Sun returns ENOMEM? */
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}

	return (0);
}
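
/*
 * Illustrative userland sketch ("addr" and "len" are assumed): passing no
 * flags waits for the flush to complete, MS_ASYNC merely starts it, and the
 * MS_ASYNC | MS_INVALIDATE combination is rejected by the check above.
 *
 *	if (msync(addr, len, 0) == -1)			// synchronous flush
 *		warn("msync");
 *	msync(addr, len, MS_ASYNC | MS_INVALIDATE);	// always EINVAL here
 */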

#ifndef _SYS_SYSPROTO_H_
struct munmap_args {
	void *addr;
	size_t len;
};
#endif
int
munmap(p, uap)
	register struct proc *p;
	register struct munmap_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_map_t map;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return(EINVAL);

	if (size == 0)
		return (0);

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
		return (EINVAL);
#ifndef i386
	if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
		return (EINVAL);
#endif
	map = &p->p_vmspace->vm_map;
	/*
	 * Make sure entire range is allocated.
	 */
	if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
		return (EINVAL);
	/* returns nothing but KERN_SUCCESS anyway */
	(void) vm_map_remove(map, addr, addr + size);
	return (0);
}

void
munmapfd(p, fd)
	struct proc *p;
	int fd;
{
	/*
	 * XXX should unmap any regions mapped to this file
	 */
	p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
}

#ifndef _SYS_SYSPROTO_H_
struct mprotect_args {
	const void *addr;
	size_t len;
	int prot;
};
#endif
int
mprotect(p, uap)
	struct proc *p;
	struct mprotect_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	register vm_prot_t prot;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;
#if defined(VM_PROT_READ_IS_EXEC)
	if (prot & VM_PROT_READ)
		prot |= VM_PROT_EXECUTE;
#endif

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return(EINVAL);

	switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot,
		FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct minherit_args {
	void *addr;
	size_t len;
	int inherit;
};
#endif
int
minherit(p, uap)
	struct proc *p;
	struct minherit_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	register vm_inherit_t inherit;

	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	inherit = uap->inherit;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return(EINVAL);

	switch (vm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
	    inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct madvise_args {
	void *addr;
	size_t len;
	int behav;
};
#endif

/* ARGSUSED */
int
madvise(p, uap)
	struct proc *p;
	struct madvise_args *uap;
{
	vm_offset_t start, end;

	/*
	 * Check for illegal behavior
	 */
	if (uap->behav < 0 || uap->behav > MADV_FREE)
		return (EINVAL);
	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 &&
	    ((vm_offset_t) uap->addr + uap->len) > VM_MAXUSER_ADDRESS)
		return (EINVAL);
#ifndef i386
	if (VM_MIN_ADDRESS > 0 && uap->addr < VM_MIN_ADDRESS)
		return (EINVAL);
#endif
	if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr)
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	start = trunc_page((vm_offset_t) uap->addr);
	end = round_page((vm_offset_t) uap->addr + uap->len);

	if (vm_map_madvise(&p->p_vmspace->vm_map, start, end, uap->behav))
		return (EINVAL);
	return (0);
}
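
/*
 * Illustrative userland sketch ("addr" and "len" are assumed): the advice is
 * only a hint, so a successful call promises nothing beyond the range checks
 * above.  MADV_FREE, the largest value accepted here, tells the VM system
 * the pages may be discarded without being written back.
 *
 *	if (madvise(addr, len, MADV_FREE) == -1)
 *		warn("madvise");
 */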

#ifndef _SYS_SYSPROTO_H_
struct mincore_args {
	const void *addr;
	size_t len;
	char *vec;
};
#endif

/* ARGSUSED */
int
mincore(p, uap)
	struct proc *p;
	struct mincore_args *uap;
{
	vm_offset_t addr, first_addr;
	vm_offset_t end, cend;
	pmap_t pmap;
	vm_map_t map;
	char *vec;
	int error;
	int vecindex, lastvecindex;
	register vm_map_entry_t current;
	vm_map_entry_t entry;
	int mincoreinfo;
	unsigned int timestamp;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page((vm_offset_t) uap->addr);
	end = addr + (vm_size_t)round_page(uap->len);
	if (VM_MAXUSER_ADDRESS > 0 && end > VM_MAXUSER_ADDRESS)
		return (EINVAL);
	if (end < addr)
		return (EINVAL);

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	map = &p->p_vmspace->vm_map;
	pmap = vmspace_pmap(p->p_vmspace);

	vm_map_lock_read(map);
RestartScan:
	timestamp = map->timestamp;

	if (!vm_map_lookup_entry(map, addr, &entry))
		entry = entry->next;

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current process's address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for(current = entry;
	    (current != &map->header) && (current->start < end);
	    current = current->next) {

		/*
		 * ignore submaps (for now) or null objects
		 */
		if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) ||
		    current->object.vm_object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		/*
		 * scan this entry one page at a time
		 */
		while(addr < cend) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 */
			mincoreinfo = pmap_mincore(pmap, addr);
			if (!mincoreinfo) {
				vm_pindex_t pindex;
				vm_ooffset_t offset;
				vm_page_t m;
				/*
				 * calculate the page index into the object
				 */
				offset = current->offset + (addr - current->start);
				pindex = OFF_TO_IDX(offset);
				m = vm_page_lookup(current->object.vm_object,
					pindex);
				/*
				 * if the page is resident, then gather information about
				 * it.
				 */
				if (m) {
					mincoreinfo = MINCORE_INCORE;
					if (m->dirty ||
						pmap_is_modified(VM_PAGE_TO_PHYS(m)))
						mincoreinfo |= MINCORE_MODIFIED_OTHER;
					if ((m->flags & PG_REFERENCED) ||
						pmap_ts_referenced(VM_PAGE_TO_PHYS(m))) {
						vm_page_flag_set(m, PG_REFERENCED);
						mincoreinfo |= MINCORE_REFERENCED_OTHER;
					}
				}
			}

			/*
			 * subyte may page fault.  In case it needs to modify
			 * the map, we release the lock.
			 */
			vm_map_unlock_read(map);

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = OFF_TO_IDX(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make sure that
			 * the byte vector is zeroed for those skipped entries.
			 */
			while((lastvecindex + 1) < vecindex) {
				error = subyte( vec + lastvecindex, 0);
				if (error) {
					return (EFAULT);
				}
				++lastvecindex;
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte( vec + vecindex, mincoreinfo);
			if (error) {
				return (EFAULT);
			}

			/*
			 * If the map has changed, due to the subyte, the previous
			 * output may be invalid.
			 */
			vm_map_lock_read(map);
			if (timestamp != map->timestamp)
				goto RestartScan;

			lastvecindex = vecindex;
			addr += PAGE_SIZE;
		}
	}

	/*
	 * subyte may page fault.  In case it needs to modify
	 * the map, we release the lock.
	 */
	vm_map_unlock_read(map);

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = OFF_TO_IDX(end - first_addr);
	while((lastvecindex + 1) < vecindex) {
		error = subyte( vec + lastvecindex, 0);
		if (error) {
			return (EFAULT);
		}
		++lastvecindex;
	}

	/*
	 * If the map has changed, due to the subyte, the previous
	 * output may be invalid.
	 */
	vm_map_lock_read(map);
	if (timestamp != map->timestamp)
		goto RestartScan;
	vm_map_unlock_read(map);

	return (0);
}
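
/*
 * Illustrative userland sketch ("addr" and "len" are assumed): the caller
 * supplies one byte per page, and each byte receives the MINCORE_* bits
 * gathered above (0 for pages that are not resident).
 *
 *	long pgsz = getpagesize();
 *	size_t npages = (len + pgsz - 1) / pgsz;
 *	char *vec = malloc(npages);
 *	if (vec != NULL && mincore(addr, len, vec) == 0 &&
 *	    (vec[0] & MINCORE_INCORE))
 *		printf("first page is resident\n");
 */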

#ifndef _SYS_SYSPROTO_H_
struct mlock_args {
	const void *addr;
	size_t len;
};
#endif
int
mlock(p, uap)
	struct proc *p;
	struct mlock_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int error;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disable wrap around */
	if (addr + size < addr)
		return (EINVAL);

	if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);

#ifdef pmap_wired_count
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (ENOMEM);
#else
	error = suser(p);
	if (error)
		return (error);
#endif

	error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr, addr + size, FALSE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

#ifndef _SYS_SYSPROTO_H_
struct mlockall_args {
	int how;
};
#endif

int
mlockall(p, uap)
	struct proc *p;
	struct mlockall_args *uap;
{
	return 0;
}

#ifndef _SYS_SYSPROTO_H_
struct munlockall_args {
	int how;
};
#endif

int
munlockall(p, uap)
	struct proc *p;
	struct munlockall_args *uap;
{
	return 0;
}

#ifndef _SYS_SYSPROTO_H_
struct munlock_args {
	const void *addr;
	size_t len;
};
#endif
int
munlock(p, uap)
	struct proc *p;
	struct munlock_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int error;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disable wrap around */
	if (addr + size < addr)
		return (EINVAL);

#ifndef pmap_wired_count
	error = suser(p);
	if (error)
		return (error);
#endif

	error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr, addr + size, TRUE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}
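
/*
 * Illustrative userland sketch ("buf" and "len" are assumed): a successful
 * mlock() wires the pages so they cannot be paged out until munlock().  Per
 * the checks above, the request fails with EAGAIN when it would exceed the
 * global wired-page limit and, where RLIMIT_MEMLOCK is enforced, with ENOMEM
 * when it exceeds the per-process limit.
 *
 *	if (mlock(buf, len) == -1)
 *		warn("mlock");
 *	...
 *	munlock(buf, len);
 */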

/*
 * Internal version of mmap.
 * Currently used by mmap, exec, and sys5 shared memory.
 * Handle is either a vnode pointer or NULL for MAP_ANON.
 */
int
vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
	vm_prot_t maxprot, int flags,
	void *handle,
	vm_ooffset_t foff)
{
	boolean_t fitit;
	vm_object_t object;
	struct vnode *vp = NULL;
	objtype_t type;
	int rv = KERN_SUCCESS;
	vm_ooffset_t objsize;
	int docow;
	struct proc *p = curproc;

	if (size == 0)
		return (0);

	objsize = size = round_page(size);

	/*
	 * We currently can only deal with page aligned file offsets.
	 * The check is here rather than in the syscall because the
	 * kernel calls this function internally for other mmapping
	 * operations (such as in exec) and non-aligned offsets will
	 * cause pmap inconsistencies...so we want to be sure to
	 * disallow this in all cases.
	 */
	if (foff & PAGE_MASK)
		return (EINVAL);

	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		if (*addr != trunc_page(*addr))
			return (EINVAL);
		fitit = FALSE;
		(void) vm_map_remove(map, *addr, *addr + size);
	}

	/*
	 * Lookup/allocate object.
	 */
	if (flags & MAP_ANON) {
		type = OBJT_DEFAULT;
		/*
		 * Unnamed anonymous regions always start at 0.
		 */
		if (handle == 0)
			foff = 0;
	} else {
		vp = (struct vnode *) handle;
		if (vp->v_type == VCHR) {
			type = OBJT_DEVICE;
			handle = (void *)(intptr_t)vp->v_rdev;
		} else {
			struct vattr vat;
			int error;

			error = VOP_GETATTR(vp, &vat, p->p_ucred, p);
			if (error)
				return (error);
			objsize = round_page(vat.va_size);
			type = OBJT_VNODE;
		}
	}

	if (handle == NULL) {
		object = NULL;
		docow = 0;
	} else {
		object = vm_pager_allocate(type,
			handle, objsize, prot, foff);
		if (object == NULL)
			return (type == OBJT_DEVICE ? EINVAL : ENOMEM);
		docow = MAP_PREFAULT_PARTIAL;
	}

	/*
	 * Force device mappings to be shared.
	 */
	if (type == OBJT_DEVICE) {
		flags &= ~(MAP_PRIVATE|MAP_COPY);
		flags |= MAP_SHARED;
	}

	if ((flags & (MAP_ANON|MAP_SHARED)) == 0) {
		docow |= MAP_COPY_ON_WRITE;
	}

#if defined(VM_PROT_READ_IS_EXEC)
	if (prot & VM_PROT_READ)
		prot |= VM_PROT_EXECUTE;

	if (maxprot & VM_PROT_READ)
		maxprot |= VM_PROT_EXECUTE;
#endif

	if (fitit) {
		*addr = pmap_addr_hint(object, *addr, size);
	}

	if (flags & MAP_STACK)
		rv = vm_map_stack (map, *addr, size, prot,
			maxprot, docow);
	else
		rv = vm_map_find(map, object, foff, addr, size, fitit,
			prot, maxprot, docow);

	if (rv != KERN_SUCCESS) {
		/*
		 * Lose the object reference. Will destroy the
		 * object if it's an unnamed anonymous mapping
		 * or named anonymous without other references.
		 */
		vm_object_deallocate(object);
		goto out;
	}

	/*
	 * Shared memory is also shared with children.
	 */
	if (flags & (MAP_SHARED|MAP_INHERIT)) {
		rv = vm_map_inherit(map, *addr, *addr + size, VM_INHERIT_SHARE);
		if (rv != KERN_SUCCESS) {
			(void) vm_map_remove(map, *addr, *addr + size);
			goto out;
		}
	}
out:
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}
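
/*
 * Illustrative in-kernel sketch of calling vm_mmap() above to establish an
 * anonymous mapping in a process's address space ("vms" stands in for the
 * caller's struct vmspace; the hint address and size are assumed).  This
 * mirrors how the mmap() syscall above passes a NULL handle for MAP_ANON:
 *
 *	vm_offset_t addr = hint;
 *	int error = vm_mmap(&vms->vm_map, &addr, size, VM_PROT_ALL,
 *	    VM_PROT_ALL, MAP_ANON, NULL, 0);
 *	// on success, [addr, addr + size) is now mapped.
 */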