/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 * $FreeBSD$
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include "opt_compat.h"
#include "opt_rlimit.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <vm/vm_inherit.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>

#ifndef _SYS_SYSPROTO_H_
struct sbrk_args {
	int incr;
};
#endif

/* ARGSUSED */
int
sbrk(p, uap)
	struct proc *p;
	struct sbrk_args *uap;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#ifndef _SYS_SYSPROTO_H_
struct sstk_args {
	int incr;
};
#endif

/* ARGSUSED */
int
sstk(p, uap)
	struct proc *p;
	struct sstk_args *uap;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
#ifndef _SYS_SYSPROTO_H_
struct getpagesize_args {
	int dummy;
};
#endif

/* ARGSUSED */
int
ogetpagesize(p, uap)
	struct proc *p;
	struct getpagesize_args *uap;
{

	p->p_retval[0] = PAGE_SIZE;
	return (0);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */


/*
 * Memory Map (mmap) system call.  Note that the file offset
 * and address are allowed to be NOT page aligned, though if
 * the MAP_FIXED flag is set, both must have the same remainder
 * modulo the PAGE_SIZE (POSIX 1003.1b).  If the address is not
 * page-aligned, the actual mapping starts at trunc_page(addr)
 * and the return value is adjusted up by the page offset.
 *
 * Generally speaking, only character devices which are themselves
 * memory-based, such as a video framebuffer, can be mmap'd.  Otherwise
 * there would be no cache coherency between a descriptor and a VM mapping
 * both to the same character device.
 *
 * Block devices can be mmap'd no matter what they represent.  Cache coherency
 * is maintained as long as you do not write directly to the underlying
 * character device.
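 *
 * As an illustration of the offset handling described above (assuming a
 * 4K page size): a request with pos = 0x12345 is split below into
 * pos = 0x12000 and pageoff = 0x345; the mapping itself is established on
 * a page boundary for the rounded-up length, and the address returned to
 * the caller is the mapped page address plus 0x345.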
 */
#ifndef _SYS_SYSPROTO_H_
struct mmap_args {
	void *addr;
	size_t len;
	int prot;
	int flags;
	int fd;
	long pad;
	off_t pos;
};
#endif

int
mmap(p, uap)
	struct proc *p;
	register struct mmap_args *uap;
{
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct vnode *vp;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t prot, maxprot;
	void *handle;
	int flags, error;
	int disablexworkaround;
	off_t pos;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;
	flags = uap->flags;
	pos = uap->pos;

	/* make sure mapping fits into numeric range etc */
	if ((ssize_t) uap->len < 0 ||
	    ((flags & MAP_ANON) && uap->fd != -1))
		return (EINVAL);

	if (flags & MAP_STACK) {
		if ((uap->fd != -1) ||
		    ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE)))
			return (EINVAL);
		flags |= MAP_ANON;
		pos = 0;
	}

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;

	/* Adjust size for rounding (on both ends). */
	size += pageoff;			/* low end... */
	size = (vm_size_t) round_page(size);	/* hi end */

	/*
	 * Check for illegal addresses.  Watch out for address wrap...  Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);
		/* Address range must be all in user VM space. */
		if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
			return (EINVAL);
#ifndef i386
		if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
			return (EINVAL);
#endif
		if (addr + size < addr)
			return (EINVAL);
	}
	/*
	 * XXX for non-fixed mappings where no hint is provided or
	 * the hint would fall in the potential heap space,
	 * place it after the end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable
	 * location.
	 */
	else if (addr == 0 ||
	    (addr >= round_page((vm_offset_t)p->p_vmspace->vm_taddr) &&
	     addr < round_page((vm_offset_t)p->p_vmspace->vm_daddr + MAXDSIZ)))
		addr = round_page((vm_offset_t)p->p_vmspace->vm_daddr + MAXDSIZ);

	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.
		 */
		handle = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	} else {
		/*
		 * Mapping file, get fp for validation.  Obtain vnode and make
		 * sure it is of appropriate type.
		 */
		if (((unsigned) uap->fd) >= fdp->fd_nfiles ||
		    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
			return (EBADF);
		if (fp->f_type != DTYPE_VNODE)
			return (EINVAL);
		vp = (struct vnode *) fp->f_data;
		if (vp->v_type != VREG && vp->v_type != VCHR)
			return (EINVAL);
		/*
		 * XXX hack to handle use of /dev/zero to map anon memory (ala
		 * SunOS).
		 */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			handle = NULL;
			maxprot = VM_PROT_ALL;
			flags |= MAP_ANON;
			pos = 0;
		} else {
			/*
			 * cdevs do not provide private mappings of any kind.
			 */
			/*
			 * However, for XIG X server to continue to work,
			 * we should allow the superuser to do it anyway.
			 * We only allow it at securelevel < 1.
			 * (Because the XIG X server writes directly to video
			 * memory via /dev/mem, it should never work at any
			 * other securelevel.)
			 * XXX this will have to go
			 */
			if (securelevel >= 1)
				disablexworkaround = 1;
			else
				disablexworkaround = suser(p);
			if (vp->v_type == VCHR && disablexworkaround &&
			    (flags & (MAP_PRIVATE|MAP_COPY)))
				return (EINVAL);
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination?  What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;	/* ??? */
			if (fp->f_flag & FREAD)
				maxprot |= VM_PROT_READ;
			else if (prot & PROT_READ)
				return (EACCES);
			/*
			 * If we are sharing potential changes (either via
			 * MAP_SHARED or via the implicit sharing of character
			 * device mappings), and we are trying to get write
			 * permission although we opened it without asking
			 * for it, bail out.  Check for superuser, only if
			 * we're at securelevel < 1, to allow the XIG X server
			 * to continue to work.
			 */

			if ((flags & MAP_SHARED) != 0 ||
			    (vp->v_type == VCHR && disablexworkaround)) {
				if ((fp->f_flag & FWRITE) != 0) {
					struct vattr va;
					if ((error =
					    VOP_GETATTR(vp, &va,
						p->p_ucred, p)))
						return (error);
					if ((va.va_flags &
					    (IMMUTABLE|APPEND)) == 0)
						maxprot |= VM_PROT_WRITE;
					else if (prot & PROT_WRITE)
						return (EPERM);
				} else if ((prot & PROT_WRITE) != 0)
					return (EACCES);
			} else
				maxprot |= VM_PROT_WRITE;

			handle = (void *)vp;
		}
	}
	error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, handle, pos);
	if (error == 0)
		p->p_retval[0] = (register_t) (addr + pageoff);
	return (error);
}

#ifdef COMPAT_43
#ifndef _SYS_SYSPROTO_H_
struct ommap_args {
	caddr_t addr;
	int len;
	int prot;
	int flags;
	int fd;
	long pos;
};
#endif
int
ommap(p, uap)
	struct proc *p;
	register struct ommap_args *uap;
{
	struct mmap_args nargs;
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC | PROT_WRITE,
		PROT_READ,
		PROT_EXEC | PROT_READ,
		PROT_WRITE | PROT_READ,
		PROT_EXEC | PROT_WRITE | PROT_READ,
	};

#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100
#define	OMAP_INHERIT	0x0800

	nargs.addr = uap->addr;
	nargs.len = uap->len;
	nargs.prot = cvtbsdprot[uap->prot & 0x7];
	nargs.flags = 0;
	if (uap->flags & OMAP_ANON)
		nargs.flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		nargs.flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		nargs.flags |= MAP_SHARED;
	else
		nargs.flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		nargs.flags |= MAP_FIXED;
	if (uap->flags & OMAP_INHERIT)
		nargs.flags |= MAP_INHERIT;
	nargs.fd = uap->fd;
	nargs.pos = uap->pos;
	return (mmap(p, &nargs));
}
#endif /* COMPAT_43 */


#ifndef _SYS_SYSPROTO_H_
struct msync_args {
	void *addr;
	int len;
	int flags;
};
#endif
int
msync(p, uap)
	struct proc *p;
	struct msync_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int flags;
	vm_map_t map;
	int rv;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	flags = uap->flags;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	map = &p->p_vmspace->vm_map;

	/*
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages within the region containing addr".  Unfortunately, we don't
	 * really keep track of individual mmaps so we approximate by flushing
	 * the range of the map entry containing addr.  This can be incorrect
	 * if the region splits or is coalesced with a neighbor.
	 */
	if (size == 0) {
		vm_map_entry_t entry;

		vm_map_lock_read(map);
		rv = vm_map_lookup_entry(map, addr, &entry);
		vm_map_unlock_read(map);
		if (rv == FALSE)
			return (EINVAL);
		addr = entry->start;
		size = entry->end - entry->start;
	}

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_clean(map, addr, addr + size, (flags & MS_ASYNC) == 0,
	    (flags & MS_INVALIDATE) != 0);

	switch (rv) {
	case KERN_SUCCESS:
		break;
	case KERN_INVALID_ADDRESS:
		return (EINVAL);	/* Sun returns ENOMEM? */
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}

	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct munmap_args {
	void *addr;
	size_t len;
};
#endif
int
munmap(p, uap)
	register struct proc *p;
	register struct munmap_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_map_t map;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if (size == 0)
		return (0);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...  Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
		return (EINVAL);
#ifndef i386
	if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
		return (EINVAL);
#endif
	map = &p->p_vmspace->vm_map;
	/*
	 * Make sure entire range is allocated.
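	 * (vm_map_check_protection() with VM_PROT_NONE simply verifies that
	 * every page in [addr, addr + size) is mapped, so a range containing
	 * a hole is rejected with EINVAL rather than partially removed.)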
	 */
	if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
		return (EINVAL);
	/* returns nothing but KERN_SUCCESS anyway */
	(void) vm_map_remove(map, addr, addr + size);
	return (0);
}

void
munmapfd(p, fd)
	struct proc *p;
	int fd;
{
	/*
	 * XXX should unmap any regions mapped to this file
	 */
	p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
}

#ifndef _SYS_SYSPROTO_H_
struct mprotect_args {
	const void *addr;
	size_t len;
	int prot;
};
#endif
int
mprotect(p, uap)
	struct proc *p;
	struct mprotect_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	register vm_prot_t prot;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;
#if defined(VM_PROT_READ_IS_EXEC)
	if (prot & VM_PROT_READ)
		prot |= VM_PROT_EXECUTE;
#endif

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot,
	    FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct minherit_args {
	void *addr;
	size_t len;
	int inherit;
};
#endif
int
minherit(p, uap)
	struct proc *p;
	struct minherit_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	register vm_inherit_t inherit;

	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	inherit = uap->inherit;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_inherit(&p->p_vmspace->vm_map, addr, addr + size,
	    inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct madvise_args {
	void *addr;
	size_t len;
	int behav;
};
#endif

/* ARGSUSED */
int
madvise(p, uap)
	struct proc *p;
	struct madvise_args *uap;
{
	vm_offset_t start, end;

	/*
	 * Check for illegal behavior
	 */
	if (uap->behav < 0 || uap->behav > MADV_FREE)
		return (EINVAL);
	/*
	 * Check for illegal addresses.  Watch out for address wrap...  Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 &&
	    ((vm_offset_t) uap->addr + uap->len) > VM_MAXUSER_ADDRESS)
		return (EINVAL);
#ifndef i386
	if (VM_MIN_ADDRESS > 0 && uap->addr < VM_MIN_ADDRESS)
		return (EINVAL);
#endif
	if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr)
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
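	 * The range is widened to whole pages: for example (illustrative,
	 * with a 4K page size) addr = 0x1234 and len = 0x100 give
	 * start = 0x1000 and end = 0x2000, so the advice is applied to
	 * every page the range touches.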
	 */
	start = trunc_page((vm_offset_t) uap->addr);
	end = round_page((vm_offset_t) uap->addr + uap->len);

	if (vm_map_madvise(&p->p_vmspace->vm_map, start, end, uap->behav))
		return (EINVAL);
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mincore_args {
	const void *addr;
	size_t len;
	char *vec;
};
#endif

/* ARGSUSED */
int
mincore(p, uap)
	struct proc *p;
	struct mincore_args *uap;
{
	vm_offset_t addr, first_addr;
	vm_offset_t end, cend;
	pmap_t pmap;
	vm_map_t map;
	char *vec;
	int error;
	int vecindex, lastvecindex;
	register vm_map_entry_t current;
	vm_map_entry_t entry;
	int mincoreinfo;
	unsigned int timestamp;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page((vm_offset_t) uap->addr);
	end = addr + (vm_size_t)round_page(uap->len);
	if (VM_MAXUSER_ADDRESS > 0 && end > VM_MAXUSER_ADDRESS)
		return (EINVAL);
	if (end < addr)
		return (EINVAL);

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	map = &p->p_vmspace->vm_map;
	pmap = vmspace_pmap(p->p_vmspace);

	vm_map_lock_read(map);
RestartScan:
	timestamp = map->timestamp;

	if (!vm_map_lookup_entry(map, addr, &entry))
		entry = entry->next;

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current process's address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for (current = entry;
	    (current != &map->header) && (current->start < end);
	    current = current->next) {

		/*
		 * ignore submaps (for now) or null objects
		 */
		if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) ||
		    current->object.vm_object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		/*
		 * scan this entry one page at a time
		 */
		while (addr < cend) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 */
			mincoreinfo = pmap_mincore(pmap, addr);
			if (!mincoreinfo) {
				vm_pindex_t pindex;
				vm_ooffset_t offset;
				vm_page_t m;
				/*
				 * calculate the page index into the object
				 */
				offset = current->offset + (addr - current->start);
				pindex = OFF_TO_IDX(offset);
				m = vm_page_lookup(current->object.vm_object,
				    pindex);
				/*
				 * if the page is resident, then gather information about
				 * it.
				 */
				if (m) {
					mincoreinfo = MINCORE_INCORE;
					if (m->dirty ||
					    pmap_is_modified(VM_PAGE_TO_PHYS(m)))
						mincoreinfo |= MINCORE_MODIFIED_OTHER;
					if ((m->flags & PG_REFERENCED) ||
					    pmap_ts_referenced(VM_PAGE_TO_PHYS(m))) {
						vm_page_flag_set(m, PG_REFERENCED);
						mincoreinfo |= MINCORE_REFERENCED_OTHER;
					}
				}
			}

			/*
			 * subyte may page fault.  In case it needs to modify
			 * the map, we release the lock.
			 */
			vm_map_unlock_read(map);

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = OFF_TO_IDX(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make sure that
			 * the byte vector is zeroed for those skipped entries.
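			 * (One byte of "vec" describes one page; the index of
			 * the byte for the page at "addr" is its distance from
			 * first_addr in pages, as computed above.)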
			 */
			while ((lastvecindex + 1) < vecindex) {
				++lastvecindex;
				error = subyte(vec + lastvecindex, 0);
				if (error) {
					return (EFAULT);
				}
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte(vec + vecindex, mincoreinfo);
			if (error) {
				return (EFAULT);
			}

			/*
			 * If the map has changed, due to the subyte, the previous
			 * output may be invalid.
			 */
			vm_map_lock_read(map);
			if (timestamp != map->timestamp)
				goto RestartScan;

			lastvecindex = vecindex;
			addr += PAGE_SIZE;
		}
	}

	/*
	 * subyte may page fault.  In case it needs to modify
	 * the map, we release the lock.
	 */
	vm_map_unlock_read(map);

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = OFF_TO_IDX(end - first_addr);
	while ((lastvecindex + 1) < vecindex) {
		++lastvecindex;
		error = subyte(vec + lastvecindex, 0);
		if (error) {
			return (EFAULT);
		}
	}

	/*
	 * If the map has changed, due to the subyte, the previous
	 * output may be invalid.
	 */
	vm_map_lock_read(map);
	if (timestamp != map->timestamp)
		goto RestartScan;
	vm_map_unlock_read(map);

	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mlock_args {
	const void *addr;
	size_t len;
};
#endif
int
mlock(p, uap)
	struct proc *p;
	struct mlock_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int error;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disable wrap around */
	if (addr + size < addr)
		return (EINVAL);

	if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);

#ifdef pmap_wired_count
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (ENOMEM);
#else
	error = suser(p);
	if (error)
		return (error);
#endif

	error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr, addr + size, FALSE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

#ifndef _SYS_SYSPROTO_H_
struct mlockall_args {
	int how;
};
#endif

int
mlockall(p, uap)
	struct proc *p;
	struct mlockall_args *uap;
{
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct munlockall_args {
	int how;
};
#endif

int
munlockall(p, uap)
	struct proc *p;
	struct munlockall_args *uap;
{
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct munlock_args {
	const void *addr;
	size_t len;
};
#endif
int
munlock(p, uap)
	struct proc *p;
	struct munlock_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int error;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disable wrap around */
	if (addr + size < addr)
		return (EINVAL);

#ifndef pmap_wired_count
	error = suser(p);
	if (error)
		return (error);
#endif

	error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr, addr + size, TRUE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * Internal version of mmap.
 * Currently used by mmap, exec, and sys5 shared memory.
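 * "map" is the target address space, "*addr" is the placement hint on entry
 * and the chosen address on return, "size" is rounded up to a multiple of
 * the page size, and "foff" must already be page aligned.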
 * Handle is either a vnode pointer or NULL for MAP_ANON.
 */
int
vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags,
    void *handle,
    vm_ooffset_t foff)
{
	boolean_t fitit;
	vm_object_t object;
	struct vnode *vp = NULL;
	objtype_t type;
	int rv = KERN_SUCCESS;
	vm_ooffset_t objsize;
	int docow;
	struct proc *p = curproc;

	if (size == 0)
		return (0);

	objsize = size = round_page(size);

	/*
	 * We currently can only deal with page aligned file offsets.
	 * The check is here rather than in the syscall because the
	 * kernel calls this function internally for other mmapping
	 * operations (such as in exec) and non-aligned offsets will
	 * cause pmap inconsistencies...so we want to be sure to
	 * disallow this in all cases.
	 */
	if (foff & PAGE_MASK)
		return (EINVAL);

	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		if (*addr != trunc_page(*addr))
			return (EINVAL);
		fitit = FALSE;
		(void) vm_map_remove(map, *addr, *addr + size);
	}

	/*
	 * Lookup/allocate object.
	 */
	if (flags & MAP_ANON) {
		type = OBJT_DEFAULT;
		/*
		 * Unnamed anonymous regions always start at 0.
		 */
		if (handle == 0)
			foff = 0;
	} else {
		vp = (struct vnode *) handle;
		if (vp->v_type == VCHR) {
			type = OBJT_DEVICE;
			handle = (void *)(intptr_t)vp->v_rdev;
		} else {
			struct vattr vat;
			int error;

			error = VOP_GETATTR(vp, &vat, p->p_ucred, p);
			if (error)
				return (error);
			objsize = round_page(vat.va_size);
			type = OBJT_VNODE;
		}
	}

	if (handle == NULL) {
		object = NULL;
		docow = 0;
	} else {
		object = vm_pager_allocate(type,
		    handle, objsize, prot, foff);
		if (object == NULL)
			return (type == OBJT_DEVICE ? EINVAL : ENOMEM);
		docow = MAP_PREFAULT_PARTIAL;
	}

	/*
	 * Force device mappings to be shared.
	 */
	if (type == OBJT_DEVICE) {
		flags &= ~(MAP_PRIVATE|MAP_COPY);
		flags |= MAP_SHARED;
	}

	if ((flags & (MAP_ANON|MAP_SHARED)) == 0) {
		docow |= MAP_COPY_ON_WRITE;
	}

#if defined(VM_PROT_READ_IS_EXEC)
	if (prot & VM_PROT_READ)
		prot |= VM_PROT_EXECUTE;

	if (maxprot & VM_PROT_READ)
		maxprot |= VM_PROT_EXECUTE;
#endif

	if (fitit) {
		*addr = pmap_addr_hint(object, *addr, size);
	}

	if (flags & MAP_STACK)
		rv = vm_map_stack(map, *addr, size, prot,
		    maxprot, docow);
	else
		rv = vm_map_find(map, object, foff, addr, size, fitit,
		    prot, maxprot, docow);

	if (rv != KERN_SUCCESS) {
		/*
		 * Lose the object reference.  Will destroy the
		 * object if it's an unnamed anonymous mapping
		 * or named anonymous without other references.
		 */
		vm_object_deallocate(object);
		goto out;
	}

	/*
	 * Shared memory is also shared with children.
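	 * (MAP_SHARED or MAP_INHERIT mappings are marked VM_INHERIT_SHARE
	 * below, so a child created by fork() shares the pages instead of
	 * receiving a copy-on-write copy.)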
	 */
	if (flags & (MAP_SHARED|MAP_INHERIT)) {
		rv = vm_map_inherit(map, *addr, *addr + size, VM_INHERIT_SHARE);
		if (rv != KERN_SUCCESS) {
			(void) vm_map_remove(map, *addr, *addr + size);
			goto out;
		}
	}
out:
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}
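
/*
 * Illustrative userland use of the system calls implemented above (a
 * sketch only, not part of this file's build; error checking omitted):
 *
 *	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *	...modify the mapped file through p...
 *	msync(p, len, MS_SYNC);
 *	munmap(p, len);
 */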