1 /* 2 * Copyright (c) 1988 University of Utah. 3 * Copyright (c) 1991, 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * the Systems Programming Group of the University of Utah Computer 8 * Science Department. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$ 39 * 40 * @(#)vm_mmap.c 8.4 (Berkeley) 1/12/94 41 * $Id: vm_mmap.c,v 1.85 1998/12/09 20:22:21 dt Exp $ 42 */ 43 44 /* 45 * Mapped file (mmap) interface to VM 46 */ 47 48 #include "opt_compat.h" 49 #include "opt_rlimit.h" 50 51 #include <sys/param.h> 52 #include <sys/systm.h> 53 #include <sys/sysproto.h> 54 #include <sys/filedesc.h> 55 #include <sys/proc.h> 56 #include <sys/vnode.h> 57 #include <sys/fcntl.h> 58 #include <sys/file.h> 59 #include <sys/mman.h> 60 #include <sys/conf.h> 61 #include <sys/stat.h> 62 #include <sys/vmmeter.h> 63 64 #include <miscfs/specfs/specdev.h> 65 66 #include <vm/vm.h> 67 #include <vm/vm_param.h> 68 #include <vm/vm_prot.h> 69 #include <vm/vm_inherit.h> 70 #include <sys/lock.h> 71 #include <vm/pmap.h> 72 #include <vm/vm_map.h> 73 #include <vm/vm_object.h> 74 #include <vm/vm_pager.h> 75 #include <vm/vm_pageout.h> 76 #include <vm/vm_extern.h> 77 #include <vm/vm_page.h> 78 79 #ifndef _SYS_SYSPROTO_H_ 80 struct sbrk_args { 81 int incr; 82 }; 83 #endif 84 85 /* ARGSUSED */ 86 int 87 sbrk(p, uap) 88 struct proc *p; 89 struct sbrk_args *uap; 90 { 91 92 /* Not yet implemented */ 93 return (EOPNOTSUPP); 94 } 95 96 #ifndef _SYS_SYSPROTO_H_ 97 struct sstk_args { 98 int incr; 99 }; 100 #endif 101 102 /* ARGSUSED */ 103 int 104 sstk(p, uap) 105 struct proc *p; 106 struct sstk_args *uap; 107 { 108 109 /* Not yet implemented */ 110 return (EOPNOTSUPP); 111 } 112 113 #if defined(COMPAT_43) || defined(COMPAT_SUNOS) 114 #ifndef _SYS_SYSPROTO_H_ 115 struct getpagesize_args { 116 int dummy; 117 }; 118 #endif 119 120 /* ARGSUSED */ 121 int 122 ogetpagesize(p, uap) 123 struct proc *p; 124 struct getpagesize_args *uap; 125 { 126 127 p->p_retval[0] = PAGE_SIZE; 128 return (0); 129 } 130 #endif /* COMPAT_43 || COMPAT_SUNOS */ 131 132 133 /* 134 * Memory Map (mmap) system call. Note that the file offset 135 * and address are allowed to be NOT page aligned, though if 136 * the MAP_FIXED flag it set, both must have the same remainder 137 * modulo the PAGE_SIZE (POSIX 1003.1b). If the address is not 138 * page-aligned, the actual mapping starts at trunc_page(addr) 139 * and the return value is adjusted up by the page offset. 140 */ 141 #ifndef _SYS_SYSPROTO_H_ 142 struct mmap_args { 143 void *addr; 144 size_t len; 145 int prot; 146 int flags; 147 int fd; 148 long pad; 149 off_t pos; 150 }; 151 #endif 152 153 int 154 mmap(p, uap) 155 struct proc *p; 156 register struct mmap_args *uap; 157 { 158 register struct filedesc *fdp = p->p_fd; 159 register struct file *fp; 160 struct vnode *vp; 161 vm_offset_t addr; 162 vm_size_t size, pageoff; 163 vm_prot_t prot, maxprot; 164 void *handle; 165 int flags, error; 166 int disablexworkaround; 167 off_t pos; 168 169 addr = (vm_offset_t) uap->addr; 170 size = uap->len; 171 prot = uap->prot & VM_PROT_ALL; 172 flags = uap->flags; 173 pos = uap->pos; 174 175 /* make sure mapping fits into numeric range etc */ 176 if ((ssize_t) uap->len < 0 || 177 ((flags & MAP_ANON) && uap->fd != -1)) 178 return (EINVAL); 179 180 #ifdef VM_STACK 181 if (flags & MAP_STACK) { 182 if ((uap->fd != -1) || 183 ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE))) 184 return (EINVAL); 185 flags |= MAP_ANON; 186 pos = 0; 187 } 188 #endif 189 /* 190 * Align the file position to a page boundary, 191 * and save its page offset component. 192 */ 193 pageoff = (pos & PAGE_MASK); 194 pos -= pageoff; 195 196 /* Adjust size for rounding (on both ends). */ 197 size += pageoff; /* low end... */ 198 size = (vm_size_t) round_page(size); /* hi end */ 199 200 /* 201 * Check for illegal addresses. Watch out for address wrap... Note 202 * that VM_*_ADDRESS are not constants due to casts (argh). 203 */ 204 if (flags & MAP_FIXED) { 205 /* 206 * The specified address must have the same remainder 207 * as the file offset taken modulo PAGE_SIZE, so it 208 * should be aligned after adjustment by pageoff. 209 */ 210 addr -= pageoff; 211 if (addr & PAGE_MASK) 212 return (EINVAL); 213 /* Address range must be all in user VM space. */ 214 if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS) 215 return (EINVAL); 216 #ifndef i386 217 if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS) 218 return (EINVAL); 219 #endif 220 if (addr + size < addr) 221 return (EINVAL); 222 } 223 /* 224 * XXX for non-fixed mappings where no hint is provided or 225 * the hint would fall in the potential heap space, 226 * place it after the end of the largest possible heap. 227 * 228 * There should really be a pmap call to determine a reasonable 229 * location. 230 */ 231 else if (addr < round_page((vm_offset_t)p->p_vmspace->vm_daddr + MAXDSIZ)) 232 addr = round_page((vm_offset_t)p->p_vmspace->vm_daddr + MAXDSIZ); 233 234 if (flags & MAP_ANON) { 235 /* 236 * Mapping blank space is trivial. 237 */ 238 handle = NULL; 239 maxprot = VM_PROT_ALL; 240 pos = 0; 241 } else { 242 /* 243 * Mapping file, get fp for validation. Obtain vnode and make 244 * sure it is of appropriate type. 245 */ 246 if (((unsigned) uap->fd) >= fdp->fd_nfiles || 247 (fp = fdp->fd_ofiles[uap->fd]) == NULL) 248 return (EBADF); 249 if (fp->f_type != DTYPE_VNODE) 250 return (EINVAL); 251 vp = (struct vnode *) fp->f_data; 252 if (vp->v_type != VREG && vp->v_type != VCHR) 253 return (EINVAL); 254 /* 255 * XXX hack to handle use of /dev/zero to map anon memory (ala 256 * SunOS). 257 */ 258 if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) { 259 handle = NULL; 260 maxprot = VM_PROT_ALL; 261 flags |= MAP_ANON; 262 pos = 0; 263 } else { 264 /* 265 * cdevs does not provide private mappings of any kind. 266 */ 267 /* 268 * However, for XIG X server to continue to work, 269 * we should allow the superuser to do it anyway. 270 * We only allow it at securelevel < 1. 271 * (Because the XIG X server writes directly to video 272 * memory via /dev/mem, it should never work at any 273 * other securelevel. 274 * XXX this will have to go 275 */ 276 if (securelevel >= 1) 277 disablexworkaround = 1; 278 else 279 disablexworkaround = suser(p->p_ucred, 280 &p->p_acflag); 281 if (vp->v_type == VCHR && disablexworkaround && 282 (flags & (MAP_PRIVATE|MAP_COPY))) 283 return (EINVAL); 284 /* 285 * Ensure that file and memory protections are 286 * compatible. Note that we only worry about 287 * writability if mapping is shared; in this case, 288 * current and max prot are dictated by the open file. 289 * XXX use the vnode instead? Problem is: what 290 * credentials do we use for determination? What if 291 * proc does a setuid? 292 */ 293 maxprot = VM_PROT_EXECUTE; /* ??? */ 294 if (fp->f_flag & FREAD) 295 maxprot |= VM_PROT_READ; 296 else if (prot & PROT_READ) 297 return (EACCES); 298 /* 299 * If we are sharing potential changes (either via 300 * MAP_SHARED or via the implicit sharing of character 301 * device mappings), and we are trying to get write 302 * permission although we opened it without asking 303 * for it, bail out. Check for superuser, only if 304 * we're at securelevel < 1, to allow the XIG X server 305 * to continue to work. 306 */ 307 308 if ((flags & MAP_SHARED) != 0 || 309 (vp->v_type == VCHR && disablexworkaround)) { 310 if ((fp->f_flag & FWRITE) != 0) { 311 struct vattr va; 312 if ((error = 313 VOP_GETATTR(vp, &va, 314 p->p_ucred, p))) 315 return (error); 316 if ((va.va_flags & 317 (IMMUTABLE|APPEND)) == 0) 318 maxprot |= VM_PROT_WRITE; 319 else if (prot & PROT_WRITE) 320 return (EPERM); 321 } else if ((prot & PROT_WRITE) != 0) 322 return (EACCES); 323 } else 324 maxprot |= VM_PROT_WRITE; 325 326 handle = (void *)vp; 327 } 328 } 329 error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot, 330 flags, handle, pos); 331 if (error == 0) 332 p->p_retval[0] = (register_t) (addr + pageoff); 333 return (error); 334 } 335 336 #ifdef COMPAT_43 337 #ifndef _SYS_SYSPROTO_H_ 338 struct ommap_args { 339 caddr_t addr; 340 int len; 341 int prot; 342 int flags; 343 int fd; 344 long pos; 345 }; 346 #endif 347 int 348 ommap(p, uap) 349 struct proc *p; 350 register struct ommap_args *uap; 351 { 352 struct mmap_args nargs; 353 static const char cvtbsdprot[8] = { 354 0, 355 PROT_EXEC, 356 PROT_WRITE, 357 PROT_EXEC | PROT_WRITE, 358 PROT_READ, 359 PROT_EXEC | PROT_READ, 360 PROT_WRITE | PROT_READ, 361 PROT_EXEC | PROT_WRITE | PROT_READ, 362 }; 363 364 #define OMAP_ANON 0x0002 365 #define OMAP_COPY 0x0020 366 #define OMAP_SHARED 0x0010 367 #define OMAP_FIXED 0x0100 368 #define OMAP_INHERIT 0x0800 369 370 nargs.addr = uap->addr; 371 nargs.len = uap->len; 372 nargs.prot = cvtbsdprot[uap->prot & 0x7]; 373 nargs.flags = 0; 374 if (uap->flags & OMAP_ANON) 375 nargs.flags |= MAP_ANON; 376 if (uap->flags & OMAP_COPY) 377 nargs.flags |= MAP_COPY; 378 if (uap->flags & OMAP_SHARED) 379 nargs.flags |= MAP_SHARED; 380 else 381 nargs.flags |= MAP_PRIVATE; 382 if (uap->flags & OMAP_FIXED) 383 nargs.flags |= MAP_FIXED; 384 if (uap->flags & OMAP_INHERIT) 385 nargs.flags |= MAP_INHERIT; 386 nargs.fd = uap->fd; 387 nargs.pos = uap->pos; 388 return (mmap(p, &nargs)); 389 } 390 #endif /* COMPAT_43 */ 391 392 393 #ifndef _SYS_SYSPROTO_H_ 394 struct msync_args { 395 void *addr; 396 int len; 397 int flags; 398 }; 399 #endif 400 int 401 msync(p, uap) 402 struct proc *p; 403 struct msync_args *uap; 404 { 405 vm_offset_t addr; 406 vm_size_t size, pageoff; 407 int flags; 408 vm_map_t map; 409 int rv; 410 411 addr = (vm_offset_t) uap->addr; 412 size = uap->len; 413 flags = uap->flags; 414 415 pageoff = (addr & PAGE_MASK); 416 addr -= pageoff; 417 size += pageoff; 418 size = (vm_size_t) round_page(size); 419 if (addr + size < addr) 420 return(EINVAL); 421 422 if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE)) 423 return (EINVAL); 424 425 map = &p->p_vmspace->vm_map; 426 427 /* 428 * XXX Gak! If size is zero we are supposed to sync "all modified 429 * pages with the region containing addr". Unfortunately, we don't 430 * really keep track of individual mmaps so we approximate by flushing 431 * the range of the map entry containing addr. This can be incorrect 432 * if the region splits or is coalesced with a neighbor. 433 */ 434 if (size == 0) { 435 vm_map_entry_t entry; 436 437 vm_map_lock_read(map); 438 rv = vm_map_lookup_entry(map, addr, &entry); 439 vm_map_unlock_read(map); 440 if (rv == FALSE) 441 return (EINVAL); 442 addr = entry->start; 443 size = entry->end - entry->start; 444 } 445 446 /* 447 * Clean the pages and interpret the return value. 448 */ 449 rv = vm_map_clean(map, addr, addr + size, (flags & MS_ASYNC) == 0, 450 (flags & MS_INVALIDATE) != 0); 451 452 switch (rv) { 453 case KERN_SUCCESS: 454 break; 455 case KERN_INVALID_ADDRESS: 456 return (EINVAL); /* Sun returns ENOMEM? */ 457 case KERN_FAILURE: 458 return (EIO); 459 default: 460 return (EINVAL); 461 } 462 463 return (0); 464 } 465 466 #ifndef _SYS_SYSPROTO_H_ 467 struct munmap_args { 468 void *addr; 469 size_t len; 470 }; 471 #endif 472 int 473 munmap(p, uap) 474 register struct proc *p; 475 register struct munmap_args *uap; 476 { 477 vm_offset_t addr; 478 vm_size_t size, pageoff; 479 vm_map_t map; 480 481 addr = (vm_offset_t) uap->addr; 482 size = uap->len; 483 484 pageoff = (addr & PAGE_MASK); 485 addr -= pageoff; 486 size += pageoff; 487 size = (vm_size_t) round_page(size); 488 if (addr + size < addr) 489 return(EINVAL); 490 491 if (size == 0) 492 return (0); 493 494 /* 495 * Check for illegal addresses. Watch out for address wrap... Note 496 * that VM_*_ADDRESS are not constants due to casts (argh). 497 */ 498 if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS) 499 return (EINVAL); 500 #ifndef i386 501 if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS) 502 return (EINVAL); 503 #endif 504 map = &p->p_vmspace->vm_map; 505 /* 506 * Make sure entire range is allocated. 507 */ 508 if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE)) 509 return (EINVAL); 510 /* returns nothing but KERN_SUCCESS anyway */ 511 (void) vm_map_remove(map, addr, addr + size); 512 return (0); 513 } 514 515 void 516 munmapfd(p, fd) 517 struct proc *p; 518 int fd; 519 { 520 /* 521 * XXX should unmap any regions mapped to this file 522 */ 523 p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED; 524 } 525 526 #ifndef _SYS_SYSPROTO_H_ 527 struct mprotect_args { 528 const void *addr; 529 size_t len; 530 int prot; 531 }; 532 #endif 533 int 534 mprotect(p, uap) 535 struct proc *p; 536 struct mprotect_args *uap; 537 { 538 vm_offset_t addr; 539 vm_size_t size, pageoff; 540 register vm_prot_t prot; 541 542 addr = (vm_offset_t) uap->addr; 543 size = uap->len; 544 prot = uap->prot & VM_PROT_ALL; 545 #if defined(VM_PROT_READ_IS_EXEC) 546 if (prot & VM_PROT_READ) 547 prot |= VM_PROT_EXECUTE; 548 #endif 549 550 pageoff = (addr & PAGE_MASK); 551 addr -= pageoff; 552 size += pageoff; 553 size = (vm_size_t) round_page(size); 554 if (addr + size < addr) 555 return(EINVAL); 556 557 switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot, 558 FALSE)) { 559 case KERN_SUCCESS: 560 return (0); 561 case KERN_PROTECTION_FAILURE: 562 return (EACCES); 563 } 564 return (EINVAL); 565 } 566 567 #ifndef _SYS_SYSPROTO_H_ 568 struct minherit_args { 569 void *addr; 570 size_t len; 571 int inherit; 572 }; 573 #endif 574 int 575 minherit(p, uap) 576 struct proc *p; 577 struct minherit_args *uap; 578 { 579 vm_offset_t addr; 580 vm_size_t size, pageoff; 581 register vm_inherit_t inherit; 582 583 addr = (vm_offset_t)uap->addr; 584 size = uap->len; 585 inherit = uap->inherit; 586 587 pageoff = (addr & PAGE_MASK); 588 addr -= pageoff; 589 size += pageoff; 590 size = (vm_size_t) round_page(size); 591 if (addr + size < addr) 592 return(EINVAL); 593 594 switch (vm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size, 595 inherit)) { 596 case KERN_SUCCESS: 597 return (0); 598 case KERN_PROTECTION_FAILURE: 599 return (EACCES); 600 } 601 return (EINVAL); 602 } 603 604 #ifndef _SYS_SYSPROTO_H_ 605 struct madvise_args { 606 void *addr; 607 size_t len; 608 int behav; 609 }; 610 #endif 611 612 /* ARGSUSED */ 613 int 614 madvise(p, uap) 615 struct proc *p; 616 struct madvise_args *uap; 617 { 618 vm_map_t map; 619 pmap_t pmap; 620 vm_offset_t start, end; 621 /* 622 * Check for illegal addresses. Watch out for address wrap... Note 623 * that VM_*_ADDRESS are not constants due to casts (argh). 624 */ 625 if (VM_MAXUSER_ADDRESS > 0 && 626 ((vm_offset_t) uap->addr + uap->len) > VM_MAXUSER_ADDRESS) 627 return (EINVAL); 628 #ifndef i386 629 if (VM_MIN_ADDRESS > 0 && uap->addr < VM_MIN_ADDRESS) 630 return (EINVAL); 631 #endif 632 if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr) 633 return (EINVAL); 634 635 /* 636 * Since this routine is only advisory, we default to conservative 637 * behavior. 638 */ 639 start = trunc_page((vm_offset_t) uap->addr); 640 end = round_page((vm_offset_t) uap->addr + uap->len); 641 642 map = &p->p_vmspace->vm_map; 643 pmap = &p->p_vmspace->vm_pmap; 644 645 vm_map_madvise(map, pmap, start, end, uap->behav); 646 647 return (0); 648 } 649 650 #ifndef _SYS_SYSPROTO_H_ 651 struct mincore_args { 652 const void *addr; 653 size_t len; 654 char *vec; 655 }; 656 #endif 657 658 /* ARGSUSED */ 659 int 660 mincore(p, uap) 661 struct proc *p; 662 struct mincore_args *uap; 663 { 664 vm_offset_t addr, first_addr; 665 vm_offset_t end, cend; 666 pmap_t pmap; 667 vm_map_t map; 668 char *vec; 669 int error; 670 int vecindex, lastvecindex; 671 register vm_map_entry_t current; 672 vm_map_entry_t entry; 673 int mincoreinfo; 674 675 /* 676 * Make sure that the addresses presented are valid for user 677 * mode. 678 */ 679 first_addr = addr = trunc_page((vm_offset_t) uap->addr); 680 end = addr + (vm_size_t)round_page(uap->len); 681 if (VM_MAXUSER_ADDRESS > 0 && end > VM_MAXUSER_ADDRESS) 682 return (EINVAL); 683 if (end < addr) 684 return (EINVAL); 685 686 /* 687 * Address of byte vector 688 */ 689 vec = uap->vec; 690 691 map = &p->p_vmspace->vm_map; 692 pmap = &p->p_vmspace->vm_pmap; 693 694 vm_map_lock(map); 695 696 if (!vm_map_lookup_entry(map, addr, &entry)) 697 entry = entry->next; 698 699 /* 700 * Do this on a map entry basis so that if the pages are not 701 * in the current processes address space, we can easily look 702 * up the pages elsewhere. 703 */ 704 lastvecindex = -1; 705 for(current = entry; 706 (current != &map->header) && (current->start < end); 707 current = current->next) { 708 709 /* 710 * ignore submaps (for now) or null objects 711 */ 712 if ((current->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) || 713 current->object.vm_object == NULL) 714 continue; 715 716 /* 717 * limit this scan to the current map entry and the 718 * limits for the mincore call 719 */ 720 if (addr < current->start) 721 addr = current->start; 722 cend = current->end; 723 if (cend > end) 724 cend = end; 725 726 /* 727 * scan this entry one page at a time 728 */ 729 while(addr < cend) { 730 /* 731 * Check pmap first, it is likely faster, also 732 * it can provide info as to whether we are the 733 * one referencing or modifying the page. 734 */ 735 mincoreinfo = pmap_mincore(pmap, addr); 736 if (!mincoreinfo) { 737 vm_pindex_t pindex; 738 vm_ooffset_t offset; 739 vm_page_t m; 740 /* 741 * calculate the page index into the object 742 */ 743 offset = current->offset + (addr - current->start); 744 pindex = OFF_TO_IDX(offset); 745 m = vm_page_lookup(current->object.vm_object, 746 pindex); 747 /* 748 * if the page is resident, then gather information about 749 * it. 750 */ 751 if (m) { 752 mincoreinfo = MINCORE_INCORE; 753 if (m->dirty || 754 pmap_is_modified(VM_PAGE_TO_PHYS(m))) 755 mincoreinfo |= MINCORE_MODIFIED_OTHER; 756 if ((m->flags & PG_REFERENCED) || 757 pmap_ts_referenced(VM_PAGE_TO_PHYS(m))) { 758 vm_page_flag_set(m, PG_REFERENCED); 759 mincoreinfo |= MINCORE_REFERENCED_OTHER; 760 } 761 } 762 } 763 764 /* 765 * calculate index into user supplied byte vector 766 */ 767 vecindex = OFF_TO_IDX(addr - first_addr); 768 769 /* 770 * If we have skipped map entries, we need to make sure that 771 * the byte vector is zeroed for those skipped entries. 772 */ 773 while((lastvecindex + 1) < vecindex) { 774 error = subyte( vec + lastvecindex, 0); 775 if (error) { 776 vm_map_unlock(map); 777 return (EFAULT); 778 } 779 ++lastvecindex; 780 } 781 782 /* 783 * Pass the page information to the user 784 */ 785 error = subyte( vec + vecindex, mincoreinfo); 786 if (error) { 787 vm_map_unlock(map); 788 return (EFAULT); 789 } 790 lastvecindex = vecindex; 791 addr += PAGE_SIZE; 792 } 793 } 794 795 /* 796 * Zero the last entries in the byte vector. 797 */ 798 vecindex = OFF_TO_IDX(end - first_addr); 799 while((lastvecindex + 1) < vecindex) { 800 error = subyte( vec + lastvecindex, 0); 801 if (error) { 802 vm_map_unlock(map); 803 return (EFAULT); 804 } 805 ++lastvecindex; 806 } 807 808 vm_map_unlock(map); 809 return (0); 810 } 811 812 #ifndef _SYS_SYSPROTO_H_ 813 struct mlock_args { 814 const void *addr; 815 size_t len; 816 }; 817 #endif 818 int 819 mlock(p, uap) 820 struct proc *p; 821 struct mlock_args *uap; 822 { 823 vm_offset_t addr; 824 vm_size_t size, pageoff; 825 int error; 826 827 addr = (vm_offset_t) uap->addr; 828 size = uap->len; 829 830 pageoff = (addr & PAGE_MASK); 831 addr -= pageoff; 832 size += pageoff; 833 size = (vm_size_t) round_page(size); 834 835 /* disable wrap around */ 836 if (addr + size < addr) 837 return (EINVAL); 838 839 if (atop(size) + cnt.v_wire_count > vm_page_max_wired) 840 return (EAGAIN); 841 842 #ifdef pmap_wired_count 843 if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) > 844 p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur) 845 return (ENOMEM); 846 #else 847 error = suser(p->p_ucred, &p->p_acflag); 848 if (error) 849 return (error); 850 #endif 851 852 error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr, addr + size, FALSE); 853 return (error == KERN_SUCCESS ? 0 : ENOMEM); 854 } 855 856 #ifndef _SYS_SYSPROTO_H_ 857 struct mlockall_args { 858 int how; 859 }; 860 #endif 861 862 int 863 mlockall(p, uap) 864 struct proc *p; 865 struct mlockall_args *uap; 866 { 867 return 0; 868 } 869 870 #ifndef _SYS_SYSPROTO_H_ 871 struct mlockall_args { 872 int how; 873 }; 874 #endif 875 876 int 877 munlockall(p, uap) 878 struct proc *p; 879 struct munlockall_args *uap; 880 { 881 return 0; 882 } 883 884 #ifndef _SYS_SYSPROTO_H_ 885 struct munlock_args { 886 const void *addr; 887 size_t len; 888 }; 889 #endif 890 int 891 munlock(p, uap) 892 struct proc *p; 893 struct munlock_args *uap; 894 { 895 vm_offset_t addr; 896 vm_size_t size, pageoff; 897 int error; 898 899 addr = (vm_offset_t) uap->addr; 900 size = uap->len; 901 902 pageoff = (addr & PAGE_MASK); 903 addr -= pageoff; 904 size += pageoff; 905 size = (vm_size_t) round_page(size); 906 907 /* disable wrap around */ 908 if (addr + size < addr) 909 return (EINVAL); 910 911 #ifndef pmap_wired_count 912 error = suser(p->p_ucred, &p->p_acflag); 913 if (error) 914 return (error); 915 #endif 916 917 error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr, addr + size, TRUE); 918 return (error == KERN_SUCCESS ? 0 : ENOMEM); 919 } 920 921 /* 922 * Internal version of mmap. 923 * Currently used by mmap, exec, and sys5 shared memory. 924 * Handle is either a vnode pointer or NULL for MAP_ANON. 925 */ 926 int 927 vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, 928 vm_prot_t maxprot, int flags, 929 void *handle, 930 vm_ooffset_t foff) 931 { 932 boolean_t fitit; 933 vm_object_t object; 934 struct vnode *vp = NULL; 935 objtype_t type; 936 int rv = KERN_SUCCESS; 937 vm_ooffset_t objsize; 938 int docow; 939 struct proc *p = curproc; 940 941 if (size == 0) 942 return (0); 943 944 objsize = size = round_page(size); 945 946 /* 947 * We currently can only deal with page aligned file offsets. 948 * The check is here rather than in the syscall because the 949 * kernel calls this function internally for other mmaping 950 * operations (such as in exec) and non-aligned offsets will 951 * cause pmap inconsistencies...so we want to be sure to 952 * disallow this in all cases. 953 */ 954 if (foff & PAGE_MASK) 955 return (EINVAL); 956 957 if ((flags & MAP_FIXED) == 0) { 958 fitit = TRUE; 959 *addr = round_page(*addr); 960 } else { 961 if (*addr != trunc_page(*addr)) 962 return (EINVAL); 963 fitit = FALSE; 964 (void) vm_map_remove(map, *addr, *addr + size); 965 } 966 967 /* 968 * Lookup/allocate object. 969 */ 970 if (flags & MAP_ANON) { 971 type = OBJT_DEFAULT; 972 /* 973 * Unnamed anonymous regions always start at 0. 974 */ 975 if (handle == 0) 976 foff = 0; 977 } else { 978 vp = (struct vnode *) handle; 979 if (vp->v_type == VCHR) { 980 type = OBJT_DEVICE; 981 handle = (void *)(intptr_t)vp->v_rdev; 982 } else { 983 struct vattr vat; 984 int error; 985 986 error = VOP_GETATTR(vp, &vat, p->p_ucred, p); 987 if (error) 988 return (error); 989 objsize = round_page(vat.va_size); 990 type = OBJT_VNODE; 991 } 992 } 993 994 if (handle == NULL) { 995 object = NULL; 996 } else { 997 object = vm_pager_allocate(type, 998 handle, objsize, prot, foff); 999 if (object == NULL) 1000 return (type == OBJT_DEVICE ? EINVAL : ENOMEM); 1001 } 1002 1003 /* 1004 * Force device mappings to be shared. 1005 */ 1006 if (type == OBJT_DEVICE) { 1007 flags &= ~(MAP_PRIVATE|MAP_COPY); 1008 flags |= MAP_SHARED; 1009 } 1010 1011 docow = 0; 1012 if ((flags & (MAP_ANON|MAP_SHARED)) == 0) { 1013 docow = MAP_COPY_ON_WRITE | MAP_COPY_NEEDED; 1014 } 1015 1016 #if defined(VM_PROT_READ_IS_EXEC) 1017 if (prot & VM_PROT_READ) 1018 prot |= VM_PROT_EXECUTE; 1019 1020 if (maxprot & VM_PROT_READ) 1021 maxprot |= VM_PROT_EXECUTE; 1022 #endif 1023 1024 if (fitit) { 1025 *addr = pmap_addr_hint(object, *addr, size); 1026 } 1027 1028 #ifdef VM_STACK 1029 if (flags & MAP_STACK) 1030 rv = vm_map_stack (map, *addr, size, prot, 1031 maxprot, docow); 1032 else 1033 #endif 1034 rv = vm_map_find(map, object, foff, addr, size, fitit, 1035 prot, maxprot, docow); 1036 1037 if (rv != KERN_SUCCESS) { 1038 /* 1039 * Lose the object reference. Will destroy the 1040 * object if it's an unnamed anonymous mapping 1041 * or named anonymous without other references. 1042 */ 1043 vm_object_deallocate(object); 1044 goto out; 1045 } 1046 1047 /* 1048 * "Pre-fault" resident pages. 1049 */ 1050 if ((map->pmap != NULL) && (object != NULL)) { 1051 pmap_object_init_pt(map->pmap, *addr, 1052 object, (vm_pindex_t) OFF_TO_IDX(foff), size, 1); 1053 } 1054 1055 /* 1056 * Shared memory is also shared with children. 1057 */ 1058 if (flags & (MAP_SHARED|MAP_INHERIT)) { 1059 rv = vm_map_inherit(map, *addr, *addr + size, VM_INHERIT_SHARE); 1060 if (rv != KERN_SUCCESS) { 1061 (void) vm_map_remove(map, *addr, *addr + size); 1062 goto out; 1063 } 1064 } 1065 out: 1066 switch (rv) { 1067 case KERN_SUCCESS: 1068 return (0); 1069 case KERN_INVALID_ADDRESS: 1070 case KERN_NO_SPACE: 1071 return (ENOMEM); 1072 case KERN_PROTECTION_FAILURE: 1073 return (EACCES); 1074 default: 1075 return (EINVAL); 1076 } 1077 } 1078