/*-
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_hwpmc_hooks.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/capsicum.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/procctl.h>
#include <sys/racct.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/vmmeter.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vnode_pager.h>

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

int old_mlock = 0;
SYSCTL_INT(_vm, OID_AUTO, old_mlock, CTLFLAG_RWTUN, &old_mlock, 0,
    "Do not apply RLIMIT_MEMLOCK on mlockall");

#ifdef MAP_32BIT
#define	MAP_32BIT_MAX_ADDR	((vm_offset_t)1 << 31)
#endif

#ifndef _SYS_SYSPROTO_H_
struct sbrk_args {
	int incr;
};
#endif

/*
 * MPSAFE
 */
/* ARGSUSED */
int
sys_sbrk(td, uap)
	struct thread *td;
	struct sbrk_args *uap;
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#ifndef _SYS_SYSPROTO_H_
struct sstk_args {
	int incr;
};
#endif

/*
 * MPSAFE
 */
/* ARGSUSED */
int
sys_sstk(td, uap)
	struct thread *td;
	struct sstk_args *uap;
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43)
#ifndef _SYS_SYSPROTO_H_
struct getpagesize_args {
	int dummy;
};
#endif

int
ogetpagesize(td, uap)
	struct thread *td;
	struct getpagesize_args *uap;
{
	/* MP SAFE */
	td->td_retval[0] = PAGE_SIZE;
	return (0);
}
#endif				/* COMPAT_43 */


/*
 * Memory Map (mmap) system call.  Note that the file offset
 * and address are allowed to be NOT page aligned, though if
 * the MAP_FIXED flag is set, both must have the same remainder
 * modulo the PAGE_SIZE (POSIX 1003.1b).  If the address is not
 * page-aligned, the actual mapping starts at trunc_page(addr)
 * and the return value is adjusted up by the page offset.
 *
 * Generally speaking, only character devices which are themselves
 * memory-based, such as a video framebuffer, can be mmap'd.  Otherwise
 * there would be no cache coherency between a descriptor and a VM mapping
 * both to the same character device.
 */
#ifndef _SYS_SYSPROTO_H_
struct mmap_args {
	void *addr;
	size_t len;
	int prot;
	int flags;
	int fd;
	long pad;
	off_t pos;
};
#endif

/*
 * MPSAFE
 */
int
sys_mmap(td, uap)
	struct thread *td;
	struct mmap_args *uap;
{
	struct file *fp;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t cap_maxprot;
	int align, error, flags, prot;
	off_t pos;
	struct vmspace *vms = td->td_proc->p_vmspace;
	cap_rights_t rights;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot;
	flags = uap->flags;
	pos = uap->pos;

	fp = NULL;

	/*
	 * Ignore old flags that used to be defined but did not do anything.
	 */
	flags &= ~(MAP_RESERVED0020 | MAP_RESERVED0040);

	/*
	 * Enforce the constraints.
	 * Mapping of length 0 is only allowed for old binaries.
	 * Anonymous mapping shall specify -1 as file descriptor and
	 * zero position for new code. Be nice to ancient a.out
	 * binaries and correct pos for anonymous mapping, since old
	 * ld.so sometimes issues anonymous map requests with non-zero
	 * pos.
	 */
	if (!SV_CURPROC_FLAG(SV_AOUT)) {
		if ((uap->len == 0 && curproc->p_osrel >= P_OSREL_MAP_ANON) ||
		    ((flags & MAP_ANON) != 0 && (uap->fd != -1 || pos != 0)))
			return (EINVAL);
	} else {
		if ((flags & MAP_ANON) != 0)
			pos = 0;
	}

	if (flags & MAP_STACK) {
		if ((uap->fd != -1) ||
		    ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE)))
			return (EINVAL);
		flags |= MAP_ANON;
		pos = 0;
	}
	if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | MAP_HASSEMAPHORE |
	    MAP_STACK | MAP_NOSYNC | MAP_ANON | MAP_EXCL | MAP_NOCORE |
	    MAP_PREFAULT_READ |
#ifdef MAP_32BIT
	    MAP_32BIT |
#endif
	    MAP_ALIGNMENT_MASK)) != 0)
		return (EINVAL);
	if ((flags & (MAP_EXCL | MAP_FIXED)) == MAP_EXCL)
		return (EINVAL);
	if ((flags & (MAP_SHARED | MAP_PRIVATE)) == (MAP_SHARED | MAP_PRIVATE))
		return (EINVAL);
	if (prot != PROT_NONE &&
	    (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) != 0)
		return (EINVAL);

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;

	/* Adjust size for rounding (on both ends). */
	size += pageoff;			/* low end... */
	size = (vm_size_t) round_page(size);	/* hi end */

	/* Ensure alignment is at least a page and fits in a pointer. */
	align = flags & MAP_ALIGNMENT_MASK;
	if (align != 0 && align != MAP_ALIGNED_SUPER &&
	    (align >> MAP_ALIGNMENT_SHIFT >= sizeof(void *) * NBBY ||
	    align >> MAP_ALIGNMENT_SHIFT < PAGE_SHIFT))
		return (EINVAL);

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		/* Address range must be all in user VM space. */
		if (addr < vm_map_min(&vms->vm_map) ||
		    addr + size > vm_map_max(&vms->vm_map))
			return (EINVAL);
		if (addr + size < addr)
			return (EINVAL);
#ifdef MAP_32BIT
		if (flags & MAP_32BIT && addr + size > MAP_32BIT_MAX_ADDR)
			return (EINVAL);
	} else if (flags & MAP_32BIT) {
		/*
		 * For MAP_32BIT, override the hint if it is too high and
		 * do not bother moving the mapping past the heap (since
		 * the heap is usually above 2GB).
		 */
		if (addr + size > MAP_32BIT_MAX_ADDR)
			addr = 0;
#endif
	} else {
		/*
		 * XXX for non-fixed mappings where no hint is provided or
		 * the hint would fall in the potential heap space,
		 * place it after the end of the largest possible heap.
		 *
		 * There should really be a pmap call to determine a reasonable
		 * location.
		 */
		if (addr == 0 ||
		    (addr >= round_page((vm_offset_t)vms->vm_taddr) &&
		    addr < round_page((vm_offset_t)vms->vm_daddr +
		    lim_max(td, RLIMIT_DATA))))
			addr = round_page((vm_offset_t)vms->vm_daddr +
			    lim_max(td, RLIMIT_DATA));
	}
	if (size == 0) {
		/*
		 * Return success without mapping anything for old
		 * binaries that request a page-aligned mapping of
		 * length 0.  For modern binaries, this function
		 * returns an error earlier.
		 */
		error = 0;
	} else if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.
		 *
		 * This relies on VM_PROT_* matching PROT_*.
		 */
		error = vm_mmap_object(&vms->vm_map, &addr, size, prot,
		    VM_PROT_ALL, flags, NULL, pos, FALSE, td);
	} else {
		/*
		 * Mapping file, get fp for validation and don't let the
		 * descriptor disappear on us if we block. Check capability
		 * rights, but also return the maximum rights to be combined
		 * with maxprot later.
		 */
		cap_rights_init(&rights, CAP_MMAP);
		if (prot & PROT_READ)
			cap_rights_set(&rights, CAP_MMAP_R);
		if ((flags & MAP_SHARED) != 0) {
			if (prot & PROT_WRITE)
				cap_rights_set(&rights, CAP_MMAP_W);
		}
		if (prot & PROT_EXEC)
			cap_rights_set(&rights, CAP_MMAP_X);
		error = fget_mmap(td, uap->fd, &rights, &cap_maxprot, &fp);
		if (error != 0)
			goto done;
		if ((flags & (MAP_SHARED | MAP_PRIVATE)) == 0 &&
		    td->td_proc->p_osrel >= P_OSREL_MAP_FSTRICT) {
			error = EINVAL;
			goto done;
		}

		/* This relies on VM_PROT_* matching PROT_*. */
		error = fo_mmap(fp, &vms->vm_map, &addr, size, prot,
		    cap_maxprot, flags, pos, td);
	}

	if (error == 0)
		td->td_retval[0] = (register_t) (addr + pageoff);
done:
	if (fp)
		fdrop(fp, td);

	return (error);
}

#if defined(COMPAT_FREEBSD6)
int
freebsd6_mmap(struct thread *td, struct freebsd6_mmap_args *uap)
{
	struct mmap_args oargs;

	oargs.addr = uap->addr;
	oargs.len = uap->len;
	oargs.prot = uap->prot;
	oargs.flags = uap->flags;
	oargs.fd = uap->fd;
	oargs.pos = uap->pos;
	return (sys_mmap(td, &oargs));
}
#endif

#ifdef COMPAT_43
#ifndef _SYS_SYSPROTO_H_
struct ommap_args {
	caddr_t addr;
	int len;
	int prot;
	int flags;
	int fd;
	long pos;
};
#endif
int
ommap(td, uap)
	struct thread *td;
	struct ommap_args *uap;
{
	struct mmap_args nargs;
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC | PROT_WRITE,
		PROT_READ,
		PROT_EXEC | PROT_READ,
		PROT_WRITE | PROT_READ,
		PROT_EXEC | PROT_WRITE | PROT_READ,
	};

#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100

	nargs.addr = uap->addr;
	nargs.len = uap->len;
	nargs.prot = cvtbsdprot[uap->prot & 0x7];
#ifdef COMPAT_FREEBSD32
#if defined(__amd64__)
	if (i386_read_exec && SV_PROC_FLAG(td->td_proc, SV_ILP32) &&
	    nargs.prot != 0)
		nargs.prot |= PROT_EXEC;
#endif
#endif
	nargs.flags = 0;
	if (uap->flags & OMAP_ANON)
		nargs.flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		nargs.flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		nargs.flags |= MAP_SHARED;
	else
		nargs.flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		nargs.flags |= MAP_FIXED;
	nargs.fd = uap->fd;
	nargs.pos = uap->pos;
	return (sys_mmap(td, &nargs));
}
#endif				/* COMPAT_43 */


#ifndef _SYS_SYSPROTO_H_
struct msync_args {
	void *addr;
	size_t len;
	int flags;
};
#endif
/*
 * MPSAFE
 */
int
sys_msync(td, uap)
	struct thread *td;
	struct msync_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int flags;
	vm_map_t map;
	int rv;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	flags = uap->flags;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	map = &td->td_proc->p_vmspace->vm_map;

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_sync(map, addr, addr + size, (flags & MS_ASYNC) == 0,
	    (flags & MS_INVALIDATE) != 0);
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
		return (ENOMEM);
	case KERN_INVALID_ARGUMENT:
		return (EBUSY);
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
}

#ifndef _SYS_SYSPROTO_H_
struct munmap_args {
	void *addr;
	size_t len;
};
#endif
/*
 * MPSAFE
 */
int
sys_munmap(td, uap)
	struct thread *td;
	struct munmap_args *uap;
{
#ifdef HWPMC_HOOKS
	struct pmckern_map_out pkm;
	vm_map_entry_t entry;
#endif
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_map_t map;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	if (size == 0)
		return (EINVAL);

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	if (addr < vm_map_min(map) || addr + size > vm_map_max(map))
		return (EINVAL);
	vm_map_lock(map);
#ifdef HWPMC_HOOKS
	/*
	 * Inform hwpmc if the address range being unmapped contains
	 * an executable region.
	 */
	pkm.pm_address = (uintptr_t) NULL;
	if (vm_map_lookup_entry(map, addr, &entry)) {
		for (;
		    entry != &map->header && entry->start < addr + size;
		    entry = entry->next) {
			if (vm_map_check_protection(map, entry->start,
			    entry->end, VM_PROT_EXECUTE) == TRUE) {
				pkm.pm_address = (uintptr_t) addr;
				pkm.pm_size = (size_t) size;
				break;
			}
		}
	}
#endif
	vm_map_delete(map, addr, addr + size);

#ifdef HWPMC_HOOKS
	/* downgrade the lock to prevent a LOR with the pmc-sx lock */
	vm_map_lock_downgrade(map);
	if (pkm.pm_address != (uintptr_t) NULL)
		PMC_CALL_HOOK(td, PMC_FN_MUNMAP, (void *) &pkm);
	vm_map_unlock_read(map);
#else
	vm_map_unlock(map);
#endif
	/* vm_map_delete returns nothing but KERN_SUCCESS anyway */
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mprotect_args {
	const void *addr;
	size_t len;
	int prot;
};
#endif
/*
 * MPSAFE
 */
int
sys_mprotect(td, uap)
	struct thread *td;
	struct mprotect_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t prot;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_protect(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, prot, FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	case KERN_RESOURCE_SHORTAGE:
		return (ENOMEM);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct minherit_args {
	void *addr;
	size_t len;
	int inherit;
};
#endif
/*
 * MPSAFE
 */
int
sys_minherit(td, uap)
	struct thread *td;
	struct minherit_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_inherit_t inherit;

	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	inherit = uap->inherit;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_inherit(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct madvise_args {
	void *addr;
	size_t len;
	int behav;
};
#endif

/*
 * MPSAFE
 */
int
sys_madvise(td, uap)
	struct thread *td;
	struct madvise_args *uap;
{
	vm_offset_t start, end;
	vm_map_t map;
	int flags;

	/*
	 * Check for our special case, advising the swap pager we are
	 * "immortal."
	 */
	if (uap->behav == MADV_PROTECT) {
		flags = PPROT_SET;
		return (kern_procctl(td, P_PID, td->td_proc->p_pid,
		    PROC_SPROTECT, &flags));
	}

	/*
	 * Check for illegal behavior
	 */
	if (uap->behav < 0 || uap->behav > MADV_CORE)
		return (EINVAL);
	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	if ((vm_offset_t)uap->addr < vm_map_min(map) ||
	    (vm_offset_t)uap->addr + uap->len > vm_map_max(map))
		return (EINVAL);
	if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr)
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	start = trunc_page((vm_offset_t) uap->addr);
	end = round_page((vm_offset_t) uap->addr + uap->len);

	if (vm_map_madvise(map, start, end, uap->behav))
		return (EINVAL);
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mincore_args {
	const void *addr;
	size_t len;
	char *vec;
};
#endif

/*
 * MPSAFE
 */
int
sys_mincore(td, uap)
	struct thread *td;
	struct mincore_args *uap;
{
	vm_offset_t addr, first_addr;
	vm_offset_t end, cend;
	pmap_t pmap;
	vm_map_t map;
	char *vec;
	int error = 0;
	int vecindex, lastvecindex;
	vm_map_entry_t current;
	vm_map_entry_t entry;
	vm_object_t object;
	vm_paddr_t locked_pa;
	vm_page_t m;
	vm_pindex_t pindex;
	int mincoreinfo;
	unsigned int timestamp;
	boolean_t locked;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page((vm_offset_t) uap->addr);
	end = addr + (vm_size_t)round_page(uap->len);
	map = &td->td_proc->p_vmspace->vm_map;
	if (end > vm_map_max(map) || end < addr)
		return (ENOMEM);

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	pmap = vmspace_pmap(td->td_proc->p_vmspace);

	vm_map_lock_read(map);
RestartScan:
	timestamp = map->timestamp;

	if (!vm_map_lookup_entry(map, addr, &entry)) {
		vm_map_unlock_read(map);
		return (ENOMEM);
	}

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current process's address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for (current = entry;
	    (current != &map->header) && (current->start < end);
	    current = current->next) {

		/*
		 * check for contiguity
		 */
		if (current->end < end &&
		    (entry->next == &map->header ||
		    current->next->start > current->end)) {
			vm_map_unlock_read(map);
			return (ENOMEM);
		}

		/*
		 * ignore submaps (for now) or null objects
		 */
		if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) ||
		    current->object.vm_object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		/*
		 * scan this entry one page at a time
		 */
		while (addr < cend) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 */
			object = NULL;
			locked_pa = 0;
		retry:
			m = NULL;
			mincoreinfo = pmap_mincore(pmap, addr, &locked_pa);
			if (locked_pa != 0) {
				/*
				 * The page is mapped by this process but not
				 * both accessed and modified.  It is also
				 * managed.  Acquire the object lock so that
				 * other mappings might be examined.
				 */
				m = PHYS_TO_VM_PAGE(locked_pa);
				if (m->object != object) {
					if (object != NULL)
						VM_OBJECT_WUNLOCK(object);
					object = m->object;
					locked = VM_OBJECT_TRYWLOCK(object);
					vm_page_unlock(m);
					if (!locked) {
						VM_OBJECT_WLOCK(object);
						vm_page_lock(m);
						goto retry;
					}
				} else
					vm_page_unlock(m);
				KASSERT(m->valid == VM_PAGE_BITS_ALL,
				    ("mincore: page %p is mapped but invalid",
				    m));
			} else if (mincoreinfo == 0) {
				/*
				 * The page is not mapped by this process.  If
				 * the object implements managed pages, then
				 * determine if the page is resident so that
				 * the mappings might be examined.
				 */
				if (current->object.vm_object != object) {
					if (object != NULL)
						VM_OBJECT_WUNLOCK(object);
					object = current->object.vm_object;
					VM_OBJECT_WLOCK(object);
				}
				if (object->type == OBJT_DEFAULT ||
				    object->type == OBJT_SWAP ||
				    object->type == OBJT_VNODE) {
					pindex = OFF_TO_IDX(current->offset +
					    (addr - current->start));
					m = vm_page_lookup(object, pindex);
					if (m == NULL &&
					    vm_page_is_cached(object, pindex))
						mincoreinfo = MINCORE_INCORE;
					if (m != NULL && m->valid == 0)
						m = NULL;
					if (m != NULL)
						mincoreinfo = MINCORE_INCORE;
				}
			}
			if (m != NULL) {
				/* Examine other mappings to the page. */
				if (m->dirty == 0 && pmap_is_modified(m))
					vm_page_dirty(m);
				if (m->dirty != 0)
					mincoreinfo |= MINCORE_MODIFIED_OTHER;
				/*
				 * The first test for PGA_REFERENCED is an
				 * optimization.  The second test is
				 * required because a concurrent pmap
				 * operation could clear the last reference
				 * and set PGA_REFERENCED before the call to
				 * pmap_is_referenced().
				 */
				if ((m->aflags & PGA_REFERENCED) != 0 ||
				    pmap_is_referenced(m) ||
				    (m->aflags & PGA_REFERENCED) != 0)
					mincoreinfo |= MINCORE_REFERENCED_OTHER;
			}
			if (object != NULL)
				VM_OBJECT_WUNLOCK(object);

			/*
			 * subyte may page fault.  In case it needs to modify
			 * the map, we release the lock.
			 */
			vm_map_unlock_read(map);

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = OFF_TO_IDX(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make sure that
			 * the byte vector is zeroed for those skipped entries.
			 */
			while ((lastvecindex + 1) < vecindex) {
				++lastvecindex;
				error = subyte(vec + lastvecindex, 0);
				if (error) {
					error = EFAULT;
					goto done2;
				}
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte(vec + vecindex, mincoreinfo);
			if (error) {
				error = EFAULT;
				goto done2;
			}

			/*
			 * If the map has changed, due to the subyte, the previous
			 * output may be invalid.
			 */
			vm_map_lock_read(map);
			if (timestamp != map->timestamp)
				goto RestartScan;

			lastvecindex = vecindex;
			addr += PAGE_SIZE;
		}
	}

	/*
	 * subyte may page fault.  In case it needs to modify
	 * the map, we release the lock.
	 */
	vm_map_unlock_read(map);

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = OFF_TO_IDX(end - first_addr);
	while ((lastvecindex + 1) < vecindex) {
		++lastvecindex;
		error = subyte(vec + lastvecindex, 0);
		if (error) {
			error = EFAULT;
			goto done2;
		}
	}

	/*
	 * If the map has changed, due to the subyte, the previous
	 * output may be invalid.
	 */
	vm_map_lock_read(map);
	if (timestamp != map->timestamp)
		goto RestartScan;
	vm_map_unlock_read(map);
done2:
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct mlock_args {
	const void *addr;
	size_t len;
};
#endif
/*
 * MPSAFE
 */
int
sys_mlock(td, uap)
	struct thread *td;
	struct mlock_args *uap;
{

	return (vm_mlock(td->td_proc, td->td_ucred, uap->addr, uap->len));
}

int
vm_mlock(struct proc *proc, struct ucred *cred, const void *addr0, size_t len)
{
	vm_offset_t addr, end, last, start;
	vm_size_t npages, size;
	vm_map_t map;
	unsigned long nsize;
	int error;

	error = priv_check_cred(cred, PRIV_VM_MLOCK, 0);
	if (error)
		return (error);
	addr = (vm_offset_t)addr0;
	size = len;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	npages = atop(end - start);
	if (npages > vm_page_max_wired)
		return (ENOMEM);
	map = &proc->p_vmspace->vm_map;
	PROC_LOCK(proc);
	nsize = ptoa(npages + pmap_wired_count(map->pmap));
	if (nsize > lim_cur_proc(proc, RLIMIT_MEMLOCK)) {
		PROC_UNLOCK(proc);
		return (ENOMEM);
	}
	PROC_UNLOCK(proc);
	if (npages + vm_cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);
#ifdef RACCT
	if (racct_enable) {
		PROC_LOCK(proc);
		error = racct_set(proc, RACCT_MEMLOCK, nsize);
		PROC_UNLOCK(proc);
		if (error != 0)
			return (ENOMEM);
	}
#endif
	error = vm_map_wire(map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (racct_enable && error != KERN_SUCCESS) {
		PROC_LOCK(proc);
		racct_set(proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

#ifndef _SYS_SYSPROTO_H_
struct mlockall_args {
	int how;
};
#endif

/*
 * MPSAFE
 */
int
sys_mlockall(td, uap)
	struct thread *td;
	struct mlockall_args *uap;
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = priv_check(td, PRIV_VM_MLOCK);
	if (error)
		return (error);

	if ((uap->how == 0) || ((uap->how & ~(MCL_CURRENT|MCL_FUTURE)) != 0))
		return (EINVAL);

	/*
	 * If wiring all pages in the process would cause it to exceed
	 * a hard resource limit, return ENOMEM.
	 */
	if (!old_mlock && uap->how & MCL_CURRENT) {
		PROC_LOCK(td->td_proc);
		if (map->size > lim_cur(td, RLIMIT_MEMLOCK)) {
			PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		PROC_UNLOCK(td->td_proc);
	}
#ifdef RACCT
	if (racct_enable) {
		PROC_LOCK(td->td_proc);
		error = racct_set(td->td_proc, RACCT_MEMLOCK, map->size);
		PROC_UNLOCK(td->td_proc);
		if (error != 0)
			return (ENOMEM);
	}
#endif

	if (uap->how & MCL_FUTURE) {
		vm_map_lock(map);
		vm_map_modflags(map, MAP_WIREFUTURE, 0);
		vm_map_unlock(map);
		error = 0;
	}

	if (uap->how & MCL_CURRENT) {
		/*
		 * P1003.1-2001 mandates that all currently mapped pages
		 * will be memory resident and locked (wired) upon return
		 * from mlockall(). vm_map_wire() will wire pages, by
		 * calling vm_fault_wire() for each page in the region.
		 */
		error = vm_map_wire(map, vm_map_min(map), vm_map_max(map),
		    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
		error = (error == KERN_SUCCESS ? 0 : EAGAIN);
	}
#ifdef RACCT
	if (racct_enable && error != KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct munlockall_args {
	register_t dummy;
};
#endif

/*
 * MPSAFE
 */
int
sys_munlockall(td, uap)
	struct thread *td;
	struct munlockall_args *uap;
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);

	/* Clear the MAP_WIREFUTURE flag from this vm_map. */
	vm_map_lock(map);
	vm_map_modflags(map, 0, MAP_WIREFUTURE);
	vm_map_unlock(map);

	/* Forcibly unwire all pages. */
	error = vm_map_unwire(map, vm_map_min(map), vm_map_max(map),
	    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
#ifdef RACCT
	if (racct_enable && error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK, 0);
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct munlock_args {
	const void *addr;
	size_t len;
};
#endif
/*
 * MPSAFE
 */
int
sys_munlock(td, uap)
	struct thread *td;
	struct munlock_args *uap;
{
	vm_offset_t addr, end, last, start;
	vm_size_t size;
#ifdef RACCT
	vm_map_t map;
#endif
	int error;

	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);
	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	error = vm_map_unwire(&td->td_proc->p_vmspace->vm_map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (racct_enable && error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		map = &td->td_proc->p_vmspace->vm_map;
		racct_set(td->td_proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(td->td_proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * vm_mmap_vnode()
 *
 * Helper function for vm_mmap.  Perform sanity checks specific to mmap
 * operations on vnodes.
 */
int
vm_mmap_vnode(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct vnode *vp, vm_ooffset_t *foffp, vm_object_t *objp,
    boolean_t *writecounted)
{
	struct vattr va;
	vm_object_t obj;
	vm_offset_t foff;
	struct ucred *cred;
	int error, flags, locktype;

	cred = td->td_ucred;
	if ((*maxprotp & VM_PROT_WRITE) && (*flagsp & MAP_SHARED))
		locktype = LK_EXCLUSIVE;
	else
		locktype = LK_SHARED;
	if ((error = vget(vp, locktype, td)) != 0)
		return (error);
	foff = *foffp;
	flags = *flagsp;
	obj = vp->v_object;
	if (vp->v_type == VREG) {
		/*
		 * Get the proper underlying object
		 */
		if (obj == NULL) {
			error = EINVAL;
			goto done;
		}
		if (obj->type == OBJT_VNODE && obj->handle != vp) {
			vput(vp);
			vp = (struct vnode *)obj->handle;
			/*
			 * Bypass filesystems obey the mpsafety of the
			 * underlying fs.  Tmpfs never bypasses.
			 */
			error = vget(vp, locktype, td);
			if (error != 0)
				return (error);
		}
		if (locktype == LK_EXCLUSIVE) {
			*writecounted = TRUE;
			vnode_pager_update_writecount(obj, 0, objsize);
		}
	} else {
		error = EINVAL;
		goto done;
	}
	if ((error = VOP_GETATTR(vp, &va, cred)))
		goto done;
#ifdef MAC
	/* This relies on VM_PROT_* matching PROT_*. */
	error = mac_vnode_check_mmap(cred, vp, (int)prot, flags);
	if (error != 0)
		goto done;
#endif
	if ((flags & MAP_SHARED) != 0) {
		if ((va.va_flags & (SF_SNAPSHOT|IMMUTABLE|APPEND)) != 0) {
			if (prot & VM_PROT_WRITE) {
				error = EPERM;
				goto done;
			}
			*maxprotp &= ~VM_PROT_WRITE;
		}
	}
	/*
	 * If it is a regular file without any references
	 * we do not need to sync it.
	 * Adjust object size to be the size of actual file.
	 */
	objsize = round_page(va.va_size);
	if (va.va_nlink == 0)
		flags |= MAP_NOSYNC;
	if (obj->type == OBJT_VNODE) {
		obj = vm_pager_allocate(OBJT_VNODE, vp, objsize, prot, foff,
		    cred);
		if (obj == NULL) {
			error = ENOMEM;
			goto done;
		}
	} else {
		KASSERT(obj->type == OBJT_DEFAULT || obj->type == OBJT_SWAP,
		    ("wrong object type"));
		VM_OBJECT_WLOCK(obj);
		vm_object_reference_locked(obj);
#if VM_NRESERVLEVEL > 0
		vm_object_color(obj, 0);
#endif
		VM_OBJECT_WUNLOCK(obj);
	}
	*objp = obj;
	*flagsp = flags;

	vfs_mark_atime(vp, cred);

done:
	if (error != 0 && *writecounted) {
		*writecounted = FALSE;
		vnode_pager_update_writecount(obj, objsize, 0);
	}
	vput(vp);
	return (error);
}

/*
 * vm_mmap_cdev()
 *
 * MPSAFE
 *
 * Helper function for vm_mmap.  Perform sanity checks specific to mmap
 * operations on cdevs.
 */
int
vm_mmap_cdev(struct thread *td, vm_size_t objsize, vm_prot_t prot,
    vm_prot_t *maxprotp, int *flagsp, struct cdev *cdev, struct cdevsw *dsw,
    vm_ooffset_t *foff, vm_object_t *objp)
{
	vm_object_t obj;
	int error, flags;

	flags = *flagsp;

	if (dsw->d_flags & D_MMAP_ANON) {
		*objp = NULL;
		*foff = 0;
		*maxprotp = VM_PROT_ALL;
		*flagsp |= MAP_ANON;
		return (0);
	}
	/*
	 * cdevs do not provide private mappings of any kind.
	 */
	if ((*maxprotp & VM_PROT_WRITE) == 0 &&
	    (prot & VM_PROT_WRITE) != 0)
		return (EACCES);
	if (flags & (MAP_PRIVATE|MAP_COPY))
		return (EINVAL);
	/*
	 * Force device mappings to be shared.
	 */
	flags |= MAP_SHARED;
#ifdef MAC_XXX
	error = mac_cdev_check_mmap(td->td_ucred, cdev, (int)prot);
	if (error != 0)
		return (error);
#endif
	/*
	 * First, try d_mmap_single().  If that is not implemented
	 * (returns ENODEV), fall back to using the device pager.
	 * Note that d_mmap_single() must return a reference to the
	 * object (it needs to bump the reference count of the object
	 * it returns somehow).
	 *
	 * XXX assumes VM_PROT_* == PROT_*
	 */
	error = dsw->d_mmap_single(cdev, foff, objsize, objp, (int)prot);
	if (error != ENODEV)
		return (error);
	obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff,
	    td->td_ucred);
	if (obj == NULL)
		return (EINVAL);
	*objp = obj;
	*flagsp = flags;
	return (0);
}

/*
 * vm_mmap()
 *
 * Internal version of mmap used by exec, sys5 shared memory, and
 * various device drivers.  Handle is either a vnode pointer, a
 * character device, or NULL for MAP_ANON.
 */
int
vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags,
    objtype_t handle_type, void *handle,
    vm_ooffset_t foff)
{
	vm_object_t object;
	struct thread *td = curthread;
	int error;
	boolean_t writecounted;

	if (size == 0)
		return (EINVAL);

	size = round_page(size);
	object = NULL;
	writecounted = FALSE;

	/*
	 * Lookup/allocate object.
	 */
	switch (handle_type) {
	case OBJT_DEVICE: {
		struct cdevsw *dsw;
		struct cdev *cdev;
		int ref;

		cdev = handle;
		dsw = dev_refthread(cdev, &ref);
		if (dsw == NULL)
			return (ENXIO);
		error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, cdev,
		    dsw, &foff, &object);
		dev_relthread(cdev, ref);
		break;
	}
	case OBJT_VNODE:
		error = vm_mmap_vnode(td, size, prot, &maxprot, &flags,
		    handle, &foff, &object, &writecounted);
		break;
	case OBJT_DEFAULT:
		if (handle == NULL) {
			error = 0;
			break;
		}
		/* FALLTHROUGH */
	default:
		error = EINVAL;
		break;
	}
	if (error)
		return (error);

	error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object,
	    foff, writecounted, td);
	if (error != 0 && object != NULL) {
		/*
		 * If this mapping was accounted for in the vnode's
		 * writecount, then undo that now.
		 */
		if (writecounted)
			vnode_pager_release_writecount(object, 0, size);
		vm_object_deallocate(object);
	}
	return (error);
}

/*
 * Internal version of mmap that maps a specific VM object into a
 * map.  Called by mmap for MAP_ANON, vm_mmap, shm_mmap, and vn_mmap.
 */
int
vm_mmap_object(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags, vm_object_t object, vm_ooffset_t foff,
    boolean_t writecounted, struct thread *td)
{
	boolean_t fitit;
	int docow, error, findspace, rv;

	if (map == &td->td_proc->p_vmspace->vm_map) {
		PROC_LOCK(td->td_proc);
		if (map->size + size > lim_cur_proc(td->td_proc, RLIMIT_VMEM)) {
			PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		if (racct_set(td->td_proc, RACCT_VMEM, map->size + size)) {
			PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		if (!old_mlock && map->flags & MAP_WIREFUTURE) {
			if (ptoa(pmap_wired_count(map->pmap)) + size >
			    lim_cur_proc(td->td_proc, RLIMIT_MEMLOCK)) {
				racct_set_force(td->td_proc, RACCT_VMEM,
				    map->size);
				PROC_UNLOCK(td->td_proc);
				return (ENOMEM);
			}
			error = racct_set(td->td_proc, RACCT_MEMLOCK,
			    ptoa(pmap_wired_count(map->pmap)) + size);
			if (error != 0) {
				racct_set_force(td->td_proc, RACCT_VMEM,
				    map->size);
				PROC_UNLOCK(td->td_proc);
				return (error);
			}
		}
		PROC_UNLOCK(td->td_proc);
	}

	/*
	 * We currently can only deal with page aligned file offsets.
	 * The mmap() system call already enforces this by subtracting
	 * the page offset from the file offset, but checking here
	 * catches errors in device drivers (e.g. d_mmap_single()
	 * callbacks) and other internal mapping requests (such as in
	 * exec).
	 */
	if (foff & PAGE_MASK)
		return (EINVAL);

	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		if (*addr != trunc_page(*addr))
			return (EINVAL);
		fitit = FALSE;
	}

	if (flags & MAP_ANON) {
		if (object != NULL || foff != 0)
			return (EINVAL);
		docow = 0;
	} else if (flags & MAP_PREFAULT_READ)
		docow = MAP_PREFAULT;
	else
		docow = MAP_PREFAULT_PARTIAL;

	if ((flags & (MAP_ANON|MAP_SHARED)) == 0)
		docow |= MAP_COPY_ON_WRITE;
	if (flags & MAP_NOSYNC)
		docow |= MAP_DISABLE_SYNCER;
	if (flags & MAP_NOCORE)
		docow |= MAP_DISABLE_COREDUMP;
	/* Shared memory is also shared with children. */
	if (flags & MAP_SHARED)
		docow |= MAP_INHERIT_SHARE;
	if (writecounted)
		docow |= MAP_VN_WRITECOUNT;
	if (flags & MAP_STACK) {
		if (object != NULL)
			return (EINVAL);
		docow |= MAP_STACK_GROWS_DOWN;
	}
	if ((flags & MAP_EXCL) != 0)
		docow |= MAP_CHECK_EXCL;

	if (fitit) {
		if ((flags & MAP_ALIGNMENT_MASK) == MAP_ALIGNED_SUPER)
			findspace = VMFS_SUPER_SPACE;
		else if ((flags & MAP_ALIGNMENT_MASK) != 0)
			findspace = VMFS_ALIGNED_SPACE(flags >>
			    MAP_ALIGNMENT_SHIFT);
		else
			findspace = VMFS_OPTIMAL_SPACE;
		rv = vm_map_find(map, object, foff, addr, size,
#ifdef MAP_32BIT
		    flags & MAP_32BIT ? MAP_32BIT_MAX_ADDR :
#endif
		    0, findspace, prot, maxprot, docow);
	} else {
		rv = vm_map_fixed(map, object, foff, *addr, size,
		    prot, maxprot, docow);
	}

	if (rv == KERN_SUCCESS) {
		/*
		 * If the process has requested that all future mappings
		 * be wired, then heed this.
		 */
		if (map->flags & MAP_WIREFUTURE) {
			vm_map_wire(map, *addr, *addr + size,
			    VM_MAP_WIRE_USER | ((flags & MAP_STACK) ?
			    VM_MAP_WIRE_HOLESOK : VM_MAP_WIRE_NOHOLES));
		}
	}
	return (vm_mmap_to_errno(rv));
}

/*
 * Translate a Mach VM return code to zero on success or the appropriate errno
 * on failure.
 */
int
vm_mmap_to_errno(int rv)
{

	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}