/*-
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_hwpmc_hooks.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/capsicum.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/procctl.h>
#include <sys/racct.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/vmmeter.h>

#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vm_page.h>
#include <vm/vnode_pager.h>

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

int old_mlock = 0;
SYSCTL_INT(_vm, OID_AUTO, old_mlock, CTLFLAG_RWTUN, &old_mlock, 0,
    "Do not apply RLIMIT_MEMLOCK on mlockall");

#ifdef MAP_32BIT
#define	MAP_32BIT_MAX_ADDR	((vm_offset_t)1 << 31)
#endif

#ifndef _SYS_SYSPROTO_H_
struct sbrk_args {
	int incr;
};
#endif

/*
 * MPSAFE
 */
/* ARGSUSED */
int
sys_sbrk(td, uap)
	struct thread *td;
	struct sbrk_args *uap;
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#ifndef _SYS_SYSPROTO_H_
struct sstk_args {
	int incr;
};
#endif

/*
 * MPSAFE
 */
/* ARGSUSED */
int
sys_sstk(td, uap)
	struct thread *td;
	struct sstk_args *uap;
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43)
#ifndef _SYS_SYSPROTO_H_
struct getpagesize_args {
	int dummy;
};
#endif

int
ogetpagesize(td, uap)
	struct thread *td;
	struct getpagesize_args *uap;
{
	/* MP SAFE */
	td->td_retval[0] = PAGE_SIZE;
	return (0);
}
#endif /* COMPAT_43 */


/*
 * Memory Map (mmap) system call.  Note that the file offset
 * and address are allowed to be NOT page aligned, though if
 * the MAP_FIXED flag is set, both must have the same remainder
 * modulo the PAGE_SIZE (POSIX 1003.1b).  If the address is not
 * page-aligned, the actual mapping starts at trunc_page(addr)
 * and the return value is adjusted up by the page offset.
 *
 * Generally speaking, only character devices which are themselves
 * memory-based, such as a video framebuffer, can be mmap'd.  Otherwise
 * there would be no cache coherency between a descriptor and a VM mapping
 * both to the same character device.
 */
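/*
 * For example, a call such as
 *
 *	mmap(NULL, 100, PROT_READ, MAP_SHARED, fd, 0x1234)
 *
 * is satisfied by mapping a page starting at file offset 0x1000 and
 * returning the chosen address plus 0x234, so that the value handed
 * back to the caller still corresponds to file offset 0x1234.
 */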
#ifndef _SYS_SYSPROTO_H_
struct mmap_args {
	void *addr;
	size_t len;
	int prot;
	int flags;
	int fd;
	long pad;
	off_t pos;
};
#endif

/*
 * MPSAFE
 */
int
sys_mmap(td, uap)
	struct thread *td;
	struct mmap_args *uap;
{
	struct file *fp;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t cap_maxprot;
	int align, error, flags, prot;
	off_t pos;
	struct vmspace *vms = td->td_proc->p_vmspace;
	cap_rights_t rights;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot;
	flags = uap->flags;
	pos = uap->pos;

	fp = NULL;
	AUDIT_ARG_FD(uap->fd);

	/*
	 * Ignore old flags that used to be defined but did not do anything.
	 */
	flags &= ~(MAP_RESERVED0020 | MAP_RESERVED0040);

	/*
	 * Enforce the constraints.
	 * Mapping of length 0 is only allowed for old binaries.
	 * Anonymous mapping shall specify -1 as file descriptor and
	 * zero position for new code.  Be nice to ancient a.out
	 * binaries and correct pos for anonymous mapping, since old
	 * ld.so sometimes issues anonymous map requests with non-zero
	 * pos.
	 */
	if (!SV_CURPROC_FLAG(SV_AOUT)) {
		if ((uap->len == 0 && curproc->p_osrel >= P_OSREL_MAP_ANON) ||
		    ((flags & MAP_ANON) != 0 && (uap->fd != -1 || pos != 0)))
			return (EINVAL);
	} else {
		if ((flags & MAP_ANON) != 0)
			pos = 0;
	}

	if (flags & MAP_STACK) {
		if ((uap->fd != -1) ||
		    ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE)))
			return (EINVAL);
		flags |= MAP_ANON;
		pos = 0;
	}
	if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | MAP_HASSEMAPHORE |
	    MAP_STACK | MAP_NOSYNC | MAP_ANON | MAP_EXCL | MAP_NOCORE |
	    MAP_PREFAULT_READ |
#ifdef MAP_32BIT
	    MAP_32BIT |
#endif
	    MAP_ALIGNMENT_MASK)) != 0)
		return (EINVAL);
	if ((flags & (MAP_EXCL | MAP_FIXED)) == MAP_EXCL)
		return (EINVAL);
	if ((flags & (MAP_SHARED | MAP_PRIVATE)) == (MAP_SHARED | MAP_PRIVATE))
		return (EINVAL);
	if (prot != PROT_NONE &&
	    (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) != 0)
		return (EINVAL);

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;

	/* Adjust size for rounding (on both ends). */
	size += pageoff;			/* low end... */
	size = (vm_size_t) round_page(size);	/* hi end */

	/* Ensure alignment is at least a page and fits in a pointer. */
	align = flags & MAP_ALIGNMENT_MASK;
	if (align != 0 && align != MAP_ALIGNED_SUPER &&
	    (align >> MAP_ALIGNMENT_SHIFT >= sizeof(void *) * NBBY ||
	    align >> MAP_ALIGNMENT_SHIFT < PAGE_SHIFT))
		return (EINVAL);
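
	/*
	 * For instance, MAP_ALIGNED(21) requests a 2MB-aligned mapping (a
	 * superpage on amd64), while MAP_ALIGNED_SUPER lets the VM system
	 * choose a superpage-sized alignment itself.
	 */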

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		/* Address range must be all in user VM space. */
		if (addr < vm_map_min(&vms->vm_map) ||
		    addr + size > vm_map_max(&vms->vm_map))
			return (EINVAL);
		if (addr + size < addr)
			return (EINVAL);
#ifdef MAP_32BIT
		if (flags & MAP_32BIT && addr + size > MAP_32BIT_MAX_ADDR)
			return (EINVAL);
	} else if (flags & MAP_32BIT) {
		/*
		 * For MAP_32BIT, override the hint if it is too high and
		 * do not bother moving the mapping past the heap (since
		 * the heap is usually above 2GB).
		 */
		if (addr + size > MAP_32BIT_MAX_ADDR)
			addr = 0;
#endif
	} else {
		/*
		 * XXX for non-fixed mappings where no hint is provided or
		 * the hint would fall in the potential heap space,
		 * place it after the end of the largest possible heap.
		 *
		 * There should really be a pmap call to determine a reasonable
		 * location.
		 */
		if (addr == 0 ||
		    (addr >= round_page((vm_offset_t)vms->vm_taddr) &&
		    addr < round_page((vm_offset_t)vms->vm_daddr +
		    lim_max(td, RLIMIT_DATA))))
			addr = round_page((vm_offset_t)vms->vm_daddr +
			    lim_max(td, RLIMIT_DATA));
	}
	if (size == 0) {
		/*
		 * Return success without mapping anything for old
		 * binaries that request a page-aligned mapping of
		 * length 0.  For modern binaries, this function
		 * returns an error earlier.
		 */
		error = 0;
	} else if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.
		 *
		 * This relies on VM_PROT_* matching PROT_*.
		 */
		error = vm_mmap_object(&vms->vm_map, &addr, size, prot,
		    VM_PROT_ALL, flags, NULL, pos, FALSE, td);
	} else {
		/*
		 * Mapping file, get fp for validation and don't let the
		 * descriptor disappear on us if we block. Check capability
		 * rights, but also return the maximum rights to be combined
		 * with maxprot later.
		 */
		cap_rights_init(&rights, CAP_MMAP);
		if (prot & PROT_READ)
			cap_rights_set(&rights, CAP_MMAP_R);
		if ((flags & MAP_SHARED) != 0) {
			if (prot & PROT_WRITE)
				cap_rights_set(&rights, CAP_MMAP_W);
		}
		if (prot & PROT_EXEC)
			cap_rights_set(&rights, CAP_MMAP_X);
		error = fget_mmap(td, uap->fd, &rights, &cap_maxprot, &fp);
		if (error != 0)
			goto done;
		if ((flags & (MAP_SHARED | MAP_PRIVATE)) == 0 &&
		    td->td_proc->p_osrel >= P_OSREL_MAP_FSTRICT) {
			error = EINVAL;
			goto done;
		}

		/* This relies on VM_PROT_* matching PROT_*. */
		error = fo_mmap(fp, &vms->vm_map, &addr, size, prot,
		    cap_maxprot, flags, pos, td);
	}

	if (error == 0)
		td->td_retval[0] = (register_t) (addr + pageoff);
done:
	if (fp)
		fdrop(fp, td);

	return (error);
}

#if defined(COMPAT_FREEBSD6)
int
freebsd6_mmap(struct thread *td, struct freebsd6_mmap_args *uap)
{
	struct mmap_args oargs;

	oargs.addr = uap->addr;
	oargs.len = uap->len;
	oargs.prot = uap->prot;
	oargs.flags = uap->flags;
	oargs.fd = uap->fd;
	oargs.pos = uap->pos;
	return (sys_mmap(td, &oargs));
}
#endif

#ifdef COMPAT_43
#ifndef _SYS_SYSPROTO_H_
struct ommap_args {
	caddr_t addr;
	int len;
	int prot;
	int flags;
	int fd;
	long pos;
};
#endif
int
ommap(td, uap)
	struct thread *td;
	struct ommap_args *uap;
{
	struct mmap_args nargs;
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC | PROT_WRITE,
		PROT_READ,
		PROT_EXEC | PROT_READ,
		PROT_WRITE | PROT_READ,
		PROT_EXEC | PROT_WRITE | PROT_READ,
	};

#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100

	nargs.addr = uap->addr;
	nargs.len = uap->len;
	nargs.prot = cvtbsdprot[uap->prot & 0x7];
#ifdef COMPAT_FREEBSD32
#if defined(__amd64__)
	if (i386_read_exec && SV_PROC_FLAG(td->td_proc, SV_ILP32) &&
	    nargs.prot != 0)
		nargs.prot |= PROT_EXEC;
#endif
#endif
	nargs.flags = 0;
	if (uap->flags & OMAP_ANON)
		nargs.flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		nargs.flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		nargs.flags |= MAP_SHARED;
	else
		nargs.flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		nargs.flags |= MAP_FIXED;
	nargs.fd = uap->fd;
	nargs.pos = uap->pos;
	return (sys_mmap(td, &nargs));
}
#endif /* COMPAT_43 */


#ifndef _SYS_SYSPROTO_H_
struct msync_args {
	void *addr;
	size_t len;
	int flags;
};
#endif
/*
 * MPSAFE
 */
int
sys_msync(td, uap)
	struct thread *td;
	struct msync_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int flags;
	vm_map_t map;
	int rv;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	flags = uap->flags;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	map = &td->td_proc->p_vmspace->vm_map;

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_sync(map, addr, addr + size, (flags & MS_ASYNC) == 0,
	    (flags & MS_INVALIDATE) != 0);
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
		return (ENOMEM);
	case KERN_INVALID_ARGUMENT:
		return (EBUSY);
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
}

#ifndef _SYS_SYSPROTO_H_
struct munmap_args {
	void *addr;
	size_t len;
};
#endif
/*
 * MPSAFE
 */
int
sys_munmap(td, uap)
	struct thread *td;
	struct munmap_args *uap;
{
#ifdef HWPMC_HOOKS
	struct pmckern_map_out pkm;
	vm_map_entry_t entry;
	bool pmc_handled;
#endif
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_map_t map;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	if (size == 0)
		return (EINVAL);

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	if (addr < vm_map_min(map) || addr + size > vm_map_max(map))
		return (EINVAL);
	vm_map_lock(map);
#ifdef HWPMC_HOOKS
	pmc_handled = false;
	if (PMC_HOOK_INSTALLED(PMC_FN_MUNMAP)) {
		pmc_handled = true;
		/*
		 * Inform hwpmc if the address range being unmapped contains
		 * an executable region.
		 */
		pkm.pm_address = (uintptr_t) NULL;
		if (vm_map_lookup_entry(map, addr, &entry)) {
			for (;
			    entry != &map->header && entry->start < addr + size;
			    entry = entry->next) {
				if (vm_map_check_protection(map, entry->start,
				    entry->end, VM_PROT_EXECUTE) == TRUE) {
					pkm.pm_address = (uintptr_t) addr;
					pkm.pm_size = (size_t) size;
					break;
				}
			}
		}
	}
#endif
	vm_map_delete(map, addr, addr + size);

#ifdef HWPMC_HOOKS
	if (__predict_false(pmc_handled)) {
		/* downgrade the lock to prevent a LOR with the pmc-sx lock */
		vm_map_lock_downgrade(map);
		if (pkm.pm_address != (uintptr_t) NULL)
			PMC_CALL_HOOK(td, PMC_FN_MUNMAP, (void *) &pkm);
		vm_map_unlock_read(map);
	} else
#endif
		vm_map_unlock(map);

	/* vm_map_delete returns nothing but KERN_SUCCESS anyway */
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mprotect_args {
	const void *addr;
	size_t len;
	int prot;
};
#endif
/*
 * MPSAFE
 */
int
sys_mprotect(td, uap)
	struct thread *td;
	struct mprotect_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t prot;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_protect(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, prot, FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	case KERN_RESOURCE_SHORTAGE:
		return (ENOMEM);
	}
	return (EINVAL);
}
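
/*
 * Memory Inherit (minherit) system call.  Set the inheritance applied
 * to the given address range on fork(2): shared with the child,
 * copied, or not mapped at all in the child.
 */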
#ifndef _SYS_SYSPROTO_H_
struct minherit_args {
	void *addr;
	size_t len;
	int inherit;
};
#endif
/*
 * MPSAFE
 */
int
sys_minherit(td, uap)
	struct thread *td;
	struct minherit_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_inherit_t inherit;

	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	inherit = uap->inherit;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_inherit(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct madvise_args {
	void *addr;
	size_t len;
	int behav;
};
#endif

/*
 * MPSAFE
 */
int
sys_madvise(td, uap)
	struct thread *td;
	struct madvise_args *uap;
{
	vm_offset_t start, end;
	vm_map_t map;
	int flags;

	/*
	 * Check for our special case, advising the swap pager we are
	 * "immortal."
	 */
	if (uap->behav == MADV_PROTECT) {
		flags = PPROT_SET;
		return (kern_procctl(td, P_PID, td->td_proc->p_pid,
		    PROC_SPROTECT, &flags));
	}

	/*
	 * Check for illegal behavior
	 */
	if (uap->behav < 0 || uap->behav > MADV_CORE)
		return (EINVAL);
	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	if ((vm_offset_t)uap->addr < vm_map_min(map) ||
	    (vm_offset_t)uap->addr + uap->len > vm_map_max(map))
		return (EINVAL);
	if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr)
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	start = trunc_page((vm_offset_t) uap->addr);
	end = round_page((vm_offset_t) uap->addr + uap->len);

	if (vm_map_madvise(map, start, end, uap->behav))
		return (EINVAL);
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mincore_args {
	const void *addr;
	size_t len;
	char *vec;
};
#endif

/*
 * MPSAFE
 */
int
sys_mincore(td, uap)
	struct thread *td;
	struct mincore_args *uap;
{
	vm_offset_t addr, first_addr;
	vm_offset_t end, cend;
	pmap_t pmap;
	vm_map_t map;
	char *vec;
	int error = 0;
	int vecindex, lastvecindex;
	vm_map_entry_t current;
	vm_map_entry_t entry;
	vm_object_t object;
	vm_paddr_t locked_pa;
	vm_page_t m;
	vm_pindex_t pindex;
	int mincoreinfo;
	unsigned int timestamp;
	boolean_t locked;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page((vm_offset_t) uap->addr);
	end = addr + (vm_size_t)round_page(uap->len);
	map = &td->td_proc->p_vmspace->vm_map;
	if (end > vm_map_max(map) || end < addr)
		return (ENOMEM);

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	pmap = vmspace_pmap(td->td_proc->p_vmspace);

	vm_map_lock_read(map);
RestartScan:
	timestamp = map->timestamp;

	if (!vm_map_lookup_entry(map, addr, &entry)) {
		vm_map_unlock_read(map);
		return (ENOMEM);
	}

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current process's address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for (current = entry;
	    (current != &map->header) && (current->start < end);
	    current = current->next) {

		/*
		 * check for contiguity
		 */
		if (current->end < end &&
		    (entry->next == &map->header ||
		    current->next->start > current->end)) {
			vm_map_unlock_read(map);
			return (ENOMEM);
		}

		/*
		 * ignore submaps (for now) or null objects
		 */
		if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) ||
		    current->object.vm_object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		/*
		 * scan this entry one page at a time
		 */
		while (addr < cend) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 */
			object = NULL;
			locked_pa = 0;
		retry:
			m = NULL;
			mincoreinfo = pmap_mincore(pmap, addr, &locked_pa);
			if (locked_pa != 0) {
				/*
				 * The page is mapped by this process but not
				 * both accessed and modified.  It is also
				 * managed.  Acquire the object lock so that
				 * other mappings might be examined.
				 */
				m = PHYS_TO_VM_PAGE(locked_pa);
				if (m->object != object) {
					if (object != NULL)
						VM_OBJECT_WUNLOCK(object);
					object = m->object;
					locked = VM_OBJECT_TRYWLOCK(object);
					vm_page_unlock(m);
					if (!locked) {
						VM_OBJECT_WLOCK(object);
						vm_page_lock(m);
						goto retry;
					}
				} else
					vm_page_unlock(m);
				KASSERT(m->valid == VM_PAGE_BITS_ALL,
				    ("mincore: page %p is mapped but invalid",
				    m));
			} else if (mincoreinfo == 0) {
				/*
				 * The page is not mapped by this process.  If
				 * the object implements managed pages, then
				 * determine if the page is resident so that
				 * the mappings might be examined.
				 */
				if (current->object.vm_object != object) {
					if (object != NULL)
						VM_OBJECT_WUNLOCK(object);
					object = current->object.vm_object;
					VM_OBJECT_WLOCK(object);
				}
				if (object->type == OBJT_DEFAULT ||
				    object->type == OBJT_SWAP ||
				    object->type == OBJT_VNODE) {
					pindex = OFF_TO_IDX(current->offset +
					    (addr - current->start));
					m = vm_page_lookup(object, pindex);
					if (m != NULL && m->valid == 0)
						m = NULL;
					if (m != NULL)
						mincoreinfo = MINCORE_INCORE;
				}
			}
			if (m != NULL) {
				/* Examine other mappings to the page. */
				if (m->dirty == 0 && pmap_is_modified(m))
					vm_page_dirty(m);
				if (m->dirty != 0)
					mincoreinfo |= MINCORE_MODIFIED_OTHER;
				/*
				 * The first test for PGA_REFERENCED is an
				 * optimization.  The second test is
				 * required because a concurrent pmap
				 * operation could clear the last reference
				 * and set PGA_REFERENCED before the call to
				 * pmap_is_referenced().
				 */
				if ((m->aflags & PGA_REFERENCED) != 0 ||
				    pmap_is_referenced(m) ||
				    (m->aflags & PGA_REFERENCED) != 0)
					mincoreinfo |= MINCORE_REFERENCED_OTHER;
			}
			if (object != NULL)
				VM_OBJECT_WUNLOCK(object);

			/*
			 * subyte may page fault.  In case it needs to modify
			 * the map, we release the lock.
			 */
			vm_map_unlock_read(map);

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = OFF_TO_IDX(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make sure that
			 * the byte vector is zeroed for those skipped entries.
			 */
			while ((lastvecindex + 1) < vecindex) {
				++lastvecindex;
				error = subyte(vec + lastvecindex, 0);
				if (error) {
					error = EFAULT;
					goto done2;
				}
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte(vec + vecindex, mincoreinfo);
			if (error) {
				error = EFAULT;
				goto done2;
			}

			/*
			 * If the map has changed, due to the subyte, the previous
			 * output may be invalid.
			 */
			vm_map_lock_read(map);
			if (timestamp != map->timestamp)
				goto RestartScan;

			lastvecindex = vecindex;
			addr += PAGE_SIZE;
		}
	}

	/*
	 * subyte may page fault.  In case it needs to modify
	 * the map, we release the lock.
	 */
	vm_map_unlock_read(map);

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = OFF_TO_IDX(end - first_addr);
	while ((lastvecindex + 1) < vecindex) {
		++lastvecindex;
		error = subyte(vec + lastvecindex, 0);
		if (error) {
			error = EFAULT;
			goto done2;
		}
	}

	/*
	 * If the map has changed, due to the subyte, the previous
	 * output may be invalid.
	 */
	vm_map_lock_read(map);
	if (timestamp != map->timestamp)
		goto RestartScan;
	vm_map_unlock_read(map);
done2:
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct mlock_args {
	const void *addr;
	size_t len;
};
#endif
/*
 * MPSAFE
 */
int
sys_mlock(td, uap)
	struct thread *td;
	struct mlock_args *uap;
{

	return (vm_mlock(td->td_proc, td->td_ucred, uap->addr, uap->len));
}

int
vm_mlock(struct proc *proc, struct ucred *cred, const void *addr0, size_t len)
{
	vm_offset_t addr, end, last, start;
	vm_size_t npages, size;
	vm_map_t map;
	unsigned long nsize;
	int error;

	error = priv_check_cred(cred, PRIV_VM_MLOCK, 0);
	if (error)
		return (error);
	addr = (vm_offset_t)addr0;
	size = len;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	npages = atop(end - start);
	if (npages > vm_page_max_wired)
		return (ENOMEM);
	map = &proc->p_vmspace->vm_map;
	PROC_LOCK(proc);
	nsize = ptoa(npages + pmap_wired_count(map->pmap));
	if (nsize > lim_cur_proc(proc, RLIMIT_MEMLOCK)) {
		PROC_UNLOCK(proc);
		return (ENOMEM);
	}
	PROC_UNLOCK(proc);
	if (npages + vm_cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);
#ifdef RACCT
	if (racct_enable) {
		PROC_LOCK(proc);
		error = racct_set(proc, RACCT_MEMLOCK, nsize);
		PROC_UNLOCK(proc);
		if (error != 0)
			return (ENOMEM);
	}
#endif
	error = vm_map_wire(map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (racct_enable && error != KERN_SUCCESS) {
		PROC_LOCK(proc);
		racct_set(proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}
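
/*
 * mlockall(2): wire every page currently mapped by the process
 * (MCL_CURRENT) and/or mark the map with MAP_WIREFUTURE so that
 * future mappings are wired as they are created (MCL_FUTURE).
 */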
#ifndef _SYS_SYSPROTO_H_
struct mlockall_args {
	int how;
};
#endif

/*
 * MPSAFE
 */
int
sys_mlockall(td, uap)
	struct thread *td;
	struct mlockall_args *uap;
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = priv_check(td, PRIV_VM_MLOCK);
	if (error)
		return (error);

	if ((uap->how == 0) || ((uap->how & ~(MCL_CURRENT|MCL_FUTURE)) != 0))
		return (EINVAL);

	/*
	 * If wiring all pages in the process would cause it to exceed
	 * a hard resource limit, return ENOMEM.
	 */
	if (!old_mlock && uap->how & MCL_CURRENT) {
		PROC_LOCK(td->td_proc);
		if (map->size > lim_cur(td, RLIMIT_MEMLOCK)) {
			PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		PROC_UNLOCK(td->td_proc);
	}
#ifdef RACCT
	if (racct_enable) {
		PROC_LOCK(td->td_proc);
		error = racct_set(td->td_proc, RACCT_MEMLOCK, map->size);
		PROC_UNLOCK(td->td_proc);
		if (error != 0)
			return (ENOMEM);
	}
#endif

	if (uap->how & MCL_FUTURE) {
		vm_map_lock(map);
		vm_map_modflags(map, MAP_WIREFUTURE, 0);
		vm_map_unlock(map);
		error = 0;
	}

	if (uap->how & MCL_CURRENT) {
		/*
		 * P1003.1-2001 mandates that all currently mapped pages
		 * will be memory resident and locked (wired) upon return
		 * from mlockall(). vm_map_wire() will wire pages, by
		 * calling vm_fault_wire() for each page in the region.
		 */
		error = vm_map_wire(map, vm_map_min(map), vm_map_max(map),
		    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
		error = (error == KERN_SUCCESS ? 0 : EAGAIN);
	}
#ifdef RACCT
	if (racct_enable && error != KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}
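
/*
 * munlockall(2): clear MAP_WIREFUTURE and unwire every page in the
 * process's address space.
 */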
#ifndef _SYS_SYSPROTO_H_
struct munlockall_args {
	register_t dummy;
};
#endif

/*
 * MPSAFE
 */
int
sys_munlockall(td, uap)
	struct thread *td;
	struct munlockall_args *uap;
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);

	/* Clear the MAP_WIREFUTURE flag from this vm_map. */
	vm_map_lock(map);
	vm_map_modflags(map, 0, MAP_WIREFUTURE);
	vm_map_unlock(map);

	/* Forcibly unwire all pages. */
	error = vm_map_unwire(map, vm_map_min(map), vm_map_max(map),
	    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
#ifdef RACCT
	if (racct_enable && error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK, 0);
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct munlock_args {
	const void *addr;
	size_t len;
};
#endif
/*
 * MPSAFE
 */
int
sys_munlock(td, uap)
	struct thread *td;
	struct munlock_args *uap;
{
	vm_offset_t addr, end, last, start;
	vm_size_t size;
#ifdef RACCT
	vm_map_t map;
#endif
	int error;

	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);
	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	error = vm_map_unwire(&td->td_proc->p_vmspace->vm_map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (racct_enable && error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		map = &td->td_proc->p_vmspace->vm_map;
		racct_set(td->td_proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(td->td_proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * vm_mmap_vnode()
 *
 * Helper function for vm_mmap.  Perform the sanity checks specific to
 * mmap operations on vnodes.
 */
int
vm_mmap_vnode(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct vnode *vp, vm_ooffset_t *foffp, vm_object_t *objp,
    boolean_t *writecounted)
{
	struct vattr va;
	vm_object_t obj;
	vm_offset_t foff;
	struct ucred *cred;
	int error, flags, locktype;

	cred = td->td_ucred;
	if ((*maxprotp & VM_PROT_WRITE) && (*flagsp & MAP_SHARED))
		locktype = LK_EXCLUSIVE;
	else
		locktype = LK_SHARED;
	if ((error = vget(vp, locktype, td)) != 0)
		return (error);
	AUDIT_ARG_VNODE1(vp);
	foff = *foffp;
	flags = *flagsp;
	obj = vp->v_object;
	if (vp->v_type == VREG) {
		/*
		 * Get the proper underlying object
		 */
		if (obj == NULL) {
			error = EINVAL;
			goto done;
		}
		if (obj->type == OBJT_VNODE && obj->handle != vp) {
			vput(vp);
			vp = (struct vnode *)obj->handle;
			/*
			 * Bypass filesystems obey the mpsafety of the
			 * underlying fs.  Tmpfs never bypasses.
			 */
			error = vget(vp, locktype, td);
			if (error != 0)
				return (error);
		}
		if (locktype == LK_EXCLUSIVE) {
			*writecounted = TRUE;
			vnode_pager_update_writecount(obj, 0, objsize);
		}
	} else {
		error = EINVAL;
		goto done;
	}
	if ((error = VOP_GETATTR(vp, &va, cred)))
		goto done;
#ifdef MAC
	/* This relies on VM_PROT_* matching PROT_*. */
	error = mac_vnode_check_mmap(cred, vp, (int)prot, flags);
	if (error != 0)
		goto done;
#endif
	if ((flags & MAP_SHARED) != 0) {
		if ((va.va_flags & (SF_SNAPSHOT|IMMUTABLE|APPEND)) != 0) {
			if (prot & VM_PROT_WRITE) {
				error = EPERM;
				goto done;
			}
			*maxprotp &= ~VM_PROT_WRITE;
		}
	}
	/*
	 * If it is a regular file without any references
	 * we do not need to sync it.
	 * Adjust object size to be the size of the actual file.
	 */
	objsize = round_page(va.va_size);
	if (va.va_nlink == 0)
		flags |= MAP_NOSYNC;
	if (obj->type == OBJT_VNODE) {
		obj = vm_pager_allocate(OBJT_VNODE, vp, objsize, prot, foff,
		    cred);
		if (obj == NULL) {
			error = ENOMEM;
			goto done;
		}
	} else {
		KASSERT(obj->type == OBJT_DEFAULT || obj->type == OBJT_SWAP,
		    ("wrong object type"));
		VM_OBJECT_WLOCK(obj);
		vm_object_reference_locked(obj);
#if VM_NRESERVLEVEL > 0
		vm_object_color(obj, 0);
#endif
		VM_OBJECT_WUNLOCK(obj);
	}
	*objp = obj;
	*flagsp = flags;

	vfs_mark_atime(vp, cred);

done:
	if (error != 0 && *writecounted) {
		*writecounted = FALSE;
		vnode_pager_update_writecount(obj, objsize, 0);
	}
	vput(vp);
	return (error);
}

/*
 * vm_mmap_cdev()
 *
 * MPSAFE
 *
 * Helper function for vm_mmap.  Perform the sanity checks specific to
 * mmap operations on cdevs.
 */
int
vm_mmap_cdev(struct thread *td, vm_size_t objsize, vm_prot_t prot,
    vm_prot_t *maxprotp, int *flagsp, struct cdev *cdev, struct cdevsw *dsw,
    vm_ooffset_t *foff, vm_object_t *objp)
{
	vm_object_t obj;
	int error, flags;

	flags = *flagsp;

	if (dsw->d_flags & D_MMAP_ANON) {
		*objp = NULL;
		*foff = 0;
		*maxprotp = VM_PROT_ALL;
		*flagsp |= MAP_ANON;
		return (0);
	}
	/*
	 * cdevs do not provide private mappings of any kind.
	 */
	if ((*maxprotp & VM_PROT_WRITE) == 0 &&
	    (prot & VM_PROT_WRITE) != 0)
		return (EACCES);
	if (flags & (MAP_PRIVATE|MAP_COPY))
		return (EINVAL);
	/*
	 * Force device mappings to be shared.
	 */
	flags |= MAP_SHARED;
#ifdef MAC_XXX
	error = mac_cdev_check_mmap(td->td_ucred, cdev, (int)prot);
	if (error != 0)
		return (error);
#endif
	/*
	 * First, try d_mmap_single().  If that is not implemented
	 * (returns ENODEV), fall back to using the device pager.
	 * Note that d_mmap_single() must return a reference to the
	 * object (it needs to bump the reference count of the object
	 * it returns somehow).
	 *
	 * XXX assumes VM_PROT_* == PROT_*
	 */
	error = dsw->d_mmap_single(cdev, foff, objsize, objp, (int)prot);
	if (error != ENODEV)
		return (error);
	obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff,
	    td->td_ucred);
	if (obj == NULL)
		return (EINVAL);
	*objp = obj;
	*flagsp = flags;
	return (0);
}

/*
 * vm_mmap()
 *
 * Internal version of mmap used by exec, sys5 shared memory, and
 * various device drivers.  Handle is either a vnode pointer, a
 * character device, or NULL for MAP_ANON.
 */
int
vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags,
    objtype_t handle_type, void *handle,
    vm_ooffset_t foff)
{
	vm_object_t object;
	struct thread *td = curthread;
	int error;
	boolean_t writecounted;

	if (size == 0)
		return (EINVAL);

	size = round_page(size);
	object = NULL;
	writecounted = FALSE;

	/*
	 * Lookup/allocate object.
	 */
	switch (handle_type) {
	case OBJT_DEVICE: {
		struct cdevsw *dsw;
		struct cdev *cdev;
		int ref;

		cdev = handle;
		dsw = dev_refthread(cdev, &ref);
		if (dsw == NULL)
			return (ENXIO);
		error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, cdev,
		    dsw, &foff, &object);
		dev_relthread(cdev, ref);
		break;
	}
	case OBJT_VNODE:
		error = vm_mmap_vnode(td, size, prot, &maxprot, &flags,
		    handle, &foff, &object, &writecounted);
		break;
	case OBJT_DEFAULT:
		if (handle == NULL) {
			error = 0;
			break;
		}
		/* FALLTHROUGH */
	default:
		error = EINVAL;
		break;
	}
	if (error)
		return (error);

	error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object,
	    foff, writecounted, td);
	if (error != 0 && object != NULL) {
		/*
		 * If this mapping was accounted for in the vnode's
		 * writecount, then undo that now.
		 */
		if (writecounted)
			vnode_pager_release_writecount(object, 0, size);
		vm_object_deallocate(object);
	}
	return (error);
}

/*
 * Internal version of mmap that maps a specific VM object into a
 * map.  Called by mmap for MAP_ANON, vm_mmap, shm_mmap, and vn_mmap.
 */
int
vm_mmap_object(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags, vm_object_t object, vm_ooffset_t foff,
    boolean_t writecounted, struct thread *td)
{
	boolean_t fitit;
	int docow, error, findspace, rv;

	if (map == &td->td_proc->p_vmspace->vm_map) {
		PROC_LOCK(td->td_proc);
		if (map->size + size > lim_cur_proc(td->td_proc, RLIMIT_VMEM)) {
			PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		if (racct_set(td->td_proc, RACCT_VMEM, map->size + size)) {
			PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		if (!old_mlock && map->flags & MAP_WIREFUTURE) {
			if (ptoa(pmap_wired_count(map->pmap)) + size >
			    lim_cur_proc(td->td_proc, RLIMIT_MEMLOCK)) {
				racct_set_force(td->td_proc, RACCT_VMEM,
				    map->size);
				PROC_UNLOCK(td->td_proc);
				return (ENOMEM);
			}
			error = racct_set(td->td_proc, RACCT_MEMLOCK,
			    ptoa(pmap_wired_count(map->pmap)) + size);
			if (error != 0) {
				racct_set_force(td->td_proc, RACCT_VMEM,
				    map->size);
				PROC_UNLOCK(td->td_proc);
				return (error);
			}
		}
		PROC_UNLOCK(td->td_proc);
	}

	/*
	 * We currently can only deal with page aligned file offsets.
	 * The mmap() system call already enforces this by subtracting
	 * the page offset from the file offset, but checking here
	 * catches errors in device drivers (e.g. d_mmap_single()
	 * callbacks) and other internal mapping requests (such as in
	 * exec).
	 */
	if (foff & PAGE_MASK)
		return (EINVAL);

	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		if (*addr != trunc_page(*addr))
			return (EINVAL);
		fitit = FALSE;
	}

	if (flags & MAP_ANON) {
		if (object != NULL || foff != 0)
			return (EINVAL);
		docow = 0;
	} else if (flags & MAP_PREFAULT_READ)
		docow = MAP_PREFAULT;
	else
		docow = MAP_PREFAULT_PARTIAL;

	if ((flags & (MAP_ANON|MAP_SHARED)) == 0)
		docow |= MAP_COPY_ON_WRITE;
	if (flags & MAP_NOSYNC)
		docow |= MAP_DISABLE_SYNCER;
	if (flags & MAP_NOCORE)
		docow |= MAP_DISABLE_COREDUMP;
	/* Shared memory is also shared with children. */
	if (flags & MAP_SHARED)
		docow |= MAP_INHERIT_SHARE;
	if (writecounted)
		docow |= MAP_VN_WRITECOUNT;
	if (flags & MAP_STACK) {
		if (object != NULL)
			return (EINVAL);
		docow |= MAP_STACK_GROWS_DOWN;
	}
	if ((flags & MAP_EXCL) != 0)
		docow |= MAP_CHECK_EXCL;

	if (fitit) {
		if ((flags & MAP_ALIGNMENT_MASK) == MAP_ALIGNED_SUPER)
			findspace = VMFS_SUPER_SPACE;
		else if ((flags & MAP_ALIGNMENT_MASK) != 0)
			findspace = VMFS_ALIGNED_SPACE(flags >>
			    MAP_ALIGNMENT_SHIFT);
		else
			findspace = VMFS_OPTIMAL_SPACE;
		rv = vm_map_find(map, object, foff, addr, size,
#ifdef MAP_32BIT
		    flags & MAP_32BIT ? MAP_32BIT_MAX_ADDR :
#endif
		    0, findspace, prot, maxprot, docow);
	} else {
		rv = vm_map_fixed(map, object, foff, *addr, size,
		    prot, maxprot, docow);
	}

	if (rv == KERN_SUCCESS) {
		/*
		 * If the process has requested that all future mappings
		 * be wired, then heed this.
		 */
		if (map->flags & MAP_WIREFUTURE) {
			vm_map_wire(map, *addr, *addr + size,
			    VM_MAP_WIRE_USER | ((flags & MAP_STACK) ?
			    VM_MAP_WIRE_HOLESOK : VM_MAP_WIRE_NOHOLES));
		}
	}
	return (vm_mmap_to_errno(rv));
}

/*
 * Translate a Mach VM return code to zero on success or the appropriate errno
 * on failure.
 */
int
vm_mmap_to_errno(int rv)
{

	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}