/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_hwpmc_hooks.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/capsicum.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/procctl.h>
#include <sys/racct.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/vmmeter.h>

#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vnode_pager.h>

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

int old_mlock = 0;
SYSCTL_INT(_vm, OID_AUTO, old_mlock, CTLFLAG_RWTUN, &old_mlock, 0,
    "Do not apply RLIMIT_MEMLOCK on mlockall");

#ifdef MAP_32BIT
#define	MAP_32BIT_MAX_ADDR	((vm_offset_t)1 << 31)
#endif

#ifndef _SYS_SYSPROTO_H_
struct sbrk_args {
	int incr;
};
#endif

int
sys_sbrk(struct thread *td, struct sbrk_args *uap)
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#ifndef _SYS_SYSPROTO_H_
struct sstk_args {
	int incr;
};
#endif

int
sys_sstk(struct thread *td, struct sstk_args *uap)
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43)
#ifndef _SYS_SYSPROTO_H_
struct getpagesize_args {
	int dummy;
};
#endif

int
ogetpagesize(struct thread *td, struct getpagesize_args *uap)
{

	td->td_retval[0] = PAGE_SIZE;
	return (0);
}
#endif /* COMPAT_43 */

/*
 * Memory Map (mmap) system call.  Note that the file offset
 * and address are allowed to be NOT page aligned, though if
 * the MAP_FIXED flag is set, both must have the same remainder
 * modulo the PAGE_SIZE (POSIX 1003.1b).  If the address is not
 * page-aligned, the actual mapping starts at trunc_page(addr)
 * and the return value is adjusted up by the page offset.
 *
 * Generally speaking, only character devices which are themselves
 * memory-based, such as a video framebuffer, can be mmap'd.  Otherwise
 * there would be no cache coherency between a descriptor and a VM mapping
 * both to the same character device.
 */
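
/*
 * Illustrative sketch (not part of the original sources): as described
 * above, a userspace request with a file offset that is not page aligned
 * maps the page containing that offset and returns a pointer adjusted by
 * the page offset, e.g.:
 *
 *	off_t off = 100;	// deliberately not page aligned
 *	char *p = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, off);
 *	// The mapping starts at trunc_page(off); p points 100 bytes into
 *	// that page, so p[0] corresponds to byte 100 of the file.
 *
 * With MAP_FIXED, the requested address must have the same remainder
 * modulo PAGE_SIZE as the offset, otherwise kern_mmap() returns EINVAL.
 */
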
#ifndef _SYS_SYSPROTO_H_
struct mmap_args {
	void *addr;
	size_t len;
	int prot;
	int flags;
	int fd;
	long pad;
	off_t pos;
};
#endif

int
sys_mmap(struct thread *td, struct mmap_args *uap)
{

	return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, uap->prot,
	    uap->flags, uap->fd, uap->pos));
}

int
kern_mmap(struct thread *td, uintptr_t addr0, size_t size, int prot, int flags,
    int fd, off_t pos)
{
	struct vmspace *vms;
	struct file *fp;
	vm_offset_t addr;
	vm_size_t pageoff;
	vm_prot_t cap_maxprot;
	int align, error;
	cap_rights_t rights;

	vms = td->td_proc->p_vmspace;
	fp = NULL;
	AUDIT_ARG_FD(fd);
	addr = addr0;

	/*
	 * Ignore old flags that used to be defined but did not do anything.
	 */
	flags &= ~(MAP_RESERVED0020 | MAP_RESERVED0040);

	/*
	 * Enforce the constraints.
	 * Mapping of length 0 is only allowed for old binaries.
	 * Anonymous mapping shall specify -1 as file descriptor and
	 * zero position for new code.  Be nice to ancient a.out
	 * binaries and correct pos for anonymous mapping, since old
	 * ld.so sometimes issues anonymous map requests with non-zero
	 * pos.
	 */
	if (!SV_CURPROC_FLAG(SV_AOUT)) {
		if ((size == 0 && curproc->p_osrel >= P_OSREL_MAP_ANON) ||
		    ((flags & MAP_ANON) != 0 && (fd != -1 || pos != 0)))
			return (EINVAL);
	} else {
		if ((flags & MAP_ANON) != 0)
			pos = 0;
	}

	if (flags & MAP_STACK) {
		if ((fd != -1) ||
		    ((prot & (PROT_READ | PROT_WRITE)) !=
		    (PROT_READ | PROT_WRITE)))
			return (EINVAL);
		flags |= MAP_ANON;
		pos = 0;
	}
	if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | MAP_HASSEMAPHORE |
	    MAP_STACK | MAP_NOSYNC | MAP_ANON | MAP_EXCL | MAP_NOCORE |
	    MAP_PREFAULT_READ | MAP_GUARD |
#ifdef MAP_32BIT
	    MAP_32BIT |
#endif
	    MAP_ALIGNMENT_MASK)) != 0)
		return (EINVAL);
	if ((flags & (MAP_EXCL | MAP_FIXED)) == MAP_EXCL)
		return (EINVAL);
	if ((flags & (MAP_SHARED | MAP_PRIVATE)) == (MAP_SHARED | MAP_PRIVATE))
		return (EINVAL);
	if (prot != PROT_NONE &&
	    (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) != 0)
		return (EINVAL);
	if ((flags & MAP_GUARD) != 0 && (prot != PROT_NONE || fd != -1 ||
	    pos != 0 || (flags & (MAP_SHARED | MAP_PRIVATE | MAP_PREFAULT |
	    MAP_PREFAULT_READ | MAP_ANON | MAP_STACK)) != 0))
		return (EINVAL);

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;

	/* Adjust size for rounding (on both ends). */
	size += pageoff;			/* low end... */
	size = (vm_size_t) round_page(size);	/* hi end */

	/* Ensure alignment is at least a page and fits in a pointer. */
	align = flags & MAP_ALIGNMENT_MASK;
	if (align != 0 && align != MAP_ALIGNED_SUPER &&
	    (align >> MAP_ALIGNMENT_SHIFT >= sizeof(void *) * NBBY ||
	    align >> MAP_ALIGNMENT_SHIFT < PAGE_SHIFT))
		return (EINVAL);
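
	/*
	 * Illustrative note (not from the original sources): userspace
	 * encodes an explicit alignment request as MAP_ALIGNED(n), which
	 * places log2 of the desired alignment in the bits covered by
	 * MAP_ALIGNMENT_MASK; for example, MAP_ALIGNED(21) asks for a
	 * 2 MB aligned mapping, and MAP_ALIGNED_SUPER lets the kernel
	 * choose a superpage-friendly alignment.  The check above rejects
	 * alignments smaller than a page or too large for a pointer.
	 */
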
	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		/* Address range must be all in user VM space. */
		if (addr < vm_map_min(&vms->vm_map) ||
		    addr + size > vm_map_max(&vms->vm_map))
			return (EINVAL);
		if (addr + size < addr)
			return (EINVAL);
#ifdef MAP_32BIT
		if (flags & MAP_32BIT && addr + size > MAP_32BIT_MAX_ADDR)
			return (EINVAL);
	} else if (flags & MAP_32BIT) {
		/*
		 * For MAP_32BIT, override the hint if it is too high and
		 * do not bother moving the mapping past the heap (since
		 * the heap is usually above 2GB).
		 */
		if (addr + size > MAP_32BIT_MAX_ADDR)
			addr = 0;
#endif
	} else {
		/*
		 * XXX for non-fixed mappings where no hint is provided or
		 * the hint would fall in the potential heap space,
		 * place it after the end of the largest possible heap.
		 *
		 * There should really be a pmap call to determine a reasonable
		 * location.
		 */
		if (addr == 0 ||
		    (addr >= round_page((vm_offset_t)vms->vm_taddr) &&
		    addr < round_page((vm_offset_t)vms->vm_daddr +
		    lim_max(td, RLIMIT_DATA))))
			addr = round_page((vm_offset_t)vms->vm_daddr +
			    lim_max(td, RLIMIT_DATA));
	}
	if (size == 0) {
		/*
		 * Return success without mapping anything for old
		 * binaries that request a page-aligned mapping of
		 * length 0.  For modern binaries, this function
		 * returns an error earlier.
		 */
		error = 0;
	} else if ((flags & MAP_GUARD) != 0) {
		error = vm_mmap_object(&vms->vm_map, &addr, size, VM_PROT_NONE,
		    VM_PROT_NONE, flags, NULL, pos, FALSE, td);
	} else if ((flags & MAP_ANON) != 0) {
		/*
		 * Mapping blank space is trivial.
		 *
		 * This relies on VM_PROT_* matching PROT_*.
		 */
		error = vm_mmap_object(&vms->vm_map, &addr, size, prot,
		    VM_PROT_ALL, flags, NULL, pos, FALSE, td);
	} else {
		/*
		 * Mapping file, get fp for validation and don't let the
		 * descriptor disappear on us if we block.  Check capability
		 * rights, but also return the maximum rights to be combined
		 * with maxprot later.
		 */
		cap_rights_init(&rights, CAP_MMAP);
		if (prot & PROT_READ)
			cap_rights_set(&rights, CAP_MMAP_R);
		if ((flags & MAP_SHARED) != 0) {
			if (prot & PROT_WRITE)
				cap_rights_set(&rights, CAP_MMAP_W);
		}
		if (prot & PROT_EXEC)
			cap_rights_set(&rights, CAP_MMAP_X);
		error = fget_mmap(td, fd, &rights, &cap_maxprot, &fp);
		if (error != 0)
			goto done;
		if ((flags & (MAP_SHARED | MAP_PRIVATE)) == 0 &&
		    td->td_proc->p_osrel >= P_OSREL_MAP_FSTRICT) {
			error = EINVAL;
			goto done;
		}

		/* This relies on VM_PROT_* matching PROT_*. */
		error = fo_mmap(fp, &vms->vm_map, &addr, size, prot,
		    cap_maxprot, flags, pos, td);
	}

	if (error == 0)
		td->td_retval[0] = (register_t) (addr + pageoff);
done:
	if (fp)
		fdrop(fp, td);

	return (error);
}

#if defined(COMPAT_FREEBSD6)
int
freebsd6_mmap(struct thread *td, struct freebsd6_mmap_args *uap)
{

	return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, uap->prot,
	    uap->flags, uap->fd, uap->pos));
}
#endif

#ifdef COMPAT_43
#ifndef _SYS_SYSPROTO_H_
struct ommap_args {
	caddr_t addr;
	int len;
	int prot;
	int flags;
	int fd;
	long pos;
};
#endif
int
ommap(struct thread *td, struct ommap_args *uap)
{
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC | PROT_WRITE,
		PROT_READ,
		PROT_EXEC | PROT_READ,
		PROT_WRITE | PROT_READ,
		PROT_EXEC | PROT_WRITE | PROT_READ,
	};
	int flags, prot;

#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100

	prot = cvtbsdprot[uap->prot & 0x7];
#ifdef COMPAT_FREEBSD32
#if defined(__amd64__)
	if (i386_read_exec && SV_PROC_FLAG(td->td_proc, SV_ILP32) &&
	    prot != 0)
		prot |= PROT_EXEC;
#endif
#endif
	flags = 0;
	if (uap->flags & OMAP_ANON)
		flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		flags |= MAP_SHARED;
	else
		flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		flags |= MAP_FIXED;
	return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, prot, flags,
	    uap->fd, uap->pos));
}
#endif /* COMPAT_43 */

#ifndef _SYS_SYSPROTO_H_
struct msync_args {
	void *addr;
	size_t len;
	int flags;
};
#endif
int
sys_msync(struct thread *td, struct msync_args *uap)
{

	return (kern_msync(td, (uintptr_t)uap->addr, uap->len, uap->flags));
}

int
kern_msync(struct thread *td, uintptr_t addr0, size_t size, int flags)
{
	vm_offset_t addr;
	vm_size_t pageoff;
	vm_map_t map;
	int rv;

	addr = addr0;
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	map = &td->td_proc->p_vmspace->vm_map;

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_sync(map, addr, addr + size, (flags & MS_ASYNC) == 0,
	    (flags & MS_INVALIDATE) != 0);
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
		return (ENOMEM);
	case KERN_INVALID_ARGUMENT:
		return (EBUSY);
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
}
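
/*
 * Illustrative sketch (not part of the original sources): the KERN_* to
 * errno translation above is what a userspace caller of msync(2) sees,
 * e.g.:
 *
 *	if (msync(p, len, MS_SYNC) == -1)
 *		warn("msync");	// ENOMEM for an unmapped range, EBUSY if
 *				// MS_INVALIDATE hits wired pages, EIO on
 *				// a write error
 */
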
#ifndef _SYS_SYSPROTO_H_
struct munmap_args {
	void *addr;
	size_t len;
};
#endif
int
sys_munmap(struct thread *td, struct munmap_args *uap)
{

	return (kern_munmap(td, (uintptr_t)uap->addr, uap->len));
}

int
kern_munmap(struct thread *td, uintptr_t addr0, size_t size)
{
#ifdef HWPMC_HOOKS
	struct pmckern_map_out pkm;
	vm_map_entry_t entry;
	bool pmc_handled;
#endif
	vm_offset_t addr;
	vm_size_t pageoff;
	vm_map_t map;

	if (size == 0)
		return (EINVAL);

	addr = addr0;
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	if (addr < vm_map_min(map) || addr + size > vm_map_max(map))
		return (EINVAL);
	vm_map_lock(map);
#ifdef HWPMC_HOOKS
	pmc_handled = false;
	if (PMC_HOOK_INSTALLED(PMC_FN_MUNMAP)) {
		pmc_handled = true;
		/*
		 * Inform hwpmc if the address range being unmapped contains
		 * an executable region.
		 */
		pkm.pm_address = (uintptr_t) NULL;
		if (vm_map_lookup_entry(map, addr, &entry)) {
			for (;
			    entry != &map->header && entry->start < addr + size;
			    entry = entry->next) {
				if (vm_map_check_protection(map, entry->start,
				    entry->end, VM_PROT_EXECUTE) == TRUE) {
					pkm.pm_address = (uintptr_t) addr;
					pkm.pm_size = (size_t) size;
					break;
				}
			}
		}
	}
#endif
	vm_map_delete(map, addr, addr + size);

#ifdef HWPMC_HOOKS
	if (__predict_false(pmc_handled)) {
		/* downgrade the lock to prevent a LOR with the pmc-sx lock */
		vm_map_lock_downgrade(map);
		if (pkm.pm_address != (uintptr_t) NULL)
			PMC_CALL_HOOK(td, PMC_FN_MUNMAP, (void *) &pkm);
		vm_map_unlock_read(map);
	} else
#endif
		vm_map_unlock(map);

	/* vm_map_delete returns nothing but KERN_SUCCESS anyway */
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mprotect_args {
	const void *addr;
	size_t len;
	int prot;
};
#endif
int
sys_mprotect(struct thread *td, struct mprotect_args *uap)
{

	return (kern_mprotect(td, (uintptr_t)uap->addr, uap->len, uap->prot));
}

int
kern_mprotect(struct thread *td, uintptr_t addr0, size_t size, int prot)
{
	vm_offset_t addr;
	vm_size_t pageoff;

	addr = addr0;
	prot = (prot & VM_PROT_ALL);
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_protect(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, prot, FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	case KERN_RESOURCE_SHORTAGE:
		return (ENOMEM);
	}
	return (EINVAL);
}
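
/*
 * Illustrative sketch (not part of the original sources): a common
 * userspace pattern served by kern_mprotect() is narrowing permissions
 * after a region has been populated, e.g. for a JIT code buffer:
 *
 *	void *buf = mmap(NULL, sz, PROT_READ | PROT_WRITE,
 *	    MAP_ANON | MAP_PRIVATE, -1, 0);
 *	// ... emit machine code into buf ...
 *	if (mprotect(buf, sz, PROT_READ | PROT_EXEC) == -1)
 *		err(1, "mprotect");	// EACCES if the request exceeds
 *					// the mapping's max protection
 */
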
#ifndef _SYS_SYSPROTO_H_
struct minherit_args {
	void *addr;
	size_t len;
	int inherit;
};
#endif
int
sys_minherit(struct thread *td, struct minherit_args *uap)
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_inherit_t inherit;

	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	inherit = uap->inherit;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_inherit(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct madvise_args {
	void *addr;
	size_t len;
	int behav;
};
#endif

int
sys_madvise(struct thread *td, struct madvise_args *uap)
{

	return (kern_madvise(td, (uintptr_t)uap->addr, uap->len, uap->behav));
}

int
kern_madvise(struct thread *td, uintptr_t addr0, size_t len, int behav)
{
	vm_map_t map;
	vm_offset_t addr, end, start;
	int flags;

	/*
	 * Check for our special case, advising the swap pager we are
	 * "immortal."
	 */
	if (behav == MADV_PROTECT) {
		flags = PPROT_SET;
		return (kern_procctl(td, P_PID, td->td_proc->p_pid,
		    PROC_SPROTECT, &flags));
	}

	/*
	 * Check for illegal behavior
	 */
	if (behav < 0 || behav > MADV_CORE)
		return (EINVAL);
	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	addr = addr0;
	if (addr < vm_map_min(map) || addr + len > vm_map_max(map))
		return (EINVAL);
	if ((addr + len) < addr)
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	start = trunc_page(addr);
	end = round_page(addr + len);

	if (vm_map_madvise(map, start, end, behav))
		return (EINVAL);
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mincore_args {
	const void *addr;
	size_t len;
	char *vec;
};
#endif

int
sys_mincore(struct thread *td, struct mincore_args *uap)
{

	return (kern_mincore(td, (uintptr_t)uap->addr, uap->len, uap->vec));
}

int
kern_mincore(struct thread *td, uintptr_t addr0, size_t len, char *vec)
{
	vm_offset_t addr, first_addr;
	vm_offset_t end, cend;
	pmap_t pmap;
	vm_map_t map;
	int error = 0;
	int vecindex, lastvecindex;
	vm_map_entry_t current;
	vm_map_entry_t entry;
	vm_object_t object;
	vm_paddr_t locked_pa;
	vm_page_t m;
	vm_pindex_t pindex;
	int mincoreinfo;
	unsigned int timestamp;
	boolean_t locked;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page(addr0);
	end = addr + (vm_size_t)round_page(len);
	map = &td->td_proc->p_vmspace->vm_map;
	if (end > vm_map_max(map) || end < addr)
		return (ENOMEM);

	pmap = vmspace_pmap(td->td_proc->p_vmspace);

	vm_map_lock_read(map);
RestartScan:
	timestamp = map->timestamp;

	if (!vm_map_lookup_entry(map, addr, &entry)) {
		vm_map_unlock_read(map);
		return (ENOMEM);
	}

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current process's address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for (current = entry;
	    (current != &map->header) && (current->start < end);
	    current = current->next) {

		/*
		 * check for contiguity
		 */
		if (current->end < end &&
		    (entry->next == &map->header ||
		    current->next->start > current->end)) {
			vm_map_unlock_read(map);
			return (ENOMEM);
		}

		/*
		 * ignore submaps (for now) or null objects
		 */
		if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) ||
		    current->object.vm_object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		/*
		 * scan this entry one page at a time
		 */
		while (addr < cend) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 */
			object = NULL;
			locked_pa = 0;
		retry:
			m = NULL;
			mincoreinfo = pmap_mincore(pmap, addr, &locked_pa);
			if (locked_pa != 0) {
				/*
				 * The page is mapped by this process but not
				 * both accessed and modified.  It is also
				 * managed.  Acquire the object lock so that
				 * other mappings might be examined.
				 */
				m = PHYS_TO_VM_PAGE(locked_pa);
				if (m->object != object) {
					if (object != NULL)
						VM_OBJECT_WUNLOCK(object);
					object = m->object;
					locked = VM_OBJECT_TRYWLOCK(object);
					vm_page_unlock(m);
					if (!locked) {
						VM_OBJECT_WLOCK(object);
						vm_page_lock(m);
						goto retry;
					}
				} else
					vm_page_unlock(m);
				KASSERT(m->valid == VM_PAGE_BITS_ALL,
				    ("mincore: page %p is mapped but invalid",
				    m));
			} else if (mincoreinfo == 0) {
				/*
				 * The page is not mapped by this process.  If
				 * the object implements managed pages, then
				 * determine if the page is resident so that
				 * the mappings might be examined.
				 */
				if (current->object.vm_object != object) {
					if (object != NULL)
						VM_OBJECT_WUNLOCK(object);
					object = current->object.vm_object;
					VM_OBJECT_WLOCK(object);
				}
				if (object->type == OBJT_DEFAULT ||
				    object->type == OBJT_SWAP ||
				    object->type == OBJT_VNODE) {
					pindex = OFF_TO_IDX(current->offset +
					    (addr - current->start));
					m = vm_page_lookup(object, pindex);
					if (m != NULL && m->valid == 0)
						m = NULL;
					if (m != NULL)
						mincoreinfo = MINCORE_INCORE;
				}
			}
			if (m != NULL) {
				/* Examine other mappings to the page. */
				if (m->dirty == 0 && pmap_is_modified(m))
					vm_page_dirty(m);
				if (m->dirty != 0)
					mincoreinfo |= MINCORE_MODIFIED_OTHER;
				/*
				 * The first test for PGA_REFERENCED is an
				 * optimization.  The second test is
				 * required because a concurrent pmap
				 * operation could clear the last reference
				 * and set PGA_REFERENCED before the call to
				 * pmap_is_referenced().
				 */
				if ((m->aflags & PGA_REFERENCED) != 0 ||
				    pmap_is_referenced(m) ||
				    (m->aflags & PGA_REFERENCED) != 0)
					mincoreinfo |= MINCORE_REFERENCED_OTHER;
			}
			if (object != NULL)
				VM_OBJECT_WUNLOCK(object);

			/*
			 * subyte may page fault.  In case it needs to modify
			 * the map, we release the lock.
			 */
			vm_map_unlock_read(map);

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = atop(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make sure that
			 * the byte vector is zeroed for those skipped entries.
			 */
			while ((lastvecindex + 1) < vecindex) {
				++lastvecindex;
				error = subyte(vec + lastvecindex, 0);
				if (error) {
					error = EFAULT;
					goto done2;
				}
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte(vec + vecindex, mincoreinfo);
			if (error) {
				error = EFAULT;
				goto done2;
			}

			/*
			 * If the map has changed, due to the subyte, the previous
			 * output may be invalid.
			 */
			vm_map_lock_read(map);
			if (timestamp != map->timestamp)
				goto RestartScan;

			lastvecindex = vecindex;
			addr += PAGE_SIZE;
		}
	}

	/*
	 * subyte may page fault.  In case it needs to modify
	 * the map, we release the lock.
	 */
	vm_map_unlock_read(map);

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = atop(end - first_addr);
	while ((lastvecindex + 1) < vecindex) {
		++lastvecindex;
		error = subyte(vec + lastvecindex, 0);
		if (error) {
			error = EFAULT;
			goto done2;
		}
	}

	/*
	 * If the map has changed, due to the subyte, the previous
	 * output may be invalid.
	 */
	vm_map_lock_read(map);
	if (timestamp != map->timestamp)
		goto RestartScan;
	vm_map_unlock_read(map);
done2:
	return (error);
}
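
/*
 * Illustrative sketch (not part of the original sources): userspace
 * supplies one byte per page of the queried range and inspects the
 * MINCORE_* bits filled in by the loop above, e.g.:
 *
 *	size_t pgsz = getpagesize();
 *	size_t npages = (len + pgsz - 1) / pgsz;
 *	char *vec = malloc(npages);
 *	if (mincore(addr, len, vec) == 0) {
 *		for (size_t i = 0; i < npages; i++)
 *			if (vec[i] & MINCORE_INCORE)
 *				resident++;
 *	}
 */
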
#ifndef _SYS_SYSPROTO_H_
struct mlock_args {
	const void *addr;
	size_t len;
};
#endif
int
sys_mlock(struct thread *td, struct mlock_args *uap)
{

	return (kern_mlock(td->td_proc, td->td_ucred,
	    __DECONST(uintptr_t, uap->addr), uap->len));
}

int
kern_mlock(struct proc *proc, struct ucred *cred, uintptr_t addr0, size_t len)
{
	vm_offset_t addr, end, last, start;
	vm_size_t npages, size;
	vm_map_t map;
	unsigned long nsize;
	int error;

	error = priv_check_cred(cred, PRIV_VM_MLOCK, 0);
	if (error)
		return (error);
	addr = addr0;
	size = len;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	npages = atop(end - start);
	if (npages > vm_page_max_wired)
		return (ENOMEM);
	map = &proc->p_vmspace->vm_map;
	PROC_LOCK(proc);
	nsize = ptoa(npages + pmap_wired_count(map->pmap));
	if (nsize > lim_cur_proc(proc, RLIMIT_MEMLOCK)) {
		PROC_UNLOCK(proc);
		return (ENOMEM);
	}
	PROC_UNLOCK(proc);
	if (npages + vm_cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);
#ifdef RACCT
	if (racct_enable) {
		PROC_LOCK(proc);
		error = racct_set(proc, RACCT_MEMLOCK, nsize);
		PROC_UNLOCK(proc);
		if (error != 0)
			return (ENOMEM);
	}
#endif
	error = vm_map_wire(map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (racct_enable && error != KERN_SUCCESS) {
		PROC_LOCK(proc);
		racct_set(proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

#ifndef _SYS_SYSPROTO_H_
struct mlockall_args {
	int how;
};
#endif

int
sys_mlockall(struct thread *td, struct mlockall_args *uap)
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = priv_check(td, PRIV_VM_MLOCK);
	if (error)
		return (error);

	if ((uap->how == 0) || ((uap->how & ~(MCL_CURRENT|MCL_FUTURE)) != 0))
		return (EINVAL);

	/*
	 * If wiring all pages in the process would cause it to exceed
	 * a hard resource limit, return ENOMEM.
	 */
	if (!old_mlock && uap->how & MCL_CURRENT) {
		PROC_LOCK(td->td_proc);
		if (map->size > lim_cur(td, RLIMIT_MEMLOCK)) {
			PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		PROC_UNLOCK(td->td_proc);
	}
#ifdef RACCT
	if (racct_enable) {
		PROC_LOCK(td->td_proc);
		error = racct_set(td->td_proc, RACCT_MEMLOCK, map->size);
		PROC_UNLOCK(td->td_proc);
		if (error != 0)
			return (ENOMEM);
	}
#endif

	if (uap->how & MCL_FUTURE) {
		vm_map_lock(map);
		vm_map_modflags(map, MAP_WIREFUTURE, 0);
		vm_map_unlock(map);
		error = 0;
	}

	if (uap->how & MCL_CURRENT) {
		/*
		 * P1003.1-2001 mandates that all currently mapped pages
		 * will be memory resident and locked (wired) upon return
		 * from mlockall().  vm_map_wire() will wire pages, by
		 * calling vm_fault_wire() for each page in the region.
		 */
		error = vm_map_wire(map, vm_map_min(map), vm_map_max(map),
		    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
		error = (error == KERN_SUCCESS ? 0 : EAGAIN);
	}
#ifdef RACCT
	if (racct_enable && error != KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct munlockall_args {
	register_t dummy;
};
#endif

int
sys_munlockall(struct thread *td, struct munlockall_args *uap)
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);

	/* Clear the MAP_WIREFUTURE flag from this vm_map. */
	vm_map_lock(map);
	vm_map_modflags(map, 0, MAP_WIREFUTURE);
	vm_map_unlock(map);

	/* Forcibly unwire all pages. */
	error = vm_map_unwire(map, vm_map_min(map), vm_map_max(map),
	    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
#ifdef RACCT
	if (racct_enable && error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK, 0);
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct munlock_args {
	const void *addr;
	size_t len;
};
#endif
int
sys_munlock(struct thread *td, struct munlock_args *uap)
{

	return (kern_munlock(td, (uintptr_t)uap->addr, uap->len));
}

int
kern_munlock(struct thread *td, uintptr_t addr0, size_t size)
{
	vm_offset_t addr, end, last, start;
#ifdef RACCT
	vm_map_t map;
#endif
	int error;

	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);
	addr = addr0;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	error = vm_map_unwire(&td->td_proc->p_vmspace->vm_map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (racct_enable && error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		map = &td->td_proc->p_vmspace->vm_map;
		racct_set(td->td_proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(td->td_proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * vm_mmap_vnode()
 *
 * Helper function for vm_mmap.  Perform sanity check specific for mmap
 * operations on vnodes.
 */
int
vm_mmap_vnode(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct vnode *vp, vm_ooffset_t *foffp, vm_object_t *objp,
    boolean_t *writecounted)
{
	struct vattr va;
	vm_object_t obj;
	vm_ooffset_t foff;
	struct ucred *cred;
	int error, flags, locktype;

	cred = td->td_ucred;
	if ((*maxprotp & VM_PROT_WRITE) && (*flagsp & MAP_SHARED))
		locktype = LK_EXCLUSIVE;
	else
		locktype = LK_SHARED;
	if ((error = vget(vp, locktype, td)) != 0)
		return (error);
	AUDIT_ARG_VNODE1(vp);
	foff = *foffp;
	flags = *flagsp;
	obj = vp->v_object;
	if (vp->v_type == VREG) {
		/*
		 * Get the proper underlying object
		 */
		if (obj == NULL) {
			error = EINVAL;
			goto done;
		}
		if (obj->type == OBJT_VNODE && obj->handle != vp) {
			vput(vp);
			vp = (struct vnode *)obj->handle;
			/*
			 * Bypass filesystems obey the mpsafety of the
			 * underlying fs.  Tmpfs never bypasses.
			 */
			error = vget(vp, locktype, td);
			if (error != 0)
				return (error);
		}
		if (locktype == LK_EXCLUSIVE) {
			*writecounted = TRUE;
			vnode_pager_update_writecount(obj, 0, objsize);
		}
	} else {
		error = EINVAL;
		goto done;
	}
	if ((error = VOP_GETATTR(vp, &va, cred)))
		goto done;
#ifdef MAC
	/* This relies on VM_PROT_* matching PROT_*. */
	error = mac_vnode_check_mmap(cred, vp, (int)prot, flags);
	if (error != 0)
		goto done;
#endif
	if ((flags & MAP_SHARED) != 0) {
		if ((va.va_flags & (SF_SNAPSHOT|IMMUTABLE|APPEND)) != 0) {
			if (prot & VM_PROT_WRITE) {
				error = EPERM;
				goto done;
			}
			*maxprotp &= ~VM_PROT_WRITE;
		}
	}
	/*
	 * If it is a regular file without any references
	 * we do not need to sync it.
	 * Adjust object size to be the size of actual file.
	 */
	objsize = round_page(va.va_size);
	if (va.va_nlink == 0)
		flags |= MAP_NOSYNC;
	if (obj->type == OBJT_VNODE) {
		obj = vm_pager_allocate(OBJT_VNODE, vp, objsize, prot, foff,
		    cred);
		if (obj == NULL) {
			error = ENOMEM;
			goto done;
		}
	} else {
		KASSERT(obj->type == OBJT_DEFAULT || obj->type == OBJT_SWAP,
		    ("wrong object type"));
		VM_OBJECT_WLOCK(obj);
		vm_object_reference_locked(obj);
#if VM_NRESERVLEVEL > 0
		vm_object_color(obj, 0);
#endif
		VM_OBJECT_WUNLOCK(obj);
	}
	*objp = obj;
	*flagsp = flags;

	vfs_mark_atime(vp, cred);

done:
	if (error != 0 && *writecounted) {
		*writecounted = FALSE;
		vnode_pager_update_writecount(obj, objsize, 0);
	}
	vput(vp);
	return (error);
}

/*
 * vm_mmap_cdev()
 *
 * Helper function for vm_mmap.  Perform sanity check specific for mmap
 * operations on cdevs.
 */
int
vm_mmap_cdev(struct thread *td, vm_size_t objsize, vm_prot_t prot,
    vm_prot_t *maxprotp, int *flagsp, struct cdev *cdev, struct cdevsw *dsw,
    vm_ooffset_t *foff, vm_object_t *objp)
{
	vm_object_t obj;
	int error, flags;

	flags = *flagsp;

	if (dsw->d_flags & D_MMAP_ANON) {
		*objp = NULL;
		*foff = 0;
		*maxprotp = VM_PROT_ALL;
		*flagsp |= MAP_ANON;
		return (0);
	}
	/*
	 * cdevs do not provide private mappings of any kind.
	 */
	if ((*maxprotp & VM_PROT_WRITE) == 0 &&
	    (prot & VM_PROT_WRITE) != 0)
		return (EACCES);
	if (flags & (MAP_PRIVATE|MAP_COPY))
		return (EINVAL);
	/*
	 * Force device mappings to be shared.
	 */
	flags |= MAP_SHARED;
#ifdef MAC_XXX
	error = mac_cdev_check_mmap(td->td_ucred, cdev, (int)prot);
	if (error != 0)
		return (error);
#endif
	/*
	 * First, try d_mmap_single().  If that is not implemented
	 * (returns ENODEV), fall back to using the device pager.
	 * Note that d_mmap_single() must return a reference to the
	 * object (it needs to bump the reference count of the object
	 * it returns somehow).
	 *
	 * XXX assumes VM_PROT_* == PROT_*
	 */
	error = dsw->d_mmap_single(cdev, foff, objsize, objp, (int)prot);
	if (error != ENODEV)
		return (error);
	obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff,
	    td->td_ucred);
	if (obj == NULL)
		return (EINVAL);
	*objp = obj;
	*flagsp = flags;
	return (0);
}
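
/*
 * Illustrative sketch (not part of the original sources): a driver that
 * wants the device-pager fallback above simply declines d_mmap_single(),
 * while one that manages its own VM objects returns a referenced object
 * through the "object" argument.  A hypothetical minimal callback:
 *
 *	static int
 *	foo_mmap_single(struct cdev *cdev, vm_ooffset_t *offset,
 *	    vm_size_t size, vm_object_t *object, int nprot)
 *	{
 *		return (ENODEV);	// decline; vm_mmap_cdev() falls
 *					// back to the OBJT_DEVICE pager
 *	}
 */
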
/*
 * vm_mmap()
 *
 * Internal version of mmap used by exec, sys5 shared memory, and
 * various device drivers.  Handle is either a vnode pointer, a
 * character device, or NULL for MAP_ANON.
 */
int
vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags,
    objtype_t handle_type, void *handle,
    vm_ooffset_t foff)
{
	vm_object_t object;
	struct thread *td = curthread;
	int error;
	boolean_t writecounted;

	if (size == 0)
		return (EINVAL);

	size = round_page(size);
	object = NULL;
	writecounted = FALSE;

	/*
	 * Lookup/allocate object.
	 */
	switch (handle_type) {
	case OBJT_DEVICE: {
		struct cdevsw *dsw;
		struct cdev *cdev;
		int ref;

		cdev = handle;
		dsw = dev_refthread(cdev, &ref);
		if (dsw == NULL)
			return (ENXIO);
		error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, cdev,
		    dsw, &foff, &object);
		dev_relthread(cdev, ref);
		break;
	}
	case OBJT_VNODE:
		error = vm_mmap_vnode(td, size, prot, &maxprot, &flags,
		    handle, &foff, &object, &writecounted);
		break;
	case OBJT_DEFAULT:
		if (handle == NULL) {
			error = 0;
			break;
		}
		/* FALLTHROUGH */
	default:
		error = EINVAL;
		break;
	}
	if (error)
		return (error);

	error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object,
	    foff, writecounted, td);
	if (error != 0 && object != NULL) {
		/*
		 * If this mapping was accounted for in the vnode's
		 * writecount, then undo that now.
		 */
		if (writecounted)
			vnode_pager_release_writecount(object, 0, size);
		vm_object_deallocate(object);
	}
	return (error);
}

/*
 * Internal version of mmap that maps a specific VM object into a
 * map.  Called by mmap for MAP_ANON, vm_mmap, shm_mmap, and vn_mmap.
 */
int
vm_mmap_object(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags, vm_object_t object, vm_ooffset_t foff,
    boolean_t writecounted, struct thread *td)
{
	boolean_t curmap, fitit;
	vm_offset_t max_addr;
	int docow, error, findspace, rv;

	curmap = map == &td->td_proc->p_vmspace->vm_map;
	if (curmap) {
		PROC_LOCK(td->td_proc);
		if (map->size + size > lim_cur_proc(td->td_proc, RLIMIT_VMEM)) {
			PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		if (racct_set(td->td_proc, RACCT_VMEM, map->size + size)) {
			PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		if (!old_mlock && map->flags & MAP_WIREFUTURE) {
			if (ptoa(pmap_wired_count(map->pmap)) + size >
			    lim_cur_proc(td->td_proc, RLIMIT_MEMLOCK)) {
				racct_set_force(td->td_proc, RACCT_VMEM,
				    map->size);
				PROC_UNLOCK(td->td_proc);
				return (ENOMEM);
			}
			error = racct_set(td->td_proc, RACCT_MEMLOCK,
			    ptoa(pmap_wired_count(map->pmap)) + size);
			if (error != 0) {
				racct_set_force(td->td_proc, RACCT_VMEM,
				    map->size);
				PROC_UNLOCK(td->td_proc);
				return (error);
			}
		}
		PROC_UNLOCK(td->td_proc);
	}

	/*
	 * We currently can only deal with page aligned file offsets.
	 * The mmap() system call already enforces this by subtracting
	 * the page offset from the file offset, but checking here
	 * catches errors in device drivers (e.g. d_mmap_single()
	 * callbacks) and other internal mapping requests (such as in
	 * exec).
	 */
	if (foff & PAGE_MASK)
		return (EINVAL);

	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		if (*addr != trunc_page(*addr))
			return (EINVAL);
		fitit = FALSE;
	}

	if (flags & MAP_ANON) {
		if (object != NULL || foff != 0)
			return (EINVAL);
		docow = 0;
	} else if (flags & MAP_PREFAULT_READ)
		docow = MAP_PREFAULT;
	else
		docow = MAP_PREFAULT_PARTIAL;

	if ((flags & (MAP_ANON|MAP_SHARED)) == 0)
		docow |= MAP_COPY_ON_WRITE;
	if (flags & MAP_NOSYNC)
		docow |= MAP_DISABLE_SYNCER;
	if (flags & MAP_NOCORE)
		docow |= MAP_DISABLE_COREDUMP;
	/* Shared memory is also shared with children. */
	if (flags & MAP_SHARED)
		docow |= MAP_INHERIT_SHARE;
	if (writecounted)
		docow |= MAP_VN_WRITECOUNT;
	if (flags & MAP_STACK) {
		if (object != NULL)
			return (EINVAL);
		docow |= MAP_STACK_GROWS_DOWN;
	}
	if ((flags & MAP_EXCL) != 0)
		docow |= MAP_CHECK_EXCL;
	if ((flags & MAP_GUARD) != 0)
		docow |= MAP_CREATE_GUARD;

	if (fitit) {
		if ((flags & MAP_ALIGNMENT_MASK) == MAP_ALIGNED_SUPER)
			findspace = VMFS_SUPER_SPACE;
		else if ((flags & MAP_ALIGNMENT_MASK) != 0)
			findspace = VMFS_ALIGNED_SPACE(flags >>
			    MAP_ALIGNMENT_SHIFT);
		else
			findspace = VMFS_OPTIMAL_SPACE;
		max_addr = 0;
#ifdef MAP_32BIT
		if ((flags & MAP_32BIT) != 0)
			max_addr = MAP_32BIT_MAX_ADDR;
#endif
		if (curmap) {
			rv = vm_map_find_min(map, object, foff, addr, size,
			    round_page((vm_offset_t)td->td_proc->p_vmspace->
			    vm_daddr + lim_max(td, RLIMIT_DATA)), max_addr,
			    findspace, prot, maxprot, docow);
		} else {
			rv = vm_map_find(map, object, foff, addr, size,
			    max_addr, findspace, prot, maxprot, docow);
		}
	} else {
		rv = vm_map_fixed(map, object, foff, *addr, size,
		    prot, maxprot, docow);
	}

	if (rv == KERN_SUCCESS) {
		/*
		 * If the process has requested that all future mappings
		 * be wired, then heed this.
		 */
		if (map->flags & MAP_WIREFUTURE) {
			vm_map_wire(map, *addr, *addr + size,
			    VM_MAP_WIRE_USER | ((flags & MAP_STACK) ?
			    VM_MAP_WIRE_HOLESOK : VM_MAP_WIRE_NOHOLES));
		}
	}
	return (vm_mmap_to_errno(rv));
}

/*
 * Translate a Mach VM return code to zero on success or the appropriate errno
 * on failure.
 */
int
vm_mmap_to_errno(int rv)
{

	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}