/*-
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_hwpmc_hooks.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/capability.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/racct.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/sysent.h>
#include <sys/vmmeter.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vm_page.h>
#include <vm/vnode_pager.h>

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

#ifndef _SYS_SYSPROTO_H_
struct sbrk_args {
	int incr;
};
#endif

static int vm_mmap_vnode(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
    int *, struct vnode *, vm_ooffset_t *, vm_object_t *, boolean_t *);
static int vm_mmap_cdev(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
    int *, struct cdev *, vm_ooffset_t *, vm_object_t *);
static int vm_mmap_shm(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
    int *, struct shmfd *, vm_ooffset_t, vm_object_t *);

/*
 * MPSAFE
 */
/* ARGSUSED */
int
sys_sbrk(td, uap)
	struct thread *td;
	struct sbrk_args *uap;
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#ifndef _SYS_SYSPROTO_H_
struct sstk_args {
	int incr;
};
#endif

/*
 * MPSAFE
 */
/* ARGSUSED */
int
sys_sstk(td, uap)
	struct thread *td;
	struct sstk_args *uap;
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43)
#ifndef _SYS_SYSPROTO_H_
struct getpagesize_args {
	int dummy;
};
#endif

int
ogetpagesize(td, uap)
	struct thread *td;
	struct getpagesize_args *uap;
{
	/* MP SAFE */
	td->td_retval[0] = PAGE_SIZE;
	return (0);
}
#endif /* COMPAT_43 */


/*
 * Memory Map (mmap) system call.  Note that the file offset
 * and address are allowed to be NOT page aligned, though if
 * the MAP_FIXED flag is set, both must have the same remainder
 * modulo the PAGE_SIZE (POSIX 1003.1b).  If the address is not
 * page-aligned, the actual mapping starts at trunc_page(addr)
 * and the return value is adjusted up by the page offset.
 *
 * Generally speaking, only character devices which are themselves
 * memory-based, such as a video framebuffer, can be mmap'd.  Otherwise
 * there would be no cache coherency between a descriptor and a VM mapping
 * both to the same character device.
 */
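
/*
 * Illustrative userland sketch (not part of the kernel sources and never
 * compiled here): exercises the offset rule described above.  The file
 * name "example.dat" is hypothetical; error handling is omitted.
 */
#if 0	/* example only */
#include <sys/mman.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	size_t len;
	char *p;
	int fd;

	/* Assumes "example.dat" exists and is at least two pages long. */
	fd = open("example.dat", O_RDONLY);
	len = getpagesize();
	/*
	 * A file offset that is not page aligned is accepted: the kernel
	 * maps from trunc_page(offset) and returns a pointer whose low
	 * bits equal offset % PAGE_SIZE, so "p" addresses byte 100 of the
	 * file.
	 */
	p = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, (off_t)100);
	printf("byte at offset 100: %c\n", *p);
	/* munmap() truncates the unaligned address the same way. */
	munmap(p, len);
	close(fd);
	return (0);
}
#endif	/* example only */
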
#ifndef _SYS_SYSPROTO_H_
struct mmap_args {
	void *addr;
	size_t len;
	int prot;
	int flags;
	int fd;
	long pad;
	off_t pos;
};
#endif

/*
 * MPSAFE
 */
int
sys_mmap(td, uap)
	struct thread *td;
	struct mmap_args *uap;
{
#ifdef HWPMC_HOOKS
	struct pmckern_map_in pkm;
#endif
	struct file *fp;
	struct vnode *vp;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t cap_maxprot, prot, maxprot;
	void *handle;
	objtype_t handle_type;
	int flags, error;
	off_t pos;
	struct vmspace *vms = td->td_proc->p_vmspace;
	cap_rights_t rights;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;
	flags = uap->flags;
	pos = uap->pos;

	fp = NULL;

	/* Make sure mapping fits into numeric range, etc. */
	if ((uap->len == 0 && !SV_CURPROC_FLAG(SV_AOUT) &&
	     curproc->p_osrel >= P_OSREL_MAP_ANON) ||
	    ((flags & MAP_ANON) && (uap->fd != -1 || pos != 0)))
		return (EINVAL);

	if (flags & MAP_STACK) {
		if ((uap->fd != -1) ||
		    ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE)))
			return (EINVAL);
		flags |= MAP_ANON;
		pos = 0;
	}

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;

	/* Adjust size for rounding (on both ends). */
	size += pageoff;			/* low end... */
	size = (vm_size_t) round_page(size);	/* hi end */

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		/* Address range must be all in user VM space. */
		if (addr < vm_map_min(&vms->vm_map) ||
		    addr + size > vm_map_max(&vms->vm_map))
			return (EINVAL);
		if (addr + size < addr)
			return (EINVAL);
	} else {
		/*
		 * XXX for non-fixed mappings where no hint is provided or
		 * the hint would fall in the potential heap space,
		 * place it after the end of the largest possible heap.
		 *
		 * There should really be a pmap call to determine a reasonable
		 * location.
		 */
		PROC_LOCK(td->td_proc);
		if (addr == 0 ||
		    (addr >= round_page((vm_offset_t)vms->vm_taddr) &&
		    addr < round_page((vm_offset_t)vms->vm_daddr +
		    lim_max(td->td_proc, RLIMIT_DATA))))
			addr = round_page((vm_offset_t)vms->vm_daddr +
			    lim_max(td->td_proc, RLIMIT_DATA));
		PROC_UNLOCK(td->td_proc);
	}
	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.
		 */
		handle = NULL;
		handle_type = OBJT_DEFAULT;
		maxprot = VM_PROT_ALL;
		cap_maxprot = VM_PROT_ALL;
	} else {
		/*
		 * Mapping file, get fp for validation and don't let the
		 * descriptor disappear on us if we block.  Check capability
		 * rights, but also return the maximum rights to be combined
		 * with maxprot later.
		 */
		rights = CAP_MMAP;
		if (prot & PROT_READ)
			rights |= CAP_READ;
		if ((flags & MAP_SHARED) != 0) {
			if (prot & PROT_WRITE)
				rights |= CAP_WRITE;
		}
		if (prot & PROT_EXEC)
			rights |= CAP_MAPEXEC;
		if ((error = fget_mmap(td, uap->fd, rights, &cap_maxprot,
		    &fp)) != 0)
			goto done;
		if (fp->f_type == DTYPE_SHM) {
			handle = fp->f_data;
			handle_type = OBJT_SWAP;
			maxprot = VM_PROT_NONE;

			/* FREAD should always be set. */
			if (fp->f_flag & FREAD)
				maxprot |= VM_PROT_EXECUTE | VM_PROT_READ;
			if (fp->f_flag & FWRITE)
				maxprot |= VM_PROT_WRITE;
			goto map;
		}
		if (fp->f_type != DTYPE_VNODE) {
			error = ENODEV;
			goto done;
		}
#if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \
    defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4)
		/*
		 * POSIX shared-memory objects are defined to have
		 * kernel persistence, and are not defined to support
		 * read(2)/write(2) -- or even open(2).  Thus, we can
		 * use MAP_ASYNC to trade on-disk coherence for speed.
		 * The shm_open(3) library routine turns on the FPOSIXSHM
		 * flag to request this behavior.
		 */
		if (fp->f_flag & FPOSIXSHM)
			flags |= MAP_NOSYNC;
#endif
		vp = fp->f_vnode;
		/*
		 * Ensure that file and memory protections are
		 * compatible.  Note that we only worry about
		 * writability if mapping is shared; in this case,
		 * current and max prot are dictated by the open file.
		 * XXX use the vnode instead?  Problem is: what
		 * credentials do we use for determination? What if
		 * proc does a setuid?
		 */
		if (vp->v_mount != NULL && vp->v_mount->mnt_flag & MNT_NOEXEC)
			maxprot = VM_PROT_NONE;
		else
			maxprot = VM_PROT_EXECUTE;
		if (fp->f_flag & FREAD) {
			maxprot |= VM_PROT_READ;
		} else if (prot & PROT_READ) {
			error = EACCES;
			goto done;
		}
		/*
		 * If we are sharing potential changes (either via
		 * MAP_SHARED or via the implicit sharing of character
		 * device mappings), and we are trying to get write
		 * permission although we opened it without asking
		 * for it, bail out.
		 */
		if ((flags & MAP_SHARED) != 0) {
			if ((fp->f_flag & FWRITE) != 0) {
				maxprot |= VM_PROT_WRITE;
			} else if ((prot & PROT_WRITE) != 0) {
				error = EACCES;
				goto done;
			}
		} else if (vp->v_type != VCHR || (fp->f_flag & FWRITE) != 0) {
			maxprot |= VM_PROT_WRITE;
			cap_maxprot |= VM_PROT_WRITE;
		}
		handle = (void *)vp;
		handle_type = OBJT_VNODE;
	}
map:
	td->td_fpop = fp;
	maxprot &= cap_maxprot;
	error = vm_mmap(&vms->vm_map, &addr, size, prot, maxprot,
	    flags, handle_type, handle, pos);
	td->td_fpop = NULL;
#ifdef HWPMC_HOOKS
	/* inform hwpmc(4) if an executable is being mapped */
	if (error == 0 && handle_type == OBJT_VNODE &&
	    (prot & PROT_EXEC)) {
		pkm.pm_file = handle;
		pkm.pm_address = (uintptr_t) addr;
		PMC_CALL_HOOK(td, PMC_FN_MMAP, (void *) &pkm);
	}
#endif
	if (error == 0)
		td->td_retval[0] = (register_t) (addr + pageoff);
done:
	if (fp)
		fdrop(fp, td);

	return (error);
}

int
freebsd6_mmap(struct thread *td, struct freebsd6_mmap_args *uap)
{
	struct mmap_args oargs;

	oargs.addr = uap->addr;
	oargs.len = uap->len;
	oargs.prot = uap->prot;
	oargs.flags = uap->flags;
	oargs.fd = uap->fd;
	oargs.pos = uap->pos;
	return (sys_mmap(td, &oargs));
}

#ifdef COMPAT_43
#ifndef _SYS_SYSPROTO_H_
struct ommap_args {
	caddr_t addr;
	int len;
	int prot;
	int flags;
	int fd;
	long pos;
};
#endif
int
ommap(td, uap)
	struct thread *td;
	struct ommap_args *uap;
{
	struct mmap_args nargs;
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC | PROT_WRITE,
		PROT_READ,
		PROT_EXEC | PROT_READ,
		PROT_WRITE | PROT_READ,
		PROT_EXEC | PROT_WRITE | PROT_READ,
	};

#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100

	nargs.addr = uap->addr;
	nargs.len = uap->len;
	nargs.prot = cvtbsdprot[uap->prot & 0x7];
	nargs.flags = 0;
	if (uap->flags & OMAP_ANON)
		nargs.flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		nargs.flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		nargs.flags |= MAP_SHARED;
	else
		nargs.flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		nargs.flags |= MAP_FIXED;
	nargs.fd = uap->fd;
	nargs.pos = uap->pos;
	return (sys_mmap(td, &nargs));
}
#endif /* COMPAT_43 */


#ifndef _SYS_SYSPROTO_H_
struct msync_args {
	void *addr;
	size_t len;
	int flags;
};
#endif
/*
 * MPSAFE
 */
int
sys_msync(td, uap)
	struct thread *td;
	struct msync_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int flags;
	vm_map_t map;
	int rv;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	flags = uap->flags;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	map = &td->td_proc->p_vmspace->vm_map;

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_sync(map, addr, addr + size, (flags & MS_ASYNC) == 0,
	    (flags & MS_INVALIDATE) != 0);
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
		return (EINVAL);	/* Sun returns ENOMEM? */
	case KERN_INVALID_ARGUMENT:
		return (EBUSY);
	default:
		return (EINVAL);
	}
}
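
/*
 * Illustrative userland sketch (not part of the kernel sources and never
 * compiled here): flag combinations accepted by the msync() implementation
 * above.  "p" is assumed to be a MAP_SHARED file mapping of "len" bytes.
 */
#if 0	/* example only */
#include <sys/mman.h>
#include <errno.h>

static int
flush_mapping(void *p, size_t len)
{

	/* Synchronous write-back of dirty pages in the range. */
	if (msync(p, len, MS_SYNC) == -1)
		return (errno);
	/*
	 * MS_ASYNC | MS_INVALIDATE together is rejected with EINVAL by
	 * the check above; MS_INVALIDATE alone may fail with EBUSY (the
	 * KERN_INVALID_ARGUMENT case).
	 */
	if (msync(p, len, MS_INVALIDATE) == -1)
		return (errno);
	return (0);
}
#endif	/* example only */
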
#ifndef _SYS_SYSPROTO_H_
struct munmap_args {
	void *addr;
	size_t len;
};
#endif
/*
 * MPSAFE
 */
int
sys_munmap(td, uap)
	struct thread *td;
	struct munmap_args *uap;
{
#ifdef HWPMC_HOOKS
	struct pmckern_map_out pkm;
	vm_map_entry_t entry;
#endif
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_map_t map;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	if (size == 0)
		return (EINVAL);

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	if (addr < vm_map_min(map) || addr + size > vm_map_max(map))
		return (EINVAL);
	vm_map_lock(map);
#ifdef HWPMC_HOOKS
	/*
	 * Inform hwpmc if the address range being unmapped contains
	 * an executable region.
	 */
	pkm.pm_address = (uintptr_t) NULL;
	if (vm_map_lookup_entry(map, addr, &entry)) {
		for (;
		    entry != &map->header && entry->start < addr + size;
		    entry = entry->next) {
			if (vm_map_check_protection(map, entry->start,
			    entry->end, VM_PROT_EXECUTE) == TRUE) {
				pkm.pm_address = (uintptr_t) addr;
				pkm.pm_size = (size_t) size;
				break;
			}
		}
	}
#endif
	vm_map_delete(map, addr, addr + size);

#ifdef HWPMC_HOOKS
	/* downgrade the lock to prevent a LOR with the pmc-sx lock */
	vm_map_lock_downgrade(map);
	if (pkm.pm_address != (uintptr_t) NULL)
		PMC_CALL_HOOK(td, PMC_FN_MUNMAP, (void *) &pkm);
	vm_map_unlock_read(map);
#else
	vm_map_unlock(map);
#endif
	/* vm_map_delete returns nothing but KERN_SUCCESS anyway */
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mprotect_args {
	const void *addr;
	size_t len;
	int prot;
};
#endif
/*
 * MPSAFE
 */
int
sys_mprotect(td, uap)
	struct thread *td;
	struct mprotect_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t prot;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_protect(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, prot, FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	case KERN_RESOURCE_SHORTAGE:
		return (ENOMEM);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct minherit_args {
	void *addr;
	size_t len;
	int inherit;
};
#endif
/*
 * MPSAFE
 */
int
sys_minherit(td, uap)
	struct thread *td;
	struct minherit_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_inherit_t inherit;

	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	inherit = uap->inherit;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_inherit(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}
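
/*
 * Illustrative userland sketch (not part of the kernel sources and never
 * compiled here): minherit() controls what a child created by fork() sees
 * for a given range.  "p" is assumed to be an anonymous MAP_PRIVATE
 * mapping of "len" bytes.
 */
#if 0	/* example only */
#include <sys/mman.h>

static void
share_with_children(void *p, size_t len)
{

	/*
	 * A private mapping is normally copied on fork(); after this call
	 * the same pages are shared with child processes (VM_INHERIT_SHARE
	 * on the map entries covered by the range).
	 */
	(void)minherit(p, len, INHERIT_SHARE);
}
#endif	/* example only */
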
#ifndef _SYS_SYSPROTO_H_
struct madvise_args {
	void *addr;
	size_t len;
	int behav;
};
#endif

/*
 * MPSAFE
 */
int
sys_madvise(td, uap)
	struct thread *td;
	struct madvise_args *uap;
{
	vm_offset_t start, end;
	vm_map_t map;
	struct proc *p;
	int error;

	/*
	 * Check for our special case, advising the swap pager we are
	 * "immortal."
	 */
	if (uap->behav == MADV_PROTECT) {
		error = priv_check(td, PRIV_VM_MADV_PROTECT);
		if (error == 0) {
			p = td->td_proc;
			PROC_LOCK(p);
			p->p_flag |= P_PROTECTED;
			PROC_UNLOCK(p);
		}
		return (error);
	}
	/*
	 * Check for illegal behavior
	 */
	if (uap->behav < 0 || uap->behav > MADV_CORE)
		return (EINVAL);
	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	if ((vm_offset_t)uap->addr < vm_map_min(map) ||
	    (vm_offset_t)uap->addr + uap->len > vm_map_max(map))
		return (EINVAL);
	if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr)
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	start = trunc_page((vm_offset_t) uap->addr);
	end = round_page((vm_offset_t) uap->addr + uap->len);

	if (vm_map_madvise(map, start, end, uap->behav))
		return (EINVAL);
	return (0);
}
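
/*
 * Illustrative userland sketch (not part of the kernel sources and never
 * compiled here): typical madvise() hints for a large file mapping "p" of
 * "len" bytes that is read once, front to back.
 */
#if 0	/* example only */
#include <sys/mman.h>

static void
advise_scan(void *p, size_t len)
{

	/* Hint that the range will be accessed sequentially. */
	(void)madvise(p, len, MADV_SEQUENTIAL);
	/* Ask that the range be brought in ahead of use. */
	(void)madvise(p, len, MADV_WILLNEED);
	/* After the scan, tell the VM system the pages may be reclaimed. */
	(void)madvise(p, len, MADV_DONTNEED);
}
#endif	/* example only */
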
#ifndef _SYS_SYSPROTO_H_
struct mincore_args {
	const void *addr;
	size_t len;
	char *vec;
};
#endif

/*
 * MPSAFE
 */
int
sys_mincore(td, uap)
	struct thread *td;
	struct mincore_args *uap;
{
	vm_offset_t addr, first_addr;
	vm_offset_t end, cend;
	pmap_t pmap;
	vm_map_t map;
	char *vec;
	int error = 0;
	int vecindex, lastvecindex;
	vm_map_entry_t current;
	vm_map_entry_t entry;
	vm_object_t object;
	vm_paddr_t locked_pa;
	vm_page_t m;
	vm_pindex_t pindex;
	int mincoreinfo;
	unsigned int timestamp;
	boolean_t locked;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page((vm_offset_t) uap->addr);
	end = addr + (vm_size_t)round_page(uap->len);
	map = &td->td_proc->p_vmspace->vm_map;
	if (end > vm_map_max(map) || end < addr)
		return (ENOMEM);

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	pmap = vmspace_pmap(td->td_proc->p_vmspace);

	vm_map_lock_read(map);
RestartScan:
	timestamp = map->timestamp;

	if (!vm_map_lookup_entry(map, addr, &entry)) {
		vm_map_unlock_read(map);
		return (ENOMEM);
	}

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current process's address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for (current = entry;
	    (current != &map->header) && (current->start < end);
	    current = current->next) {

		/*
		 * check for contiguity
		 */
		if (current->end < end &&
		    (entry->next == &map->header ||
		    current->next->start > current->end)) {
			vm_map_unlock_read(map);
			return (ENOMEM);
		}

		/*
		 * ignore submaps (for now) or null objects
		 */
		if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) ||
		    current->object.vm_object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		/*
		 * scan this entry one page at a time
		 */
		while (addr < cend) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 */
			object = NULL;
			locked_pa = 0;
		retry:
			m = NULL;
			mincoreinfo = pmap_mincore(pmap, addr, &locked_pa);
			if (locked_pa != 0) {
				/*
				 * The page is mapped by this process but not
				 * both accessed and modified.  It is also
				 * managed.  Acquire the object lock so that
				 * other mappings might be examined.
				 */
				m = PHYS_TO_VM_PAGE(locked_pa);
				if (m->object != object) {
					if (object != NULL)
						VM_OBJECT_UNLOCK(object);
					object = m->object;
					locked = VM_OBJECT_TRYLOCK(object);
					vm_page_unlock(m);
					if (!locked) {
						VM_OBJECT_LOCK(object);
						vm_page_lock(m);
						goto retry;
					}
				} else
					vm_page_unlock(m);
				KASSERT(m->valid == VM_PAGE_BITS_ALL,
				    ("mincore: page %p is mapped but invalid",
				    m));
			} else if (mincoreinfo == 0) {
				/*
				 * The page is not mapped by this process.  If
				 * the object implements managed pages, then
				 * determine if the page is resident so that
				 * the mappings might be examined.
				 */
				if (current->object.vm_object != object) {
					if (object != NULL)
						VM_OBJECT_UNLOCK(object);
					object = current->object.vm_object;
					VM_OBJECT_LOCK(object);
				}
				if (object->type == OBJT_DEFAULT ||
				    object->type == OBJT_SWAP ||
				    object->type == OBJT_VNODE) {
					pindex = OFF_TO_IDX(current->offset +
					    (addr - current->start));
					m = vm_page_lookup(object, pindex);
					if (m != NULL && m->valid == 0)
						m = NULL;
					if (m != NULL)
						mincoreinfo = MINCORE_INCORE;
				}
			}
			if (m != NULL) {
				/* Examine other mappings to the page. */
				if (m->dirty == 0 && pmap_is_modified(m))
					vm_page_dirty(m);
				if (m->dirty != 0)
					mincoreinfo |= MINCORE_MODIFIED_OTHER;
				/*
				 * The first test for PGA_REFERENCED is an
				 * optimization.  The second test is
				 * required because a concurrent pmap
				 * operation could clear the last reference
				 * and set PGA_REFERENCED before the call to
				 * pmap_is_referenced().
				 */
				if ((m->aflags & PGA_REFERENCED) != 0 ||
				    pmap_is_referenced(m) ||
				    (m->aflags & PGA_REFERENCED) != 0)
					mincoreinfo |= MINCORE_REFERENCED_OTHER;
			}
			if (object != NULL)
				VM_OBJECT_UNLOCK(object);

			/*
			 * subyte may page fault.  In case it needs to modify
			 * the map, we release the lock.
			 */
			vm_map_unlock_read(map);

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = OFF_TO_IDX(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make sure that
			 * the byte vector is zeroed for those skipped entries.
			 */
			while ((lastvecindex + 1) < vecindex) {
				error = subyte(vec + lastvecindex, 0);
				if (error) {
					error = EFAULT;
					goto done2;
				}
				++lastvecindex;
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte(vec + vecindex, mincoreinfo);
			if (error) {
				error = EFAULT;
				goto done2;
			}

			/*
			 * If the map has changed, due to the subyte, the previous
			 * output may be invalid.
			 */
			vm_map_lock_read(map);
			if (timestamp != map->timestamp)
				goto RestartScan;

			lastvecindex = vecindex;
			addr += PAGE_SIZE;
		}
	}

	/*
	 * subyte may page fault.  In case it needs to modify
	 * the map, we release the lock.
	 */
	vm_map_unlock_read(map);

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = OFF_TO_IDX(end - first_addr);
	while ((lastvecindex + 1) < vecindex) {
		error = subyte(vec + lastvecindex, 0);
		if (error) {
			error = EFAULT;
			goto done2;
		}
		++lastvecindex;
	}

	/*
	 * If the map has changed, due to the subyte, the previous
	 * output may be invalid.
	 */
	vm_map_lock_read(map);
	if (timestamp != map->timestamp)
		goto RestartScan;
	vm_map_unlock_read(map);
done2:
	return (error);
}
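
/*
 * Illustrative userland sketch (not part of the kernel sources and never
 * compiled here): counting resident pages of a mapping using the per-page
 * byte vector filled in above.  Error handling is minimal.
 */
#if 0	/* example only */
#include <sys/mman.h>
#include <stdlib.h>
#include <unistd.h>

static size_t
resident_pages(void *p, size_t len)
{
	size_t i, n, npages, pgsz;
	char *vec;

	pgsz = getpagesize();
	npages = (len + pgsz - 1) / pgsz;
	vec = malloc(npages);
	n = 0;
	/* Each byte gets MINCORE_INCORE and related bits for one page. */
	if (vec != NULL && mincore(p, len, vec) == 0)
		for (i = 0; i < npages; i++)
			if (vec[i] & MINCORE_INCORE)
				n++;
	free(vec);
	return (n);
}
#endif	/* example only */
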
#ifndef _SYS_SYSPROTO_H_
struct mlock_args {
	const void *addr;
	size_t len;
};
#endif
/*
 * MPSAFE
 */
int
sys_mlock(td, uap)
	struct thread *td;
	struct mlock_args *uap;
{
	struct proc *proc;
	vm_offset_t addr, end, last, start;
	vm_size_t npages, size;
	unsigned long nsize;
	int error;

	error = priv_check(td, PRIV_VM_MLOCK);
	if (error)
		return (error);
	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	npages = atop(end - start);
	if (npages > vm_page_max_wired)
		return (ENOMEM);
	proc = td->td_proc;
	PROC_LOCK(proc);
	nsize = ptoa(npages +
	    pmap_wired_count(vm_map_pmap(&proc->p_vmspace->vm_map)));
	if (nsize > lim_cur(proc, RLIMIT_MEMLOCK)) {
		PROC_UNLOCK(proc);
		return (ENOMEM);
	}
	PROC_UNLOCK(proc);
	if (npages + cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);
#ifdef RACCT
	PROC_LOCK(proc);
	error = racct_set(proc, RACCT_MEMLOCK, nsize);
	PROC_UNLOCK(proc);
	if (error != 0)
		return (ENOMEM);
#endif
	error = vm_map_wire(&proc->p_vmspace->vm_map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (error != KERN_SUCCESS) {
		PROC_LOCK(proc);
		racct_set(proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(vm_map_pmap(&proc->p_vmspace->vm_map))));
		PROC_UNLOCK(proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

#ifndef _SYS_SYSPROTO_H_
struct mlockall_args {
	int how;
};
#endif

/*
 * MPSAFE
 */
int
sys_mlockall(td, uap)
	struct thread *td;
	struct mlockall_args *uap;
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = 0;

	if ((uap->how == 0) || ((uap->how & ~(MCL_CURRENT|MCL_FUTURE)) != 0))
		return (EINVAL);

#if 0
	/*
	 * If wiring all pages in the process would cause it to exceed
	 * a hard resource limit, return ENOMEM.
	 */
	PROC_LOCK(td->td_proc);
	if (map->size > lim_cur(td->td_proc, RLIMIT_MEMLOCK)) {
		PROC_UNLOCK(td->td_proc);
		return (ENOMEM);
	}
	PROC_UNLOCK(td->td_proc);
#else
	error = priv_check(td, PRIV_VM_MLOCK);
	if (error)
		return (error);
#endif
#ifdef RACCT
	PROC_LOCK(td->td_proc);
	error = racct_set(td->td_proc, RACCT_MEMLOCK, map->size);
	PROC_UNLOCK(td->td_proc);
	if (error != 0)
		return (ENOMEM);
#endif

	if (uap->how & MCL_FUTURE) {
		vm_map_lock(map);
		vm_map_modflags(map, MAP_WIREFUTURE, 0);
		vm_map_unlock(map);
		error = 0;
	}

	if (uap->how & MCL_CURRENT) {
		/*
		 * P1003.1-2001 mandates that all currently mapped pages
		 * will be memory resident and locked (wired) upon return
		 * from mlockall(). vm_map_wire() will wire pages, by
		 * calling vm_fault_wire() for each page in the region.
		 */
		error = vm_map_wire(map, vm_map_min(map), vm_map_max(map),
		    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
		error = (error == KERN_SUCCESS ? 0 : EAGAIN);
	}
#ifdef RACCT
	if (error != KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(vm_map_pmap(&td->td_proc->p_vmspace->vm_map))));
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}
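
/*
 * Illustrative userland sketch (not part of the kernel sources and never
 * compiled here): a latency-sensitive process wiring its address space.
 * Subject to the PRIV_VM_MLOCK privilege check and RLIMIT_MEMLOCK / RACCT
 * accounting performed above.
 */
#if 0	/* example only */
#include <sys/mman.h>
#include <err.h>

static void
wire_everything(void)
{

	/*
	 * MCL_CURRENT wires everything mapped right now; MCL_FUTURE sets
	 * MAP_WIREFUTURE on the vm_map so later mappings are wired as
	 * they are created.
	 */
	if (mlockall(MCL_CURRENT | MCL_FUTURE) == -1)
		err(1, "mlockall");
}
#endif	/* example only */
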
#ifndef _SYS_SYSPROTO_H_
struct munlockall_args {
	register_t dummy;
};
#endif

/*
 * MPSAFE
 */
int
sys_munlockall(td, uap)
	struct thread *td;
	struct munlockall_args *uap;
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);

	/* Clear the MAP_WIREFUTURE flag from this vm_map. */
	vm_map_lock(map);
	vm_map_modflags(map, 0, MAP_WIREFUTURE);
	vm_map_unlock(map);

	/* Forcibly unwire all pages. */
	error = vm_map_unwire(map, vm_map_min(map), vm_map_max(map),
	    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
#ifdef RACCT
	if (error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK, 0);
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct munlock_args {
	const void *addr;
	size_t len;
};
#endif
/*
 * MPSAFE
 */
int
sys_munlock(td, uap)
	struct thread *td;
	struct munlock_args *uap;
{
	vm_offset_t addr, end, last, start;
	vm_size_t size;
	int error;

	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);
	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	error = vm_map_unwire(&td->td_proc->p_vmspace->vm_map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_sub(td->td_proc, RACCT_MEMLOCK, ptoa(end - start));
		PROC_UNLOCK(td->td_proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * vm_mmap_vnode()
 *
 * Helper function for vm_mmap.  Perform sanity check specific for mmap
 * operations on vnodes.
 *
 * For VCHR vnodes, the vnode lock is held over the call to
 * vm_mmap_cdev() to keep vp->v_rdev valid.
 */
int
vm_mmap_vnode(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct vnode *vp, vm_ooffset_t *foffp, vm_object_t *objp,
    boolean_t *writecounted)
{
	struct vattr va;
	vm_object_t obj;
	vm_offset_t foff;
	struct mount *mp;
	struct ucred *cred;
	int error, flags, locktype, vfslocked;

	mp = vp->v_mount;
	cred = td->td_ucred;
	if ((*maxprotp & VM_PROT_WRITE) && (*flagsp & MAP_SHARED))
		locktype = LK_EXCLUSIVE;
	else
		locktype = LK_SHARED;
	vfslocked = VFS_LOCK_GIANT(mp);
	if ((error = vget(vp, locktype, td)) != 0) {
		VFS_UNLOCK_GIANT(vfslocked);
		return (error);
	}
	foff = *foffp;
	flags = *flagsp;
	obj = vp->v_object;
	if (vp->v_type == VREG) {
		/*
		 * Get the proper underlying object
		 */
		if (obj == NULL) {
			error = EINVAL;
			goto done;
		}
		if (obj->handle != vp) {
			vput(vp);
			vp = (struct vnode *)obj->handle;
			/*
			 * Bypass filesystems obey the mpsafety of the
			 * underlying fs.
			 */
			error = vget(vp, locktype, td);
			if (error != 0) {
				VFS_UNLOCK_GIANT(vfslocked);
				return (error);
			}
		}
		if (locktype == LK_EXCLUSIVE) {
			*writecounted = TRUE;
			vnode_pager_update_writecount(obj, 0, objsize);
		}
	} else if (vp->v_type == VCHR) {
		error = vm_mmap_cdev(td, objsize, prot, maxprotp, flagsp,
		    vp->v_rdev, foffp, objp);
		if (error == 0)
			goto mark_atime;
		goto done;
	} else {
		error = EINVAL;
		goto done;
	}
	if ((error = VOP_GETATTR(vp, &va, cred)))
		goto done;
#ifdef MAC
	error = mac_vnode_check_mmap(cred, vp, prot, flags);
	if (error != 0)
		goto done;
#endif
	if ((flags & MAP_SHARED) != 0) {
		if ((va.va_flags & (SF_SNAPSHOT|IMMUTABLE|APPEND)) != 0) {
			if (prot & PROT_WRITE) {
				error = EPERM;
				goto done;
			}
			*maxprotp &= ~VM_PROT_WRITE;
		}
	}
	/*
	 * If it is a regular file without any references
	 * we do not need to sync it.
	 * Adjust object size to be the size of the actual file.
	 */
	objsize = round_page(va.va_size);
	if (va.va_nlink == 0)
		flags |= MAP_NOSYNC;
	obj = vm_pager_allocate(OBJT_VNODE, vp, objsize, prot, foff, cred);
	if (obj == NULL) {
		error = ENOMEM;
		goto done;
	}
	*objp = obj;
	*flagsp = flags;

mark_atime:
	vfs_mark_atime(vp, cred);

done:
	vput(vp);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}

/*
 * vm_mmap_cdev()
 *
 * MPSAFE
 *
 * Helper function for vm_mmap.  Perform sanity check specific for mmap
 * operations on cdevs.
 */
int
vm_mmap_cdev(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct cdev *cdev, vm_ooffset_t *foff, vm_object_t *objp)
{
	vm_object_t obj;
	struct cdevsw *dsw;
	int error, flags, ref;

	flags = *flagsp;

	dsw = dev_refthread(cdev, &ref);
	if (dsw == NULL)
		return (ENXIO);
	if (dsw->d_flags & D_MMAP_ANON) {
		dev_relthread(cdev, ref);
		*maxprotp = VM_PROT_ALL;
		*flagsp |= MAP_ANON;
		return (0);
	}
	/*
	 * cdevs do not provide private mappings of any kind.
	 */
	if ((*maxprotp & VM_PROT_WRITE) == 0 &&
	    (prot & PROT_WRITE) != 0) {
		dev_relthread(cdev, ref);
		return (EACCES);
	}
	if (flags & (MAP_PRIVATE|MAP_COPY)) {
		dev_relthread(cdev, ref);
		return (EINVAL);
	}
	/*
	 * Force device mappings to be shared.
	 */
	flags |= MAP_SHARED;
#ifdef MAC_XXX
	error = mac_cdev_check_mmap(td->td_ucred, cdev, prot);
	if (error != 0) {
		dev_relthread(cdev, ref);
		return (error);
	}
#endif
	/*
	 * First, try d_mmap_single().  If that is not implemented
	 * (returns ENODEV), fall back to using the device pager.
	 * Note that d_mmap_single() must return a reference to the
	 * object (it needs to bump the reference count of the object
	 * it returns somehow).
	 *
	 * XXX assumes VM_PROT_* == PROT_*
	 */
	error = dsw->d_mmap_single(cdev, foff, objsize, objp, (int)prot);
	dev_relthread(cdev, ref);
	if (error != ENODEV)
		return (error);
	obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff,
	    td->td_ucred);
	if (obj == NULL)
		return (EINVAL);
	*objp = obj;
	*flagsp = flags;
	return (0);
}

/*
 * vm_mmap_shm()
 *
 * MPSAFE
 *
 * Helper function for vm_mmap.  Perform sanity check specific for mmap
 * operations on shm file descriptors.
 */
int
vm_mmap_shm(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct shmfd *shmfd, vm_ooffset_t foff, vm_object_t *objp)
{
	int error;

	if ((*flagsp & MAP_SHARED) != 0 &&
	    (*maxprotp & VM_PROT_WRITE) == 0 &&
	    (prot & PROT_WRITE) != 0)
		return (EACCES);
#ifdef MAC
	error = mac_posixshm_check_mmap(td->td_ucred, shmfd, prot, *flagsp);
	if (error != 0)
		return (error);
#endif
	error = shm_mmap(shmfd, objsize, foff, objp);
	if (error)
		return (error);
	return (0);
}
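
/*
 * Illustrative userland sketch (not part of the kernel sources and never
 * compiled here): a POSIX shared memory object reaches vm_mmap_shm() via
 * the DTYPE_SHM branch of sys_mmap() above.  The object name "/example"
 * is hypothetical.
 */
#if 0	/* example only */
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>

static void *
map_shared_region(size_t len)
{
	void *p;
	int fd;

	fd = shm_open("/example", O_RDWR | O_CREAT, 0600);
	if (fd == -1)
		return (NULL);
	if (ftruncate(fd, (off_t)len) == -1) {
		close(fd);
		return (NULL);
	}
	/* The descriptor can be closed once the mapping exists. */
	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	close(fd);
	return (p == MAP_FAILED ? NULL : p);
}
#endif	/* example only */
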
/*
 * vm_mmap()
 *
 * MPSAFE
 *
 * Internal version of mmap.  Currently used by mmap, exec, and System V
 * shared memory.  Handle is either a vnode pointer or NULL for MAP_ANON.
 */
int
vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
	vm_prot_t maxprot, int flags,
	objtype_t handle_type, void *handle,
	vm_ooffset_t foff)
{
	boolean_t fitit;
	vm_object_t object = NULL;
	struct thread *td = curthread;
	int docow, error, rv;
	boolean_t writecounted;

	if (size == 0)
		return (0);

	size = round_page(size);

	if (map == &td->td_proc->p_vmspace->vm_map) {
		PROC_LOCK(td->td_proc);
		if (map->size + size > lim_cur(td->td_proc, RLIMIT_VMEM)) {
			PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		if (racct_set(td->td_proc, RACCT_VMEM, map->size + size)) {
			PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		PROC_UNLOCK(td->td_proc);
	}

	/*
	 * We currently can only deal with page aligned file offsets.
	 * The check is here rather than in the syscall because the
	 * kernel calls this function internally for other mmapping
	 * operations (such as in exec) and non-aligned offsets will
	 * cause pmap inconsistencies...so we want to be sure to
	 * disallow this in all cases.
	 */
	if (foff & PAGE_MASK)
		return (EINVAL);

	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		if (*addr != trunc_page(*addr))
			return (EINVAL);
		fitit = FALSE;
	}
	writecounted = FALSE;

	/*
	 * Lookup/allocate object.
	 */
	switch (handle_type) {
	case OBJT_DEVICE:
		error = vm_mmap_cdev(td, size, prot, &maxprot, &flags,
		    handle, &foff, &object);
		break;
	case OBJT_VNODE:
		error = vm_mmap_vnode(td, size, prot, &maxprot, &flags,
		    handle, &foff, &object, &writecounted);
		break;
	case OBJT_SWAP:
		error = vm_mmap_shm(td, size, prot, &maxprot, &flags,
		    handle, foff, &object);
		break;
	case OBJT_DEFAULT:
		if (handle == NULL) {
			error = 0;
			break;
		}
		/* FALLTHROUGH */
	default:
		error = EINVAL;
		break;
	}
	if (error)
		return (error);
	if (flags & MAP_ANON) {
		object = NULL;
		docow = 0;
		/*
		 * Unnamed anonymous regions always start at 0.
		 */
		if (handle == 0)
			foff = 0;
	} else if (flags & MAP_PREFAULT_READ)
		docow = MAP_PREFAULT;
	else
		docow = MAP_PREFAULT_PARTIAL;

	if ((flags & (MAP_ANON|MAP_SHARED)) == 0)
		docow |= MAP_COPY_ON_WRITE;
	if (flags & MAP_NOSYNC)
		docow |= MAP_DISABLE_SYNCER;
	if (flags & MAP_NOCORE)
		docow |= MAP_DISABLE_COREDUMP;
	/* Shared memory is also shared with children. */
	if (flags & MAP_SHARED)
		docow |= MAP_INHERIT_SHARE;
	if (writecounted)
		docow |= MAP_VN_WRITECOUNT;

	if (flags & MAP_STACK)
		rv = vm_map_stack(map, *addr, size, prot, maxprot,
		    docow | MAP_STACK_GROWS_DOWN);
	else if (fitit)
		rv = vm_map_find(map, object, foff, addr, size,
		    object != NULL && object->type == OBJT_DEVICE ?
		    VMFS_ALIGNED_SPACE : VMFS_ANY_SPACE, prot, maxprot, docow);
	else
		rv = vm_map_fixed(map, object, foff, *addr, size,
		    prot, maxprot, docow);

	if (rv == KERN_SUCCESS) {
		/*
		 * If the process has requested that all future mappings
		 * be wired, then heed this.
		 */
		if (map->flags & MAP_WIREFUTURE)
			vm_map_wire(map, *addr, *addr + size,
			    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
	} else {
		/*
		 * If this mapping was accounted for in the vnode's
		 * writecount, then undo that now.
		 */
		if (writecounted)
			vnode_pager_release_writecount(object, 0, size);
		/*
		 * Lose the object reference.  Will destroy the
		 * object if it's an unnamed anonymous mapping
		 * or named anonymous without other references.
		 */
		vm_object_deallocate(object);
	}
	return (vm_mmap_to_errno(rv));
}

/*
 * Translate a Mach VM return code to zero on success or the appropriate errno
 * on failure.
 */
int
vm_mmap_to_errno(int rv)
{

	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}
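
/*
 * Illustrative userland sketch (not part of the kernel sources and never
 * compiled here): allocating a growable thread stack with MAP_STACK, as
 * handled at the top of sys_mmap() and by the vm_map_stack() case in
 * vm_mmap() above.  The stack size is whatever the caller chooses.
 */
#if 0	/* example only */
#include <sys/mman.h>

static void *
alloc_thread_stack(size_t size)
{

	/*
	 * MAP_STACK requires fd == -1, offset 0 and PROT_READ | PROT_WRITE;
	 * the kernel turns it into an anonymous grows-down mapping
	 * (MAP_STACK_GROWS_DOWN).
	 */
	return (mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_STACK,
	    -1, 0));
}
#endif	/* example only */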