/*-
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_hwpmc_hooks.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/capability.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/racct.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/sysent.h>
#include <sys/vmmeter.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vm_page.h>
#include <vm/vnode_pager.h>

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

#ifndef _SYS_SYSPROTO_H_
struct sbrk_args {
	int incr;
};
#endif

static int vm_mmap_vnode(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
    int *, struct vnode *, vm_ooffset_t *, vm_object_t *, boolean_t *);
static int vm_mmap_cdev(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
    int *, struct cdev *, vm_ooffset_t *, vm_object_t *);
static int vm_mmap_shm(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
    int *, struct shmfd *, vm_ooffset_t, vm_object_t *);

/*
 * MPSAFE
 */
/* ARGSUSED */
int
sys_sbrk(td, uap)
	struct thread *td;
	struct sbrk_args *uap;
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#ifndef _SYS_SYSPROTO_H_
struct sstk_args {
	int incr;
};
#endif

/*
 * MPSAFE
 */
/* ARGSUSED */
int
sys_sstk(td, uap)
	struct thread *td;
	struct sstk_args *uap;
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43)
#ifndef _SYS_SYSPROTO_H_
struct getpagesize_args {
	int dummy;
};
#endif

int
ogetpagesize(td, uap)
	struct thread *td;
	struct getpagesize_args *uap;
{
	/* MP SAFE */
	td->td_retval[0] = PAGE_SIZE;
	return (0);
}
#endif /* COMPAT_43 */


/*
 * Memory Map (mmap) system call.  Note that the file offset
 * and address are allowed to be NOT page aligned, though if
 * the MAP_FIXED flag is set, both must have the same remainder
 * modulo the PAGE_SIZE (POSIX 1003.1b).  If the address is not
 * page-aligned, the actual mapping starts at trunc_page(addr)
 * and the return value is adjusted up by the page offset.
 *
 * Generally speaking, only character devices which are themselves
 * memory-based, such as a video framebuffer, can be mmap'd.  Otherwise
 * there would be no cache coherency between a descriptor and a VM mapping
 * both to the same character device.
 */
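
/*
 * Illustrative example of the rule above (hypothetical values, assuming
 * 4 KB pages): a call such as
 *	mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0x1234)
 * maps the file starting at trunc_page(0x1234) == 0x1000, and the
 * address returned to the caller is advanced by the page offset 0x234
 * so that it corresponds to file offset 0x1234.
 */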
#ifndef _SYS_SYSPROTO_H_
struct mmap_args {
	void *addr;
	size_t len;
	int prot;
	int flags;
	int fd;
	long pad;
	off_t pos;
};
#endif

/*
 * MPSAFE
 */
int
sys_mmap(td, uap)
	struct thread *td;
	struct mmap_args *uap;
{
#ifdef HWPMC_HOOKS
	struct pmckern_map_in pkm;
#endif
	struct file *fp;
	struct vnode *vp;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t cap_maxprot, prot, maxprot;
	void *handle;
	objtype_t handle_type;
	int flags, error;
	off_t pos;
	struct vmspace *vms = td->td_proc->p_vmspace;
	cap_rights_t rights;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;
	flags = uap->flags;
	pos = uap->pos;

	fp = NULL;

	/* Make sure mapping fits into numeric range, etc. */
	if ((uap->len == 0 && !SV_CURPROC_FLAG(SV_AOUT) &&
	    curproc->p_osrel >= P_OSREL_MAP_ANON) ||
	    ((flags & MAP_ANON) && (uap->fd != -1 || pos != 0)))
		return (EINVAL);

	if (flags & MAP_STACK) {
		if ((uap->fd != -1) ||
		    ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE)))
			return (EINVAL);
		flags |= MAP_ANON;
		pos = 0;
	}

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;

	/* Adjust size for rounding (on both ends). */
	size += pageoff;			/* low end... */
	size = (vm_size_t) round_page(size);	/* hi end */

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		/* Address range must be all in user VM space. */
		if (addr < vm_map_min(&vms->vm_map) ||
		    addr + size > vm_map_max(&vms->vm_map))
			return (EINVAL);
		if (addr + size < addr)
			return (EINVAL);
	} else {
		/*
		 * XXX for non-fixed mappings where no hint is provided or
		 * the hint would fall in the potential heap space,
		 * place it after the end of the largest possible heap.
		 *
		 * There should really be a pmap call to determine a reasonable
		 * location.
		 */
		PROC_LOCK(td->td_proc);
		if (addr == 0 ||
		    (addr >= round_page((vm_offset_t)vms->vm_taddr) &&
		    addr < round_page((vm_offset_t)vms->vm_daddr +
		    lim_max(td->td_proc, RLIMIT_DATA))))
			addr = round_page((vm_offset_t)vms->vm_daddr +
			    lim_max(td->td_proc, RLIMIT_DATA));
		PROC_UNLOCK(td->td_proc);
	}
	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.
		 */
		handle = NULL;
		handle_type = OBJT_DEFAULT;
		maxprot = VM_PROT_ALL;
		cap_maxprot = VM_PROT_ALL;
	} else {
		/*
		 * Mapping file, get fp for validation and don't let the
		 * descriptor disappear on us if we block.  Check capability
		 * rights, but also return the maximum rights to be combined
		 * with maxprot later.
		 */
		rights = CAP_MMAP;
		if (prot & PROT_READ)
			rights |= CAP_READ;
		if ((flags & MAP_SHARED) != 0) {
			if (prot & PROT_WRITE)
				rights |= CAP_WRITE;
		}
		if (prot & PROT_EXEC)
			rights |= CAP_MAPEXEC;
		if ((error = fget_mmap(td, uap->fd, rights, &cap_maxprot,
		    &fp)) != 0)
			goto done;
		if (fp->f_type == DTYPE_SHM) {
			handle = fp->f_data;
			handle_type = OBJT_SWAP;
			maxprot = VM_PROT_NONE;

			/* FREAD should always be set. */
			if (fp->f_flag & FREAD)
				maxprot |= VM_PROT_EXECUTE | VM_PROT_READ;
			if (fp->f_flag & FWRITE)
				maxprot |= VM_PROT_WRITE;
			goto map;
		}
		if (fp->f_type != DTYPE_VNODE) {
			error = ENODEV;
			goto done;
		}
#if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \
    defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4)
		/*
		 * POSIX shared-memory objects are defined to have
		 * kernel persistence, and are not defined to support
		 * read(2)/write(2) -- or even open(2).  Thus, we can
		 * use MAP_ASYNC to trade on-disk coherence for speed.
		 * The shm_open(3) library routine turns on the FPOSIXSHM
		 * flag to request this behavior.
		 */
		if (fp->f_flag & FPOSIXSHM)
			flags |= MAP_NOSYNC;
#endif
		vp = fp->f_vnode;
		/*
		 * Ensure that file and memory protections are
		 * compatible.  Note that we only worry about
		 * writability if mapping is shared; in this case,
		 * current and max prot are dictated by the open file.
		 * XXX use the vnode instead?  Problem is: what
		 * credentials do we use for determination? What if
		 * proc does a setuid?
		 */
		if (vp->v_mount != NULL && vp->v_mount->mnt_flag & MNT_NOEXEC)
			maxprot = VM_PROT_NONE;
		else
			maxprot = VM_PROT_EXECUTE;
		if (fp->f_flag & FREAD) {
			maxprot |= VM_PROT_READ;
		} else if (prot & PROT_READ) {
			error = EACCES;
			goto done;
		}
		/*
		 * If we are sharing potential changes (either via
		 * MAP_SHARED or via the implicit sharing of character
		 * device mappings), and we are trying to get write
		 * permission although we opened it without asking
		 * for it, bail out.
		 */
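		/*
		 * Illustrative consequence of the check below (hypothetical
		 * call, not taken from this file): a descriptor opened
		 * O_RDONLY and mapped with MAP_SHARED | PROT_WRITE fails
		 * with EACCES, whereas a MAP_PRIVATE mapping of the same
		 * descriptor may still be granted VM_PROT_WRITE in maxprot,
		 * since private changes never reach the underlying file.
		 */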
		if ((flags & MAP_SHARED) != 0) {
			if ((fp->f_flag & FWRITE) != 0) {
				maxprot |= VM_PROT_WRITE;
			} else if ((prot & PROT_WRITE) != 0) {
				error = EACCES;
				goto done;
			}
		} else if (vp->v_type != VCHR || (fp->f_flag & FWRITE) != 0) {
			maxprot |= VM_PROT_WRITE;
			cap_maxprot |= VM_PROT_WRITE;
		}
		handle = (void *)vp;
		handle_type = OBJT_VNODE;
	}
map:
	td->td_fpop = fp;
	maxprot &= cap_maxprot;
	error = vm_mmap(&vms->vm_map, &addr, size, prot, maxprot,
	    flags, handle_type, handle, pos);
	td->td_fpop = NULL;
#ifdef HWPMC_HOOKS
	/* inform hwpmc(4) if an executable is being mapped */
	if (error == 0 && handle_type == OBJT_VNODE &&
	    (prot & PROT_EXEC)) {
		pkm.pm_file = handle;
		pkm.pm_address = (uintptr_t) addr;
		PMC_CALL_HOOK(td, PMC_FN_MMAP, (void *) &pkm);
	}
#endif
	if (error == 0)
		td->td_retval[0] = (register_t) (addr + pageoff);
done:
	if (fp)
		fdrop(fp, td);

	return (error);
}

int
freebsd6_mmap(struct thread *td, struct freebsd6_mmap_args *uap)
{
	struct mmap_args oargs;

	oargs.addr = uap->addr;
	oargs.len = uap->len;
	oargs.prot = uap->prot;
	oargs.flags = uap->flags;
	oargs.fd = uap->fd;
	oargs.pos = uap->pos;
	return (sys_mmap(td, &oargs));
}

#ifdef COMPAT_43
#ifndef _SYS_SYSPROTO_H_
struct ommap_args {
	caddr_t addr;
	int len;
	int prot;
	int flags;
	int fd;
	long pos;
};
#endif
int
ommap(td, uap)
	struct thread *td;
	struct ommap_args *uap;
{
	struct mmap_args nargs;
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC | PROT_WRITE,
		PROT_READ,
		PROT_EXEC | PROT_READ,
		PROT_WRITE | PROT_READ,
		PROT_EXEC | PROT_WRITE | PROT_READ,
	};

#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100

	nargs.addr = uap->addr;
	nargs.len = uap->len;
	nargs.prot = cvtbsdprot[uap->prot & 0x7];
	nargs.flags = 0;
	if (uap->flags & OMAP_ANON)
		nargs.flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		nargs.flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		nargs.flags |= MAP_SHARED;
	else
		nargs.flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		nargs.flags |= MAP_FIXED;
	nargs.fd = uap->fd;
	nargs.pos = uap->pos;
	return (sys_mmap(td, &nargs));
}
#endif /* COMPAT_43 */


#ifndef _SYS_SYSPROTO_H_
struct msync_args {
	void *addr;
	size_t len;
	int flags;
};
#endif
/*
 * MPSAFE
 */
int
sys_msync(td, uap)
	struct thread *td;
	struct msync_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int flags;
	vm_map_t map;
	int rv;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	flags = uap->flags;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	map = &td->td_proc->p_vmspace->vm_map;

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_sync(map, addr, addr + size, (flags & MS_ASYNC) == 0,
	    (flags & MS_INVALIDATE) != 0);
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
		return (EINVAL);	/* Sun returns ENOMEM? */
	case KERN_INVALID_ARGUMENT:
		return (EBUSY);
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
}

#ifndef _SYS_SYSPROTO_H_
struct munmap_args {
	void *addr;
	size_t len;
};
#endif
/*
 * MPSAFE
 */
int
sys_munmap(td, uap)
	struct thread *td;
	struct munmap_args *uap;
{
#ifdef HWPMC_HOOKS
	struct pmckern_map_out pkm;
	vm_map_entry_t entry;
#endif
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_map_t map;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	if (size == 0)
		return (EINVAL);

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	if (addr < vm_map_min(map) || addr + size > vm_map_max(map))
		return (EINVAL);
	vm_map_lock(map);
#ifdef HWPMC_HOOKS
	/*
	 * Inform hwpmc if the address range being unmapped contains
	 * an executable region.
	 */
	pkm.pm_address = (uintptr_t) NULL;
	if (vm_map_lookup_entry(map, addr, &entry)) {
		for (;
		    entry != &map->header && entry->start < addr + size;
		    entry = entry->next) {
			if (vm_map_check_protection(map, entry->start,
			    entry->end, VM_PROT_EXECUTE) == TRUE) {
				pkm.pm_address = (uintptr_t) addr;
				pkm.pm_size = (size_t) size;
				break;
			}
		}
	}
#endif
	vm_map_delete(map, addr, addr + size);

#ifdef HWPMC_HOOKS
	/* downgrade the lock to prevent a LOR with the pmc-sx lock */
	vm_map_lock_downgrade(map);
	if (pkm.pm_address != (uintptr_t) NULL)
		PMC_CALL_HOOK(td, PMC_FN_MUNMAP, (void *) &pkm);
	vm_map_unlock_read(map);
#else
	vm_map_unlock(map);
#endif
	/* vm_map_delete returns nothing but KERN_SUCCESS anyway */
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mprotect_args {
	const void *addr;
	size_t len;
	int prot;
};
#endif
/*
 * MPSAFE
 */
int
sys_mprotect(td, uap)
	struct thread *td;
	struct mprotect_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t prot;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_protect(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, prot, FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	case KERN_RESOURCE_SHORTAGE:
		return (ENOMEM);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct minherit_args {
	void *addr;
	size_t len;
	int inherit;
};
#endif
/*
 * MPSAFE
 */
int
sys_minherit(td, uap)
	struct thread *td;
	struct minherit_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_inherit_t inherit;

	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	inherit = uap->inherit;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_inherit(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct madvise_args {
	void *addr;
	size_t len;
	int behav;
};
#endif

/*
 * MPSAFE
 */
int
sys_madvise(td, uap)
	struct thread *td;
	struct madvise_args *uap;
{
	vm_offset_t start, end;
	vm_map_t map;
	struct proc *p;
	int error;

	/*
	 * Check for our special case, advising the swap pager we are
	 * "immortal."
	 */
	if (uap->behav == MADV_PROTECT) {
		error = priv_check(td, PRIV_VM_MADV_PROTECT);
		if (error == 0) {
			p = td->td_proc;
			PROC_LOCK(p);
			p->p_flag |= P_PROTECTED;
			PROC_UNLOCK(p);
		}
		return (error);
	}
	/*
	 * Check for illegal behavior
	 */
	if (uap->behav < 0 || uap->behav > MADV_CORE)
		return (EINVAL);
	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	if ((vm_offset_t)uap->addr < vm_map_min(map) ||
	    (vm_offset_t)uap->addr + uap->len > vm_map_max(map))
		return (EINVAL);
	if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr)
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	start = trunc_page((vm_offset_t) uap->addr);
	end = round_page((vm_offset_t) uap->addr + uap->len);

	if (vm_map_madvise(map, start, end, uap->behav))
		return (EINVAL);
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mincore_args {
	const void *addr;
	size_t len;
	char *vec;
};
#endif

/*
 * MPSAFE
 */
int
sys_mincore(td, uap)
	struct thread *td;
	struct mincore_args *uap;
{
	vm_offset_t addr, first_addr;
	vm_offset_t end, cend;
	pmap_t pmap;
	vm_map_t map;
	char *vec;
	int error = 0;
	int vecindex, lastvecindex;
	vm_map_entry_t current;
	vm_map_entry_t entry;
	vm_object_t object;
	vm_paddr_t locked_pa;
	vm_page_t m;
	vm_pindex_t pindex;
	int mincoreinfo;
	unsigned int timestamp;
	boolean_t locked;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page((vm_offset_t) uap->addr);
	end = addr + (vm_size_t)round_page(uap->len);
	map = &td->td_proc->p_vmspace->vm_map;
	if (end > vm_map_max(map) || end < addr)
		return (ENOMEM);

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	pmap = vmspace_pmap(td->td_proc->p_vmspace);

	vm_map_lock_read(map);
RestartScan:
	timestamp = map->timestamp;

	if (!vm_map_lookup_entry(map, addr, &entry)) {
		vm_map_unlock_read(map);
		return (ENOMEM);
	}

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current process's address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for (current = entry;
	    (current != &map->header) && (current->start < end);
	    current = current->next) {

		/*
		 * check for contiguity
		 */
		if (current->end < end &&
		    (entry->next == &map->header ||
		    current->next->start > current->end)) {
			vm_map_unlock_read(map);
			return (ENOMEM);
		}

		/*
		 * ignore submaps (for now) or null objects
		 */
		if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) ||
		    current->object.vm_object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		/*
		 * scan this entry one page at a time
		 */
		while (addr < cend) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 */
			object = NULL;
			locked_pa = 0;
		retry:
			m = NULL;
			mincoreinfo = pmap_mincore(pmap, addr, &locked_pa);
			if (locked_pa != 0) {
				/*
				 * The page is mapped by this process but not
				 * both accessed and modified.  It is also
				 * managed.  Acquire the object lock so that
				 * other mappings might be examined.
				 */
				m = PHYS_TO_VM_PAGE(locked_pa);
				if (m->object != object) {
					if (object != NULL)
						VM_OBJECT_UNLOCK(object);
					object = m->object;
					locked = VM_OBJECT_TRYLOCK(object);
					vm_page_unlock(m);
					if (!locked) {
						VM_OBJECT_LOCK(object);
						vm_page_lock(m);
						goto retry;
					}
				} else
					vm_page_unlock(m);
				KASSERT(m->valid == VM_PAGE_BITS_ALL,
				    ("mincore: page %p is mapped but invalid",
				    m));
			} else if (mincoreinfo == 0) {
				/*
				 * The page is not mapped by this process.  If
				 * the object implements managed pages, then
				 * determine if the page is resident so that
				 * the mappings might be examined.
				 */
				if (current->object.vm_object != object) {
					if (object != NULL)
						VM_OBJECT_UNLOCK(object);
					object = current->object.vm_object;
					VM_OBJECT_LOCK(object);
				}
				if (object->type == OBJT_DEFAULT ||
				    object->type == OBJT_SWAP ||
				    object->type == OBJT_VNODE) {
					pindex = OFF_TO_IDX(current->offset +
					    (addr - current->start));
					m = vm_page_lookup(object, pindex);
					if (m != NULL && m->valid == 0)
						m = NULL;
					if (m != NULL)
						mincoreinfo = MINCORE_INCORE;
				}
			}
			if (m != NULL) {
				/* Examine other mappings to the page. */
				if (m->dirty == 0 && pmap_is_modified(m))
					vm_page_dirty(m);
				if (m->dirty != 0)
					mincoreinfo |= MINCORE_MODIFIED_OTHER;
				/*
				 * The first test for PGA_REFERENCED is an
				 * optimization.  The second test is
				 * required because a concurrent pmap
				 * operation could clear the last reference
				 * and set PGA_REFERENCED before the call to
				 * pmap_is_referenced().
				 */
				if ((m->aflags & PGA_REFERENCED) != 0 ||
				    pmap_is_referenced(m) ||
				    (m->aflags & PGA_REFERENCED) != 0)
					mincoreinfo |= MINCORE_REFERENCED_OTHER;
			}
			if (object != NULL)
				VM_OBJECT_UNLOCK(object);

			/*
			 * subyte may page fault.  In case it needs to modify
			 * the map, we release the lock.
			 */
			vm_map_unlock_read(map);

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = OFF_TO_IDX(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make sure that
			 * the byte vector is zeroed for those skipped entries.
			 */
			while ((lastvecindex + 1) < vecindex) {
				error = subyte(vec + lastvecindex, 0);
				if (error) {
					error = EFAULT;
					goto done2;
				}
				++lastvecindex;
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte(vec + vecindex, mincoreinfo);
			if (error) {
				error = EFAULT;
				goto done2;
			}

			/*
			 * If the map has changed, due to the subyte, the previous
			 * output may be invalid.
			 */
			vm_map_lock_read(map);
			if (timestamp != map->timestamp)
				goto RestartScan;

			lastvecindex = vecindex;
			addr += PAGE_SIZE;
		}
	}

	/*
	 * subyte may page fault.  In case it needs to modify
	 * the map, we release the lock.
	 */
	vm_map_unlock_read(map);

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = OFF_TO_IDX(end - first_addr);
	while ((lastvecindex + 1) < vecindex) {
		error = subyte(vec + lastvecindex, 0);
		if (error) {
			error = EFAULT;
			goto done2;
		}
		++lastvecindex;
	}

	/*
	 * If the map has changed, due to the subyte, the previous
	 * output may be invalid.
	 */
	vm_map_lock_read(map);
	if (timestamp != map->timestamp)
		goto RestartScan;
	vm_map_unlock_read(map);
done2:
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct mlock_args {
	const void *addr;
	size_t len;
};
#endif
/*
 * MPSAFE
 */
int
sys_mlock(td, uap)
	struct thread *td;
	struct mlock_args *uap;
{
	struct proc *proc;
	vm_offset_t addr, end, last, start;
	vm_size_t npages, size;
	unsigned long nsize;
	int error;

	error = priv_check(td, PRIV_VM_MLOCK);
	if (error)
		return (error);
	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	npages = atop(end - start);
	if (npages > vm_page_max_wired)
		return (ENOMEM);
	proc = td->td_proc;
	PROC_LOCK(proc);
	nsize = ptoa(npages +
	    pmap_wired_count(vm_map_pmap(&proc->p_vmspace->vm_map)));
	if (nsize > lim_cur(proc, RLIMIT_MEMLOCK)) {
		PROC_UNLOCK(proc);
		return (ENOMEM);
	}
	PROC_UNLOCK(proc);
	if (npages + cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);
#ifdef RACCT
	PROC_LOCK(proc);
	error = racct_set(proc, RACCT_MEMLOCK, nsize);
	PROC_UNLOCK(proc);
	if (error != 0)
		return (ENOMEM);
#endif
	error = vm_map_wire(&proc->p_vmspace->vm_map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (error != KERN_SUCCESS) {
		PROC_LOCK(proc);
		racct_set(proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(vm_map_pmap(&proc->p_vmspace->vm_map))));
		PROC_UNLOCK(proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

#ifndef _SYS_SYSPROTO_H_
struct mlockall_args {
	int how;
};
#endif

/*
 * MPSAFE
 */
int
sys_mlockall(td, uap)
	struct thread *td;
	struct mlockall_args *uap;
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = 0;

	if ((uap->how == 0) || ((uap->how & ~(MCL_CURRENT|MCL_FUTURE)) != 0))
		return (EINVAL);

#if 0
	/*
	 * If wiring all pages in the process would cause it to exceed
	 * a hard resource limit, return ENOMEM.
	 */
	PROC_LOCK(td->td_proc);
	if (map->size > lim_cur(td->td_proc, RLIMIT_MEMLOCK)) {
		PROC_UNLOCK(td->td_proc);
		return (ENOMEM);
	}
	PROC_UNLOCK(td->td_proc);
#else
	error = priv_check(td, PRIV_VM_MLOCK);
	if (error)
		return (error);
#endif
#ifdef RACCT
	PROC_LOCK(td->td_proc);
	error = racct_set(td->td_proc, RACCT_MEMLOCK, map->size);
	PROC_UNLOCK(td->td_proc);
	if (error != 0)
		return (ENOMEM);
#endif

	if (uap->how & MCL_FUTURE) {
		vm_map_lock(map);
		vm_map_modflags(map, MAP_WIREFUTURE, 0);
		vm_map_unlock(map);
		error = 0;
	}

	if (uap->how & MCL_CURRENT) {
		/*
		 * P1003.1-2001 mandates that all currently mapped pages
		 * will be memory resident and locked (wired) upon return
		 * from mlockall().  vm_map_wire() will wire pages, by
		 * calling vm_fault_wire() for each page in the region.
		 */
		error = vm_map_wire(map, vm_map_min(map), vm_map_max(map),
		    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
		error = (error == KERN_SUCCESS ? 0 : EAGAIN);
	}
#ifdef RACCT
	if (error != KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(vm_map_pmap(&td->td_proc->p_vmspace->vm_map))));
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct munlockall_args {
	register_t dummy;
};
#endif

/*
 * MPSAFE
 */
int
sys_munlockall(td, uap)
	struct thread *td;
	struct munlockall_args *uap;
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);

	/* Clear the MAP_WIREFUTURE flag from this vm_map. */
	vm_map_lock(map);
	vm_map_modflags(map, 0, MAP_WIREFUTURE);
	vm_map_unlock(map);

	/* Forcibly unwire all pages. */
	error = vm_map_unwire(map, vm_map_min(map), vm_map_max(map),
	    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
#ifdef RACCT
	if (error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK, 0);
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct munlock_args {
	const void *addr;
	size_t len;
};
#endif
/*
 * MPSAFE
 */
int
sys_munlock(td, uap)
	struct thread *td;
	struct munlock_args *uap;
{
	vm_offset_t addr, end, last, start;
	vm_size_t size;
	int error;

	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);
	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	error = vm_map_unwire(&td->td_proc->p_vmspace->vm_map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_sub(td->td_proc, RACCT_MEMLOCK, ptoa(end - start));
		PROC_UNLOCK(td->td_proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * vm_mmap_vnode()
 *
 * Helper function for vm_mmap.  Performs the sanity checks specific to
 * mmap operations on vnodes.
 *
 * For VCHR vnodes, the vnode lock is held over the call to
 * vm_mmap_cdev() to keep vp->v_rdev valid.
 */
int
vm_mmap_vnode(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct vnode *vp, vm_ooffset_t *foffp, vm_object_t *objp,
    boolean_t *writecounted)
{
	struct vattr va;
	vm_object_t obj;
	vm_offset_t foff;
	struct mount *mp;
	struct ucred *cred;
	int error, flags, locktype, vfslocked;

	mp = vp->v_mount;
	cred = td->td_ucred;
	if ((*maxprotp & VM_PROT_WRITE) && (*flagsp & MAP_SHARED))
		locktype = LK_EXCLUSIVE;
	else
		locktype = LK_SHARED;
	vfslocked = VFS_LOCK_GIANT(mp);
	if ((error = vget(vp, locktype, td)) != 0) {
		VFS_UNLOCK_GIANT(vfslocked);
		return (error);
	}
	foff = *foffp;
	flags = *flagsp;
	obj = vp->v_object;
	if (vp->v_type == VREG) {
		/*
		 * Get the proper underlying object
		 */
		if (obj == NULL) {
			error = EINVAL;
			goto done;
		}
		if (obj->handle != vp) {
			vput(vp);
			vp = (struct vnode *)obj->handle;
			/*
			 * Bypass filesystems obey the mpsafety of the
			 * underlying fs.
			 */
			error = vget(vp, locktype, td);
			if (error != 0) {
				VFS_UNLOCK_GIANT(vfslocked);
				return (error);
			}
		}
		if (locktype == LK_EXCLUSIVE) {
			*writecounted = TRUE;
			vnode_pager_update_writecount(obj, 0, objsize);
		}
	} else if (vp->v_type == VCHR) {
		error = vm_mmap_cdev(td, objsize, prot, maxprotp, flagsp,
		    vp->v_rdev, foffp, objp);
		if (error == 0)
			goto mark_atime;
		goto done;
	} else {
		error = EINVAL;
		goto done;
	}
	if ((error = VOP_GETATTR(vp, &va, cred)))
		goto done;
#ifdef MAC
	error = mac_vnode_check_mmap(cred, vp, prot, flags);
	if (error != 0)
		goto done;
#endif
	if ((flags & MAP_SHARED) != 0) {
		if ((va.va_flags & (SF_SNAPSHOT|IMMUTABLE|APPEND)) != 0) {
			if (prot & PROT_WRITE) {
				error = EPERM;
				goto done;
			}
			*maxprotp &= ~VM_PROT_WRITE;
		}
	}
	/*
	 * If it is a regular file without any references
	 * we do not need to sync it.
	 * Adjust object size to be the size of the actual file.
	 */
	objsize = round_page(va.va_size);
	if (va.va_nlink == 0)
		flags |= MAP_NOSYNC;
	obj = vm_pager_allocate(OBJT_VNODE, vp, objsize, prot, foff, cred);
	if (obj == NULL) {
		error = ENOMEM;
		goto done;
	}
	*objp = obj;
	*flagsp = flags;

mark_atime:
	vfs_mark_atime(vp, cred);

done:
	vput(vp);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}

/*
 * vm_mmap_cdev()
 *
 * MPSAFE
 *
 * Helper function for vm_mmap.  Performs the sanity checks specific to
 * mmap operations on cdevs.
 */
int
vm_mmap_cdev(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct cdev *cdev, vm_ooffset_t *foff, vm_object_t *objp)
{
	vm_object_t obj;
	struct cdevsw *dsw;
	int error, flags, ref;

	flags = *flagsp;

	dsw = dev_refthread(cdev, &ref);
	if (dsw == NULL)
		return (ENXIO);
	if (dsw->d_flags & D_MMAP_ANON) {
		dev_relthread(cdev, ref);
		*maxprotp = VM_PROT_ALL;
		*flagsp |= MAP_ANON;
		return (0);
	}
	/*
	 * cdevs do not provide private mappings of any kind.
	 */
	if ((*maxprotp & VM_PROT_WRITE) == 0 &&
	    (prot & PROT_WRITE) != 0) {
		dev_relthread(cdev, ref);
		return (EACCES);
	}
	if (flags & (MAP_PRIVATE|MAP_COPY)) {
		dev_relthread(cdev, ref);
		return (EINVAL);
	}
	/*
	 * Force device mappings to be shared.
	 */
	flags |= MAP_SHARED;
#ifdef MAC_XXX
	error = mac_cdev_check_mmap(td->td_ucred, cdev, prot);
	if (error != 0) {
		dev_relthread(cdev, ref);
		return (error);
	}
#endif
	/*
	 * First, try d_mmap_single().  If that is not implemented
	 * (returns ENODEV), fall back to using the device pager.
	 * Note that d_mmap_single() must return a reference to the
	 * object (it needs to bump the reference count of the object
	 * it returns somehow).
	 *
	 * XXX assumes VM_PROT_* == PROT_*
	 */
	error = dsw->d_mmap_single(cdev, foff, objsize, objp, (int)prot);
	dev_relthread(cdev, ref);
	if (error != ENODEV)
		return (error);
	obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff,
	    td->td_ucred);
	if (obj == NULL)
		return (EINVAL);
	*objp = obj;
	*flagsp = flags;
	return (0);
}

/*
 * vm_mmap_shm()
 *
 * MPSAFE
 *
 * Helper function for vm_mmap.  Performs the sanity checks specific to
 * mmap operations on shm file descriptors.
 */
int
vm_mmap_shm(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct shmfd *shmfd, vm_ooffset_t foff, vm_object_t *objp)
{
	int error;

	if ((*flagsp & MAP_SHARED) != 0 &&
	    (*maxprotp & VM_PROT_WRITE) == 0 &&
	    (prot & PROT_WRITE) != 0)
		return (EACCES);
#ifdef MAC
	error = mac_posixshm_check_mmap(td->td_ucred, shmfd, prot, *flagsp);
	if (error != 0)
		return (error);
#endif
	error = shm_mmap(shmfd, objsize, foff, objp);
	if (error)
		return (error);
	return (0);
}

/*
 * vm_mmap()
 *
 * MPSAFE
 *
 * Internal version of mmap.  Currently used by mmap, exec, and sys5
 * shared memory.  Handle is either a vnode pointer or NULL for MAP_ANON.
 */
int
vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
	vm_prot_t maxprot, int flags,
	objtype_t handle_type, void *handle,
	vm_ooffset_t foff)
{
	boolean_t fitit;
	vm_object_t object = NULL;
	struct thread *td = curthread;
	int docow, error, rv;
	boolean_t writecounted;

	if (size == 0)
		return (0);

	size = round_page(size);

	if (map == &td->td_proc->p_vmspace->vm_map) {
		PROC_LOCK(td->td_proc);
		if (map->size + size > lim_cur(td->td_proc, RLIMIT_VMEM)) {
			PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		if (racct_set(td->td_proc, RACCT_VMEM, map->size + size)) {
			PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		PROC_UNLOCK(td->td_proc);
	}

	/*
	 * We currently can only deal with page aligned file offsets.
	 * The check is here rather than in the syscall because the
	 * kernel calls this function internally for other mmapping
	 * operations (such as in exec) and non-aligned offsets will
	 * cause pmap inconsistencies...so we want to be sure to
	 * disallow this in all cases.
	 */
	if (foff & PAGE_MASK)
		return (EINVAL);

	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		if (*addr != trunc_page(*addr))
			return (EINVAL);
		fitit = FALSE;
	}
	writecounted = FALSE;

	/*
	 * Lookup/allocate object.
	 */
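	/*
	 * Summary of the dispatch below: OBJT_DEVICE handles go through
	 * vm_mmap_cdev(), OBJT_VNODE through vm_mmap_vnode(), OBJT_SWAP
	 * (POSIX shm descriptors) through vm_mmap_shm(), and OBJT_DEFAULT
	 * with a NULL handle denotes an anonymous mapping.  Kernel-internal
	 * callers such as exec take the same path as the mmap(2) syscall.
	 */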
	switch (handle_type) {
	case OBJT_DEVICE:
		error = vm_mmap_cdev(td, size, prot, &maxprot, &flags,
		    handle, &foff, &object);
		break;
	case OBJT_VNODE:
		error = vm_mmap_vnode(td, size, prot, &maxprot, &flags,
		    handle, &foff, &object, &writecounted);
		break;
	case OBJT_SWAP:
		error = vm_mmap_shm(td, size, prot, &maxprot, &flags,
		    handle, foff, &object);
		break;
	case OBJT_DEFAULT:
		if (handle == NULL) {
			error = 0;
			break;
		}
		/* FALLTHROUGH */
	default:
		error = EINVAL;
		break;
	}
	if (error)
		return (error);
	if (flags & MAP_ANON) {
		object = NULL;
		docow = 0;
		/*
		 * Unnamed anonymous regions always start at 0.
		 */
		if (handle == 0)
			foff = 0;
	} else if (flags & MAP_PREFAULT_READ)
		docow = MAP_PREFAULT;
	else
		docow = MAP_PREFAULT_PARTIAL;

	if ((flags & (MAP_ANON|MAP_SHARED)) == 0)
		docow |= MAP_COPY_ON_WRITE;
	if (flags & MAP_NOSYNC)
		docow |= MAP_DISABLE_SYNCER;
	if (flags & MAP_NOCORE)
		docow |= MAP_DISABLE_COREDUMP;
	/* Shared memory is also shared with children. */
	if (flags & MAP_SHARED)
		docow |= MAP_INHERIT_SHARE;
	if (writecounted)
		docow |= MAP_VN_WRITECOUNT;

	if (flags & MAP_STACK)
		rv = vm_map_stack(map, *addr, size, prot, maxprot,
		    docow | MAP_STACK_GROWS_DOWN);
	else if (fitit)
		rv = vm_map_find(map, object, foff, addr, size,
		    object != NULL && object->type == OBJT_DEVICE ?
		    VMFS_ALIGNED_SPACE : VMFS_ANY_SPACE, prot, maxprot, docow);
	else
		rv = vm_map_fixed(map, object, foff, *addr, size,
		    prot, maxprot, docow);

	if (rv == KERN_SUCCESS) {
		/*
		 * If the process has requested that all future mappings
		 * be wired, then heed this.
		 */
		if (map->flags & MAP_WIREFUTURE)
			vm_map_wire(map, *addr, *addr + size,
			    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
	} else {
		/*
		 * If this mapping was accounted for in the vnode's
		 * writecount, then undo that now.
		 */
		if (writecounted)
			vnode_pager_release_writecount(object, 0, size);
		/*
		 * Lose the object reference.  Will destroy the
		 * object if it's an unnamed anonymous mapping
		 * or named anonymous without other references.
		 */
		vm_object_deallocate(object);
	}
	return (vm_mmap_to_errno(rv));
}

/*
 * Translate a Mach VM return code to zero on success or the appropriate errno
 * on failure.
 */
int
vm_mmap_to_errno(int rv)
{

	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}