/*-
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_hwpmc_hooks.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/capability.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/racct.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/sysent.h>
#include <sys/vmmeter.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vnode_pager.h>

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

#ifndef _SYS_SYSPROTO_H_
struct sbrk_args {
	int incr;
};
#endif

static int vm_mmap_vnode(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
    int *, struct vnode *, vm_ooffset_t *, vm_object_t *, boolean_t *);
static int vm_mmap_cdev(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
    int *, struct cdev *, vm_ooffset_t *, vm_object_t *);
static int vm_mmap_shm(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
    int *, struct shmfd *, vm_ooffset_t, vm_object_t *);

/*
 * MPSAFE
 */
/* ARGSUSED */
int
sys_sbrk(td, uap)
	struct thread *td;
	struct sbrk_args *uap;
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#ifndef _SYS_SYSPROTO_H_
struct sstk_args {
	int incr;
};
#endif

/*
 * MPSAFE
 */
/* ARGSUSED */
int
sys_sstk(td, uap)
	struct thread *td;
	struct sstk_args *uap;
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43)
#ifndef _SYS_SYSPROTO_H_
struct getpagesize_args {
	int dummy;
};
#endif

int
ogetpagesize(td, uap)
	struct thread *td;
	struct getpagesize_args *uap;
{
	/* MP SAFE */
	td->td_retval[0] = PAGE_SIZE;
	return (0);
}
#endif				/* COMPAT_43 */


/*
 * Memory Map (mmap) system call.  Note that the file offset
 * and address are allowed to be NOT page aligned, though if
 * the MAP_FIXED flag is set, both must have the same remainder
 * modulo the PAGE_SIZE (POSIX 1003.1b).  If the address is not
 * page-aligned, the actual mapping starts at trunc_page(addr)
 * and the return value is adjusted up by the page offset.
 *
 * Generally speaking, only character devices which are themselves
 * memory-based, such as a video framebuffer, can be mmap'd.  Otherwise
 * there would be no cache coherency between a descriptor and a VM mapping
 * both to the same character device.
 */
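/*
 * As an illustration of the alignment rules above (hypothetical values,
 * assuming PAGE_SIZE is 4096 and a descriptor fd): a call such as
 *
 *	mmap(NULL, 100, PROT_READ, MAP_SHARED, fd, 4196)
 *
 * has pageoff = 4196 & PAGE_MASK = 100, so the kernel maps the page at
 * file offset 4096 and returns trunc_page(addr) + 100; the returned
 * pointer therefore addresses the byte at file offset 4196 directly.
 * With MAP_FIXED, addr must share that same remainder of 100 modulo
 * PAGE_SIZE or the call fails with EINVAL.
 */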
#ifndef _SYS_SYSPROTO_H_
struct mmap_args {
	void *addr;
	size_t len;
	int prot;
	int flags;
	int fd;
	long pad;
	off_t pos;
};
#endif

/*
 * MPSAFE
 */
int
sys_mmap(td, uap)
	struct thread *td;
	struct mmap_args *uap;
{
#ifdef HWPMC_HOOKS
	struct pmckern_map_in pkm;
#endif
	struct file *fp;
	struct vnode *vp;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t cap_maxprot, prot, maxprot;
	void *handle;
	objtype_t handle_type;
	int flags, error;
	off_t pos;
	struct vmspace *vms = td->td_proc->p_vmspace;
	cap_rights_t rights;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;
	flags = uap->flags;
	pos = uap->pos;

	fp = NULL;

	/* Make sure mapping fits into numeric range, etc. */
	if ((uap->len == 0 && !SV_CURPROC_FLAG(SV_AOUT) &&
	    curproc->p_osrel >= P_OSREL_MAP_ANON) ||
	    ((flags & MAP_ANON) && (uap->fd != -1 || pos != 0)))
		return (EINVAL);

	if (flags & MAP_STACK) {
		if ((uap->fd != -1) ||
		    ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE)))
			return (EINVAL);
		flags |= MAP_ANON;
		pos = 0;
	}

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;

	/* Adjust size for rounding (on both ends). */
	size += pageoff;			/* low end... */
	size = (vm_size_t) round_page(size);	/* hi end */

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		/* Address range must be all in user VM space. */
		if (addr < vm_map_min(&vms->vm_map) ||
		    addr + size > vm_map_max(&vms->vm_map))
			return (EINVAL);
		if (addr + size < addr)
			return (EINVAL);
	} else {
		/*
		 * XXX for non-fixed mappings where no hint is provided or
		 * the hint would fall in the potential heap space,
		 * place it after the end of the largest possible heap.
		 *
		 * There should really be a pmap call to determine a reasonable
		 * location.
		 */
		PROC_LOCK(td->td_proc);
		if (addr == 0 ||
		    (addr >= round_page((vm_offset_t)vms->vm_taddr) &&
		    addr < round_page((vm_offset_t)vms->vm_daddr +
		    lim_max(td->td_proc, RLIMIT_DATA))))
			addr = round_page((vm_offset_t)vms->vm_daddr +
			    lim_max(td->td_proc, RLIMIT_DATA));
		PROC_UNLOCK(td->td_proc);
	}
	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.
		 */
		handle = NULL;
		handle_type = OBJT_DEFAULT;
		maxprot = VM_PROT_ALL;
		cap_maxprot = VM_PROT_ALL;
	} else {
		/*
		 * Mapping file, get fp for validation and don't let the
		 * descriptor disappear on us if we block.  Check capability
		 * rights, but also return the maximum rights to be combined
		 * with maxprot later.
		 */
		rights = CAP_MMAP;
		if (prot & PROT_READ)
			rights |= CAP_READ;
		if ((flags & MAP_SHARED) != 0) {
			if (prot & PROT_WRITE)
				rights |= CAP_WRITE;
		}
		if (prot & PROT_EXEC)
			rights |= CAP_MAPEXEC;
		if ((error = fget_mmap(td, uap->fd, rights, &cap_maxprot,
		    &fp)) != 0)
			goto done;
		if (fp->f_type == DTYPE_SHM) {
			handle = fp->f_data;
			handle_type = OBJT_SWAP;
			maxprot = VM_PROT_NONE;

			/* FREAD should always be set. */
			if (fp->f_flag & FREAD)
				maxprot |= VM_PROT_EXECUTE | VM_PROT_READ;
			if (fp->f_flag & FWRITE)
				maxprot |= VM_PROT_WRITE;
			goto map;
		}
		if (fp->f_type != DTYPE_VNODE) {
			error = ENODEV;
			goto done;
		}
#if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \
    defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4)
		/*
		 * POSIX shared-memory objects are defined to have
		 * kernel persistence, and are not defined to support
		 * read(2)/write(2) -- or even open(2).  Thus, we can
		 * use MAP_ASYNC to trade on-disk coherence for speed.
		 * The shm_open(3) library routine turns on the FPOSIXSHM
		 * flag to request this behavior.
		 */
		if (fp->f_flag & FPOSIXSHM)
			flags |= MAP_NOSYNC;
#endif
		vp = fp->f_vnode;
		/*
		 * Ensure that file and memory protections are
		 * compatible.  Note that we only worry about
		 * writability if mapping is shared; in this case,
		 * current and max prot are dictated by the open file.
		 * XXX use the vnode instead?  Problem is: what
		 * credentials do we use for determination? What if
		 * proc does a setuid?
		 */
		if (vp->v_mount != NULL && vp->v_mount->mnt_flag & MNT_NOEXEC)
			maxprot = VM_PROT_NONE;
		else
			maxprot = VM_PROT_EXECUTE;
		if (fp->f_flag & FREAD) {
			maxprot |= VM_PROT_READ;
		} else if (prot & PROT_READ) {
			error = EACCES;
			goto done;
		}
		/*
		 * If we are sharing potential changes (either via
		 * MAP_SHARED or via the implicit sharing of character
		 * device mappings), and we are trying to get write
		 * permission although we opened it without asking
		 * for it, bail out.
		 */
		if ((flags & MAP_SHARED) != 0) {
			if ((fp->f_flag & FWRITE) != 0) {
				maxprot |= VM_PROT_WRITE;
			} else if ((prot & PROT_WRITE) != 0) {
				error = EACCES;
				goto done;
			}
		} else if (vp->v_type != VCHR || (fp->f_flag & FWRITE) != 0) {
			maxprot |= VM_PROT_WRITE;
			cap_maxprot |= VM_PROT_WRITE;
		}
		handle = (void *)vp;
		handle_type = OBJT_VNODE;
	}
map:
	td->td_fpop = fp;
	maxprot &= cap_maxprot;
	error = vm_mmap(&vms->vm_map, &addr, size, prot, maxprot,
	    flags, handle_type, handle, pos);
	td->td_fpop = NULL;
#ifdef HWPMC_HOOKS
	/* inform hwpmc(4) if an executable is being mapped */
	if (error == 0 && handle_type == OBJT_VNODE &&
	    (prot & PROT_EXEC)) {
		pkm.pm_file = handle;
		pkm.pm_address = (uintptr_t) addr;
		PMC_CALL_HOOK(td, PMC_FN_MMAP, (void *) &pkm);
	}
#endif
	if (error == 0)
		td->td_retval[0] = (register_t) (addr + pageoff);
done:
	if (fp)
		fdrop(fp, td);

	return (error);
}

int
freebsd6_mmap(struct thread *td, struct freebsd6_mmap_args *uap)
{
	struct mmap_args oargs;

	oargs.addr = uap->addr;
	oargs.len = uap->len;
	oargs.prot = uap->prot;
	oargs.flags = uap->flags;
	oargs.fd = uap->fd;
	oargs.pos = uap->pos;
	return (sys_mmap(td, &oargs));
}

#ifdef COMPAT_43
#ifndef _SYS_SYSPROTO_H_
struct ommap_args {
	caddr_t addr;
	int len;
	int prot;
	int flags;
	int fd;
	long pos;
};
#endif
int
ommap(td, uap)
	struct thread *td;
	struct ommap_args *uap;
{
	struct mmap_args nargs;
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC | PROT_WRITE,
		PROT_READ,
		PROT_EXEC | PROT_READ,
		PROT_WRITE | PROT_READ,
		PROT_EXEC | PROT_WRITE | PROT_READ,
	};

#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100

	nargs.addr = uap->addr;
	nargs.len = uap->len;
	nargs.prot = cvtbsdprot[uap->prot & 0x7];
	nargs.flags = 0;
	if (uap->flags & OMAP_ANON)
		nargs.flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		nargs.flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		nargs.flags |= MAP_SHARED;
	else
		nargs.flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		nargs.flags |= MAP_FIXED;
	nargs.fd = uap->fd;
	nargs.pos = uap->pos;
	return (sys_mmap(td, &nargs));
}
#endif				/* COMPAT_43 */


#ifndef _SYS_SYSPROTO_H_
struct msync_args {
	void *addr;
	size_t len;
	int flags;
};
#endif
/*
 * MPSAFE
 */
int
sys_msync(td, uap)
	struct thread *td;
	struct msync_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int flags;
	vm_map_t map;
	int rv;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	flags = uap->flags;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	map = &td->td_proc->p_vmspace->vm_map;

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_sync(map, addr, addr + size, (flags & MS_ASYNC) == 0,
	    (flags & MS_INVALIDATE) != 0);
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
		return (EINVAL);	/* Sun returns ENOMEM? */
	case KERN_INVALID_ARGUMENT:
		return (EBUSY);
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
}
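/*
 * A sketch of how the flag handling above plays out (illustrative only):
 * msync(addr, len, MS_SYNC) reaches vm_map_sync() with syncio TRUE and
 * invalidate FALSE, MS_ASYNC makes syncio FALSE, and the combination
 * MS_ASYNC | MS_INVALIDATE is rejected with EINVAL before the map is
 * ever consulted.
 */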

#ifndef _SYS_SYSPROTO_H_
struct munmap_args {
	void *addr;
	size_t len;
};
#endif
/*
 * MPSAFE
 */
int
sys_munmap(td, uap)
	struct thread *td;
	struct munmap_args *uap;
{
#ifdef HWPMC_HOOKS
	struct pmckern_map_out pkm;
	vm_map_entry_t entry;
#endif
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_map_t map;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	if (size == 0)
		return (EINVAL);

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	if (addr < vm_map_min(map) || addr + size > vm_map_max(map))
		return (EINVAL);
	vm_map_lock(map);
#ifdef HWPMC_HOOKS
	/*
	 * Inform hwpmc if the address range being unmapped contains
	 * an executable region.
	 */
	pkm.pm_address = (uintptr_t) NULL;
	if (vm_map_lookup_entry(map, addr, &entry)) {
		for (;
		    entry != &map->header && entry->start < addr + size;
		    entry = entry->next) {
			if (vm_map_check_protection(map, entry->start,
				entry->end, VM_PROT_EXECUTE) == TRUE) {
				pkm.pm_address = (uintptr_t) addr;
				pkm.pm_size = (size_t) size;
				break;
			}
		}
	}
#endif
	vm_map_delete(map, addr, addr + size);

#ifdef HWPMC_HOOKS
	/* downgrade the lock to prevent a LOR with the pmc-sx lock */
	vm_map_lock_downgrade(map);
	if (pkm.pm_address != (uintptr_t) NULL)
		PMC_CALL_HOOK(td, PMC_FN_MUNMAP, (void *) &pkm);
	vm_map_unlock_read(map);
#else
	vm_map_unlock(map);
#endif
	/* vm_map_delete returns nothing but KERN_SUCCESS anyway */
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mprotect_args {
	const void *addr;
	size_t len;
	int prot;
};
#endif
/*
 * MPSAFE
 */
int
sys_mprotect(td, uap)
	struct thread *td;
	struct mprotect_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t prot;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_protect(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, prot, FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	case KERN_RESOURCE_SHORTAGE:
		return (ENOMEM);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct minherit_args {
	void *addr;
	size_t len;
	int inherit;
};
#endif
/*
 * MPSAFE
 */
int
sys_minherit(td, uap)
	struct thread *td;
	struct minherit_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_inherit_t inherit;

	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	inherit = uap->inherit;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_inherit(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct madvise_args {
	void *addr;
	size_t len;
	int behav;
};
#endif

/*
 * MPSAFE
 */
int
sys_madvise(td, uap)
	struct thread *td;
	struct madvise_args *uap;
{
	vm_offset_t start, end;
	vm_map_t map;
	struct proc *p;
	int error;

	/*
	 * Check for our special case, advising the swap pager we are
	 * "immortal."
	 */
	if (uap->behav == MADV_PROTECT) {
		error = priv_check(td, PRIV_VM_MADV_PROTECT);
		if (error == 0) {
			p = td->td_proc;
			PROC_LOCK(p);
			p->p_flag |= P_PROTECTED;
			PROC_UNLOCK(p);
		}
		return (error);
	}
	/*
	 * Check for illegal behavior
	 */
	if (uap->behav < 0 || uap->behav > MADV_CORE)
		return (EINVAL);
	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	if ((vm_offset_t)uap->addr < vm_map_min(map) ||
	    (vm_offset_t)uap->addr + uap->len > vm_map_max(map))
		return (EINVAL);
	if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr)
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	start = trunc_page((vm_offset_t) uap->addr);
	end = round_page((vm_offset_t) uap->addr + uap->len);

	if (vm_map_madvise(map, start, end, uap->behav))
		return (EINVAL);
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mincore_args {
	const void *addr;
	size_t len;
	char *vec;
};
#endif

/*
 * MPSAFE
 */
int
sys_mincore(td, uap)
	struct thread *td;
	struct mincore_args *uap;
{
	vm_offset_t addr, first_addr;
	vm_offset_t end, cend;
	pmap_t pmap;
	vm_map_t map;
	char *vec;
	int error = 0;
	int vecindex, lastvecindex;
	vm_map_entry_t current;
	vm_map_entry_t entry;
	vm_object_t object;
	vm_paddr_t locked_pa;
	vm_page_t m;
	vm_pindex_t pindex;
	int mincoreinfo;
	unsigned int timestamp;
	boolean_t locked;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page((vm_offset_t) uap->addr);
	end = addr + (vm_size_t)round_page(uap->len);
	map = &td->td_proc->p_vmspace->vm_map;
	if (end > vm_map_max(map) || end < addr)
		return (ENOMEM);

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	pmap = vmspace_pmap(td->td_proc->p_vmspace);

	vm_map_lock_read(map);
RestartScan:
	timestamp = map->timestamp;

	if (!vm_map_lookup_entry(map, addr, &entry)) {
		vm_map_unlock_read(map);
		return (ENOMEM);
	}

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current processes address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for (current = entry;
	    (current != &map->header) && (current->start < end);
	    current = current->next) {

		/*
		 * check for contiguity
		 */
		if (current->end < end &&
		    (current->next == &map->header ||
		    current->next->start > current->end)) {
			vm_map_unlock_read(map);
			return (ENOMEM);
		}

		/*
		 * ignore submaps (for now) or null objects
		 */
		if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) ||
		    current->object.vm_object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		/*
		 * scan this entry one page at a time
		 */
		while (addr < cend) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 */
			object = NULL;
			locked_pa = 0;
		retry:
			m = NULL;
			mincoreinfo = pmap_mincore(pmap, addr, &locked_pa);
			if (locked_pa != 0) {
				/*
				 * The page is mapped by this process but not
				 * both accessed and modified.  It is also
				 * managed.  Acquire the object lock so that
				 * other mappings might be examined.
				 */
				m = PHYS_TO_VM_PAGE(locked_pa);
				if (m->object != object) {
					if (object != NULL)
						VM_OBJECT_UNLOCK(object);
					object = m->object;
					locked = VM_OBJECT_TRYLOCK(object);
					vm_page_unlock(m);
					if (!locked) {
						VM_OBJECT_LOCK(object);
						vm_page_lock(m);
						goto retry;
					}
				} else
					vm_page_unlock(m);
				KASSERT(m->valid == VM_PAGE_BITS_ALL,
				    ("mincore: page %p is mapped but invalid",
				    m));
			} else if (mincoreinfo == 0) {
				/*
				 * The page is not mapped by this process.  If
				 * the object implements managed pages, then
				 * determine if the page is resident so that
				 * the mappings might be examined.
				 */
				if (current->object.vm_object != object) {
					if (object != NULL)
						VM_OBJECT_UNLOCK(object);
					object = current->object.vm_object;
					VM_OBJECT_LOCK(object);
				}
				if (object->type == OBJT_DEFAULT ||
				    object->type == OBJT_SWAP ||
				    object->type == OBJT_VNODE) {
					pindex = OFF_TO_IDX(current->offset +
					    (addr - current->start));
					m = vm_page_lookup(object, pindex);
					if (m == NULL &&
					    vm_page_is_cached(object, pindex))
						mincoreinfo = MINCORE_INCORE;
					if (m != NULL && m->valid == 0)
						m = NULL;
					if (m != NULL)
						mincoreinfo = MINCORE_INCORE;
				}
			}
			if (m != NULL) {
				/* Examine other mappings to the page. */
				if (m->dirty == 0 && pmap_is_modified(m))
					vm_page_dirty(m);
				if (m->dirty != 0)
					mincoreinfo |= MINCORE_MODIFIED_OTHER;
				/*
				 * The first test for PGA_REFERENCED is an
				 * optimization.  The second test is
				 * required because a concurrent pmap
				 * operation could clear the last reference
				 * and set PGA_REFERENCED before the call to
				 * pmap_is_referenced().
				 */
				if ((m->aflags & PGA_REFERENCED) != 0 ||
				    pmap_is_referenced(m) ||
				    (m->aflags & PGA_REFERENCED) != 0)
					mincoreinfo |= MINCORE_REFERENCED_OTHER;
			}
			if (object != NULL)
				VM_OBJECT_UNLOCK(object);

			/*
			 * subyte may page fault.  In case it needs to modify
			 * the map, we release the lock.
			 */
			vm_map_unlock_read(map);

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = OFF_TO_IDX(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make sure that
			 * the byte vector is zeroed for those skipped entries.
			 */
			while ((lastvecindex + 1) < vecindex) {
				error = subyte(vec + lastvecindex, 0);
				if (error) {
					error = EFAULT;
					goto done2;
				}
				++lastvecindex;
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte(vec + vecindex, mincoreinfo);
			if (error) {
				error = EFAULT;
				goto done2;
			}

			/*
			 * If the map has changed, due to the subyte, the previous
			 * output may be invalid.
			 */
			vm_map_lock_read(map);
			if (timestamp != map->timestamp)
				goto RestartScan;

			lastvecindex = vecindex;
			addr += PAGE_SIZE;
		}
	}

	/*
	 * subyte may page fault.  In case it needs to modify
	 * the map, we release the lock.
	 */
	vm_map_unlock_read(map);

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = OFF_TO_IDX(end - first_addr);
	while ((lastvecindex + 1) < vecindex) {
		error = subyte(vec + lastvecindex, 0);
		if (error) {
			error = EFAULT;
			goto done2;
		}
		++lastvecindex;
	}

	/*
	 * If the map has changed, due to the subyte, the previous
	 * output may be invalid.
	 */
	vm_map_lock_read(map);
	if (timestamp != map->timestamp)
		goto RestartScan;
	vm_map_unlock_read(map);
done2:
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct mlock_args {
	const void *addr;
	size_t len;
};
#endif
/*
 * MPSAFE
 */
int
sys_mlock(td, uap)
	struct thread *td;
	struct mlock_args *uap;
{
	struct proc *proc;
	vm_offset_t addr, end, last, start;
	vm_size_t npages, size;
	unsigned long nsize;
	int error;

	error = priv_check(td, PRIV_VM_MLOCK);
	if (error)
		return (error);
	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	npages = atop(end - start);
	if (npages > vm_page_max_wired)
		return (ENOMEM);
	proc = td->td_proc;
	PROC_LOCK(proc);
	nsize = ptoa(npages +
	    pmap_wired_count(vm_map_pmap(&proc->p_vmspace->vm_map)));
	if (nsize > lim_cur(proc, RLIMIT_MEMLOCK)) {
		PROC_UNLOCK(proc);
		return (ENOMEM);
	}
	PROC_UNLOCK(proc);
	if (npages + cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);
#ifdef RACCT
	PROC_LOCK(proc);
	error = racct_set(proc, RACCT_MEMLOCK, nsize);
	PROC_UNLOCK(proc);
	if (error != 0)
		return (ENOMEM);
#endif
	error = vm_map_wire(&proc->p_vmspace->vm_map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (error != KERN_SUCCESS) {
		PROC_LOCK(proc);
		racct_set(proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(vm_map_pmap(&proc->p_vmspace->vm_map))));
		PROC_UNLOCK(proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}
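/*
 * A worked example of the rounding above (hypothetical values, assuming
 * PAGE_SIZE is 4096): mlock(p, 1) with p = 0x1800 gives start =
 * trunc_page(0x1800) = 0x1000 and end = round_page(0x1801) = 0x2000, so
 * a single-byte request wires the entire page containing it, and that
 * whole page is what counts against RLIMIT_MEMLOCK.
 */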

#ifndef _SYS_SYSPROTO_H_
struct mlockall_args {
	int how;
};
#endif

/*
 * MPSAFE
 */
int
sys_mlockall(td, uap)
	struct thread *td;
	struct mlockall_args *uap;
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = 0;

	if ((uap->how == 0) || ((uap->how & ~(MCL_CURRENT|MCL_FUTURE)) != 0))
		return (EINVAL);

#if 0
	/*
	 * If wiring all pages in the process would cause it to exceed
	 * a hard resource limit, return ENOMEM.
	 */
	PROC_LOCK(td->td_proc);
	if (map->size > lim_cur(td->td_proc, RLIMIT_MEMLOCK)) {
		PROC_UNLOCK(td->td_proc);
		return (ENOMEM);
	}
	PROC_UNLOCK(td->td_proc);
#else
	error = priv_check(td, PRIV_VM_MLOCK);
	if (error)
		return (error);
#endif
#ifdef RACCT
	PROC_LOCK(td->td_proc);
	error = racct_set(td->td_proc, RACCT_MEMLOCK, map->size);
	PROC_UNLOCK(td->td_proc);
	if (error != 0)
		return (ENOMEM);
#endif

	if (uap->how & MCL_FUTURE) {
		vm_map_lock(map);
		vm_map_modflags(map, MAP_WIREFUTURE, 0);
		vm_map_unlock(map);
		error = 0;
	}

	if (uap->how & MCL_CURRENT) {
		/*
		 * P1003.1-2001 mandates that all currently mapped pages
		 * will be memory resident and locked (wired) upon return
		 * from mlockall().  vm_map_wire() will wire pages, by
		 * calling vm_fault_wire() for each page in the region.
		 */
		error = vm_map_wire(map, vm_map_min(map), vm_map_max(map),
		    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
		error = (error == KERN_SUCCESS ? 0 : EAGAIN);
	}
#ifdef RACCT
	if (error != KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(vm_map_pmap(&td->td_proc->p_vmspace->vm_map))));
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct munlockall_args {
	register_t dummy;
};
#endif

/*
 * MPSAFE
 */
int
sys_munlockall(td, uap)
	struct thread *td;
	struct munlockall_args *uap;
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);

	/* Clear the MAP_WIREFUTURE flag from this vm_map. */
	vm_map_lock(map);
	vm_map_modflags(map, 0, MAP_WIREFUTURE);
	vm_map_unlock(map);

	/* Forcibly unwire all pages. */
	error = vm_map_unwire(map, vm_map_min(map), vm_map_max(map),
	    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
#ifdef RACCT
	if (error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK, 0);
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct munlock_args {
	const void *addr;
	size_t len;
};
#endif
/*
 * MPSAFE
 */
int
sys_munlock(td, uap)
	struct thread *td;
	struct munlock_args *uap;
{
	vm_offset_t addr, end, last, start;
	vm_size_t size;
	int error;

	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);
	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	error = vm_map_unwire(&td->td_proc->p_vmspace->vm_map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_sub(td->td_proc, RACCT_MEMLOCK, ptoa(end - start));
		PROC_UNLOCK(td->td_proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * vm_mmap_vnode()
 *
 * Helper function for vm_mmap.  Perform the sanity checks specific to mmap
 * operations on vnodes.
 *
 * For VCHR vnodes, the vnode lock is held over the call to
 * vm_mmap_cdev() to keep vp->v_rdev valid.
 */
int
vm_mmap_vnode(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct vnode *vp, vm_ooffset_t *foffp, vm_object_t *objp,
    boolean_t *writecounted)
{
	struct vattr va;
	vm_object_t obj;
	vm_offset_t foff;
	struct mount *mp;
	struct ucred *cred;
	int error, flags, locktype, vfslocked;

	mp = vp->v_mount;
	cred = td->td_ucred;
	if ((*maxprotp & VM_PROT_WRITE) && (*flagsp & MAP_SHARED))
		locktype = LK_EXCLUSIVE;
	else
		locktype = LK_SHARED;
	vfslocked = VFS_LOCK_GIANT(mp);
	if ((error = vget(vp, locktype, td)) != 0) {
		VFS_UNLOCK_GIANT(vfslocked);
		return (error);
	}
	foff = *foffp;
	flags = *flagsp;
	obj = vp->v_object;
	if (vp->v_type == VREG) {
		/*
		 * Get the proper underlying object
		 */
		if (obj == NULL) {
			error = EINVAL;
			goto done;
		}
		if (obj->handle != vp) {
			vput(vp);
			vp = (struct vnode *)obj->handle;
			/*
			 * Bypass filesystems obey the mpsafety of the
			 * underlying fs.
			 */
			error = vget(vp, locktype, td);
			if (error != 0) {
				VFS_UNLOCK_GIANT(vfslocked);
				return (error);
			}
		}
		if (locktype == LK_EXCLUSIVE) {
			*writecounted = TRUE;
			vnode_pager_update_writecount(obj, 0, objsize);
		}
	} else if (vp->v_type == VCHR) {
		error = vm_mmap_cdev(td, objsize, prot, maxprotp, flagsp,
		    vp->v_rdev, foffp, objp);
		if (error == 0)
			goto mark_atime;
		goto done;
	} else {
		error = EINVAL;
		goto done;
	}
	if ((error = VOP_GETATTR(vp, &va, cred)))
		goto done;
#ifdef MAC
	error = mac_vnode_check_mmap(cred, vp, prot, flags);
	if (error != 0)
		goto done;
#endif
	if ((flags & MAP_SHARED) != 0) {
		if ((va.va_flags & (SF_SNAPSHOT|IMMUTABLE|APPEND)) != 0) {
			if (prot & PROT_WRITE) {
				error = EPERM;
				goto done;
			}
			*maxprotp &= ~VM_PROT_WRITE;
		}
	}
	/*
	 * If it is a regular file without any references
	 * we do not need to sync it.
	 * Adjust object size to be the size of actual file.
	 */
	objsize = round_page(va.va_size);
	if (va.va_nlink == 0)
		flags |= MAP_NOSYNC;
	obj = vm_pager_allocate(OBJT_VNODE, vp, objsize, prot, foff, cred);
	if (obj == NULL) {
		error = ENOMEM;
		goto done;
	}
	*objp = obj;
	*flagsp = flags;

mark_atime:
	vfs_mark_atime(vp, cred);

done:
	vput(vp);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}

/*
 * vm_mmap_cdev()
 *
 * MPSAFE
 *
 * Helper function for vm_mmap.  Perform the sanity checks specific to mmap
 * operations on cdevs.
 */
int
vm_mmap_cdev(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct cdev *cdev, vm_ooffset_t *foff, vm_object_t *objp)
{
	vm_object_t obj;
	struct cdevsw *dsw;
	int error, flags, ref;

	flags = *flagsp;

	dsw = dev_refthread(cdev, &ref);
	if (dsw == NULL)
		return (ENXIO);
	if (dsw->d_flags & D_MMAP_ANON) {
		dev_relthread(cdev, ref);
		*maxprotp = VM_PROT_ALL;
		*flagsp |= MAP_ANON;
		return (0);
	}
	/*
	 * cdevs do not provide private mappings of any kind.
	 */
	if ((*maxprotp & VM_PROT_WRITE) == 0 &&
	    (prot & PROT_WRITE) != 0) {
		dev_relthread(cdev, ref);
		return (EACCES);
	}
	if (flags & (MAP_PRIVATE|MAP_COPY)) {
		dev_relthread(cdev, ref);
		return (EINVAL);
	}
	/*
	 * Force device mappings to be shared.
	 */
	flags |= MAP_SHARED;
#ifdef MAC_XXX
	error = mac_cdev_check_mmap(td->td_ucred, cdev, prot);
	if (error != 0) {
		dev_relthread(cdev, ref);
		return (error);
	}
#endif
	/*
	 * First, try d_mmap_single().  If that is not implemented
	 * (returns ENODEV), fall back to using the device pager.
	 * Note that d_mmap_single() must return a reference to the
	 * object (it needs to bump the reference count of the object
	 * it returns somehow).
	 *
	 * XXX assumes VM_PROT_* == PROT_*
	 */
	error = dsw->d_mmap_single(cdev, foff, objsize, objp, (int)prot);
	dev_relthread(cdev, ref);
	if (error != ENODEV)
		return (error);
	obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff,
	    td->td_ucred);
	if (obj == NULL)
		return (EINVAL);
	*objp = obj;
	*flagsp = flags;
	return (0);
}

/*
 * vm_mmap_shm()
 *
 * MPSAFE
 *
 * Helper function for vm_mmap.  Perform the sanity checks specific to mmap
 * operations on shm file descriptors.
 */
int
vm_mmap_shm(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct shmfd *shmfd, vm_ooffset_t foff, vm_object_t *objp)
{
	int error;

	if ((*flagsp & MAP_SHARED) != 0 &&
	    (*maxprotp & VM_PROT_WRITE) == 0 &&
	    (prot & PROT_WRITE) != 0)
		return (EACCES);
#ifdef MAC
	error = mac_posixshm_check_mmap(td->td_ucred, shmfd, prot, *flagsp);
	if (error != 0)
		return (error);
#endif
	error = shm_mmap(shmfd, objsize, foff, objp);
	if (error)
		return (error);
	return (0);
}

/*
 * vm_mmap()
 *
 * MPSAFE
 *
 * Internal version of mmap.  Currently used by mmap, exec, and sys5
 * shared memory.  Handle is either a vnode pointer or NULL for MAP_ANON.
 */
int
vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
	vm_prot_t maxprot, int flags,
	objtype_t handle_type, void *handle,
	vm_ooffset_t foff)
{
	boolean_t fitit;
	vm_object_t object = NULL;
	struct thread *td = curthread;
	int docow, error, rv;
	boolean_t writecounted;

	if (size == 0)
		return (0);

	size = round_page(size);

	if (map == &td->td_proc->p_vmspace->vm_map) {
		PROC_LOCK(td->td_proc);
		if (map->size + size > lim_cur(td->td_proc, RLIMIT_VMEM)) {
			PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		if (racct_set(td->td_proc, RACCT_VMEM, map->size + size)) {
			PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		PROC_UNLOCK(td->td_proc);
	}

	/*
	 * We currently can only deal with page aligned file offsets.
	 * The check is here rather than in the syscall because the
	 * kernel calls this function internally for other mmapping
	 * operations (such as in exec) and non-aligned offsets will
	 * cause pmap inconsistencies...so we want to be sure to
	 * disallow this in all cases.
	 */
	if (foff & PAGE_MASK)
		return (EINVAL);

	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		if (*addr != trunc_page(*addr))
			return (EINVAL);
		fitit = FALSE;
	}
	writecounted = FALSE;

	/*
	 * Lookup/allocate object.
	 */
	switch (handle_type) {
	case OBJT_DEVICE:
		error = vm_mmap_cdev(td, size, prot, &maxprot, &flags,
		    handle, &foff, &object);
		break;
	case OBJT_VNODE:
		error = vm_mmap_vnode(td, size, prot, &maxprot, &flags,
		    handle, &foff, &object, &writecounted);
		break;
	case OBJT_SWAP:
		error = vm_mmap_shm(td, size, prot, &maxprot, &flags,
		    handle, foff, &object);
		break;
	case OBJT_DEFAULT:
		if (handle == NULL) {
			error = 0;
			break;
		}
		/* FALLTHROUGH */
	default:
		error = EINVAL;
		break;
	}
	if (error)
		return (error);
	if (flags & MAP_ANON) {
		object = NULL;
		docow = 0;
		/*
		 * Unnamed anonymous regions always start at 0.
		 */
		if (handle == 0)
			foff = 0;
	} else if (flags & MAP_PREFAULT_READ)
		docow = MAP_PREFAULT;
	else
		docow = MAP_PREFAULT_PARTIAL;

	if ((flags & (MAP_ANON|MAP_SHARED)) == 0)
		docow |= MAP_COPY_ON_WRITE;
	if (flags & MAP_NOSYNC)
		docow |= MAP_DISABLE_SYNCER;
	if (flags & MAP_NOCORE)
		docow |= MAP_DISABLE_COREDUMP;
	/* Shared memory is also shared with children. */
	if (flags & MAP_SHARED)
		docow |= MAP_INHERIT_SHARE;
	if (writecounted)
		docow |= MAP_VN_WRITECOUNT;

	if (flags & MAP_STACK)
		rv = vm_map_stack(map, *addr, size, prot, maxprot,
		    docow | MAP_STACK_GROWS_DOWN);
	else if (fitit)
		rv = vm_map_find(map, object, foff, addr, size,
		    object != NULL && object->type == OBJT_DEVICE ?
		    VMFS_ALIGNED_SPACE : VMFS_ANY_SPACE, prot, maxprot, docow);
	else
		rv = vm_map_fixed(map, object, foff, *addr, size,
		    prot, maxprot, docow);

	if (rv == KERN_SUCCESS) {
		/*
		 * If the process has requested that all future mappings
		 * be wired, then heed this.
		 */
		if (map->flags & MAP_WIREFUTURE) {
			vm_map_wire(map, *addr, *addr + size,
			    VM_MAP_WIRE_USER | ((flags & MAP_STACK) ?
			    VM_MAP_WIRE_HOLESOK : VM_MAP_WIRE_NOHOLES));
		}
	} else {
		/*
		 * If this mapping was accounted for in the vnode's
		 * writecount, then undo that now.
		 */
		if (writecounted)
			vnode_pager_release_writecount(object, 0, size);
		/*
		 * Lose the object reference.  Will destroy the
		 * object if it's an unnamed anonymous mapping
		 * or named anonymous without other references.
		 */
		vm_object_deallocate(object);
	}
	return (vm_mmap_to_errno(rv));
}

/*
 * Translate a Mach VM return code to zero on success or the appropriate errno
 * on failure.
 */
int
vm_mmap_to_errno(int rv)
{

	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}