/*-
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_hwpmc_hooks.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/capability.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/racct.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/sysent.h>
#include <sys/vmmeter.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vm_page.h>

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

#ifndef _SYS_SYSPROTO_H_
struct sbrk_args {
	int incr;
};
#endif

static int vm_mmap_vnode(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
    int *, struct vnode *, vm_ooffset_t *, vm_object_t *);
static int vm_mmap_cdev(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
    int *, struct cdev *, vm_ooffset_t *, vm_object_t *);
static int vm_mmap_shm(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
    int *, struct shmfd *, vm_ooffset_t, vm_object_t *);

/*
 * MPSAFE
 */
/* ARGSUSED */
int
sys_sbrk(td, uap)
	struct thread *td;
	struct sbrk_args *uap;
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#ifndef _SYS_SYSPROTO_H_
struct sstk_args {
	int incr;
};
#endif

/*
 * MPSAFE
 */
/* ARGSUSED */
int
sys_sstk(td, uap)
	struct thread *td;
	struct sstk_args *uap;
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43)
#ifndef _SYS_SYSPROTO_H_
struct getpagesize_args {
	int dummy;
};
#endif

/* ARGSUSED */
int
ogetpagesize(td, uap)
	struct thread *td;
	struct getpagesize_args *uap;
{
	/* MP SAFE */
	td->td_retval[0] = PAGE_SIZE;
	return (0);
}
#endif				/* COMPAT_43 */


/*
 * Memory Map (mmap) system call.  Note that the file offset
 * and address are allowed to be NOT page aligned, though if
 * the MAP_FIXED flag is set, both must have the same remainder
 * modulo the PAGE_SIZE (POSIX 1003.1b).  If the address is not
 * page-aligned, the actual mapping starts at trunc_page(addr)
 * and the return value is adjusted up by the page offset.
 *
 * Generally speaking, only character devices which are themselves
 * memory-based, such as a video framebuffer, can be mmap'd.  Otherwise
 * there would be no cache coherency between a descriptor and a VM mapping
 * both to the same character device.
 */
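/*
 * For illustration (an editorial example, not part of the original file,
 * and assuming PAGE_SIZE == 4096): a call such as
 *
 *	mmap(NULL, 100, PROT_READ, MAP_SHARED, fd, 0x1234)
 *
 * has pageoff == 0x234, so the kernel maps the file from offset 0x1000
 * for round_page(100 + 0x234) bytes and returns addr + 0x234, which
 * addresses file offset 0x1234 as requested.
 */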
#ifndef _SYS_SYSPROTO_H_
struct mmap_args {
	void *addr;
	size_t len;
	int prot;
	int flags;
	int fd;
	long pad;
	off_t pos;
};
#endif

/*
 * MPSAFE
 */
int
sys_mmap(td, uap)
	struct thread *td;
	struct mmap_args *uap;
{
#ifdef HWPMC_HOOKS
	struct pmckern_map_in pkm;
#endif
	struct file *fp;
	struct vnode *vp;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t cap_maxprot, prot, maxprot;
	void *handle;
	objtype_t handle_type;
	int flags, error;
	off_t pos;
	struct vmspace *vms = td->td_proc->p_vmspace;
	cap_rights_t rights;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;
	flags = uap->flags;
	pos = uap->pos;

	fp = NULL;

	/* Make sure mapping fits into numeric range, etc. */
	if ((uap->len == 0 && !SV_CURPROC_FLAG(SV_AOUT) &&
	     curproc->p_osrel >= P_OSREL_MAP_ANON) ||
	    ((flags & MAP_ANON) && (uap->fd != -1 || pos != 0)))
		return (EINVAL);

	if (flags & MAP_STACK) {
		if ((uap->fd != -1) ||
		    ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE)))
			return (EINVAL);
		flags |= MAP_ANON;
		pos = 0;
	}

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;

	/* Adjust size for rounding (on both ends). */
	size += pageoff;			/* low end... */
	size = (vm_size_t) round_page(size);	/* hi end */

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		/* Address range must be all in user VM space. */
		if (addr < vm_map_min(&vms->vm_map) ||
		    addr + size > vm_map_max(&vms->vm_map))
			return (EINVAL);
		if (addr + size < addr)
			return (EINVAL);
	} else {
		/*
		 * XXX for non-fixed mappings where no hint is provided or
		 * the hint would fall in the potential heap space,
		 * place it after the end of the largest possible heap.
		 *
		 * There should really be a pmap call to determine a reasonable
		 * location.
		 */
		PROC_LOCK(td->td_proc);
		if (addr == 0 ||
		    (addr >= round_page((vm_offset_t)vms->vm_taddr) &&
		    addr < round_page((vm_offset_t)vms->vm_daddr +
		    lim_max(td->td_proc, RLIMIT_DATA))))
			addr = round_page((vm_offset_t)vms->vm_daddr +
			    lim_max(td->td_proc, RLIMIT_DATA));
		PROC_UNLOCK(td->td_proc);
	}
	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.
		 */
		handle = NULL;
		handle_type = OBJT_DEFAULT;
		maxprot = VM_PROT_ALL;
		cap_maxprot = VM_PROT_ALL;
	} else {
		/*
		 * Mapping file, get fp for validation and don't let the
		 * descriptor disappear on us if we block.  Check capability
		 * rights, but also return the maximum rights to be combined
		 * with maxprot later.
		 */
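		/*
		 * The capability rights requested below mirror the
		 * protections asked for by the caller: CAP_READ for
		 * PROT_READ, CAP_WRITE only for writable MAP_SHARED
		 * mappings (a private mapping never writes back to the
		 * file), and CAP_MAPEXEC for PROT_EXEC.
		 */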
		rights = CAP_MMAP;
		if (prot & PROT_READ)
			rights |= CAP_READ;
		if ((flags & MAP_SHARED) != 0) {
			if (prot & PROT_WRITE)
				rights |= CAP_WRITE;
		}
		if (prot & PROT_EXEC)
			rights |= CAP_MAPEXEC;
		if ((error = fget_mmap(td, uap->fd, rights, &cap_maxprot,
		    &fp)) != 0)
			goto done;
		if (fp->f_type == DTYPE_SHM) {
			handle = fp->f_data;
			handle_type = OBJT_SWAP;
			maxprot = VM_PROT_NONE;

			/* FREAD should always be set. */
			if (fp->f_flag & FREAD)
				maxprot |= VM_PROT_EXECUTE | VM_PROT_READ;
			if (fp->f_flag & FWRITE)
				maxprot |= VM_PROT_WRITE;
			goto map;
		}
		if (fp->f_type != DTYPE_VNODE) {
			error = ENODEV;
			goto done;
		}
#if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \
    defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4)
		/*
		 * POSIX shared-memory objects are defined to have
		 * kernel persistence, and are not defined to support
		 * read(2)/write(2) -- or even open(2).  Thus, we can
		 * use MAP_ASYNC to trade on-disk coherence for speed.
		 * The shm_open(3) library routine turns on the FPOSIXSHM
		 * flag to request this behavior.
		 */
		if (fp->f_flag & FPOSIXSHM)
			flags |= MAP_NOSYNC;
#endif
		vp = fp->f_vnode;
		/*
		 * Ensure that file and memory protections are
		 * compatible.  Note that we only worry about
		 * writability if mapping is shared; in this case,
		 * current and max prot are dictated by the open file.
		 * XXX use the vnode instead?  Problem is: what
		 * credentials do we use for determination? What if
		 * proc does a setuid?
		 */
		if (vp->v_mount != NULL && vp->v_mount->mnt_flag & MNT_NOEXEC)
			maxprot = VM_PROT_NONE;
		else
			maxprot = VM_PROT_EXECUTE;
		if (fp->f_flag & FREAD) {
			maxprot |= VM_PROT_READ;
		} else if (prot & PROT_READ) {
			error = EACCES;
			goto done;
		}
		/*
		 * If we are sharing potential changes (either via
		 * MAP_SHARED or via the implicit sharing of character
		 * device mappings), and we are trying to get write
		 * permission although we opened it without asking
		 * for it, bail out.
		 */
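		/*
		 * For a private mapping of a regular file, on the other
		 * hand, write access can always be granted: modifications
		 * go to anonymous copy-on-write pages and are never written
		 * back to the file.  Character devices are the exception,
		 * since their mappings are effectively always shared.
		 */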
		if ((flags & MAP_SHARED) != 0) {
			if ((fp->f_flag & FWRITE) != 0) {
				maxprot |= VM_PROT_WRITE;
			} else if ((prot & PROT_WRITE) != 0) {
				error = EACCES;
				goto done;
			}
		} else if (vp->v_type != VCHR || (fp->f_flag & FWRITE) != 0) {
			maxprot |= VM_PROT_WRITE;
			cap_maxprot |= VM_PROT_WRITE;
		}
		handle = (void *)vp;
		handle_type = OBJT_VNODE;
	}
map:
	td->td_fpop = fp;
	maxprot &= cap_maxprot;
	error = vm_mmap(&vms->vm_map, &addr, size, prot, maxprot,
	    flags, handle_type, handle, pos);
	td->td_fpop = NULL;
#ifdef HWPMC_HOOKS
	/* inform hwpmc(4) if an executable is being mapped */
	if (error == 0 && handle_type == OBJT_VNODE &&
	    (prot & PROT_EXEC)) {
		pkm.pm_file = handle;
		pkm.pm_address = (uintptr_t) addr;
		PMC_CALL_HOOK(td, PMC_FN_MMAP, (void *) &pkm);
	}
#endif
	if (error == 0)
		td->td_retval[0] = (register_t) (addr + pageoff);
done:
	if (fp)
		fdrop(fp, td);

	return (error);
}

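/*
 * Illustrative userland use of the interface implemented above (an
 * editorial sketch, not part of this file; the path and sizes are made up
 * and error handling is omitted):
 *
 *	int fd = open("/tmp/data", O_RDWR);
 *	char *p = mmap(NULL, 8192, PROT_READ | PROT_WRITE, MAP_SHARED,
 *	    fd, 0);
 *	p[0] = 'x';			(modifies the backing file)
 *	msync(p, 8192, MS_SYNC);	(flush dirty pages to disk)
 *	munmap(p, 8192);
 */
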
int
freebsd6_mmap(struct thread *td, struct freebsd6_mmap_args *uap)
{
	struct mmap_args oargs;

	oargs.addr = uap->addr;
	oargs.len = uap->len;
	oargs.prot = uap->prot;
	oargs.flags = uap->flags;
	oargs.fd = uap->fd;
	oargs.pos = uap->pos;
	return (sys_mmap(td, &oargs));
}

#ifdef COMPAT_43
#ifndef _SYS_SYSPROTO_H_
struct ommap_args {
	caddr_t addr;
	int len;
	int prot;
	int flags;
	int fd;
	long pos;
};
#endif
int
ommap(td, uap)
	struct thread *td;
	struct ommap_args *uap;
{
	struct mmap_args nargs;
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC | PROT_WRITE,
		PROT_READ,
		PROT_EXEC | PROT_READ,
		PROT_WRITE | PROT_READ,
		PROT_EXEC | PROT_WRITE | PROT_READ,
	};

#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100

	nargs.addr = uap->addr;
	nargs.len = uap->len;
	nargs.prot = cvtbsdprot[uap->prot & 0x7];
	nargs.flags = 0;
	if (uap->flags & OMAP_ANON)
		nargs.flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		nargs.flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		nargs.flags |= MAP_SHARED;
	else
		nargs.flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		nargs.flags |= MAP_FIXED;
	nargs.fd = uap->fd;
	nargs.pos = uap->pos;
	return (sys_mmap(td, &nargs));
}
#endif				/* COMPAT_43 */


#ifndef _SYS_SYSPROTO_H_
struct msync_args {
	void *addr;
	size_t len;
	int flags;
};
#endif
/*
 * MPSAFE
 */
int
sys_msync(td, uap)
	struct thread *td;
	struct msync_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int flags;
	vm_map_t map;
	int rv;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	flags = uap->flags;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	map = &td->td_proc->p_vmspace->vm_map;

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_sync(map, addr, addr + size, (flags & MS_ASYNC) == 0,
	    (flags & MS_INVALIDATE) != 0);
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
		return (EINVAL);	/* Sun returns ENOMEM? */
	case KERN_INVALID_ARGUMENT:
		return (EBUSY);
	default:
		return (EINVAL);
	}
}

#ifndef _SYS_SYSPROTO_H_
struct munmap_args {
	void *addr;
	size_t len;
};
#endif
/*
 * MPSAFE
 */
int
sys_munmap(td, uap)
	struct thread *td;
	struct munmap_args *uap;
{
#ifdef HWPMC_HOOKS
	struct pmckern_map_out pkm;
	vm_map_entry_t entry;
#endif
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_map_t map;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	if (size == 0)
		return (EINVAL);

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	if (addr < vm_map_min(map) || addr + size > vm_map_max(map))
		return (EINVAL);
	vm_map_lock(map);
#ifdef HWPMC_HOOKS
	/*
	 * Inform hwpmc if the address range being unmapped contains
	 * an executable region.
	 */
	pkm.pm_address = (uintptr_t) NULL;
	if (vm_map_lookup_entry(map, addr, &entry)) {
		for (;
		    entry != &map->header && entry->start < addr + size;
		    entry = entry->next) {
			if (vm_map_check_protection(map, entry->start,
			    entry->end, VM_PROT_EXECUTE) == TRUE) {
				pkm.pm_address = (uintptr_t) addr;
				pkm.pm_size = (size_t) size;
				break;
			}
		}
	}
#endif
	vm_map_delete(map, addr, addr + size);

#ifdef HWPMC_HOOKS
	/* downgrade the lock to prevent a LOR with the pmc-sx lock */
	vm_map_lock_downgrade(map);
	if (pkm.pm_address != (uintptr_t) NULL)
		PMC_CALL_HOOK(td, PMC_FN_MUNMAP, (void *) &pkm);
	vm_map_unlock_read(map);
#else
	vm_map_unlock(map);
#endif
	/* vm_map_delete returns nothing but KERN_SUCCESS anyway */
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mprotect_args {
	const void *addr;
	size_t len;
	int prot;
};
#endif
/*
 * MPSAFE
 */
int
sys_mprotect(td, uap)
	struct thread *td;
	struct mprotect_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t prot;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

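	/*
	 * vm_map_protect() fails with KERN_PROTECTION_FAILURE when the
	 * requested protection exceeds the maximum protection of some
	 * entry in the range; this is reported to the caller as EACCES.
	 */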
	switch (vm_map_protect(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, prot, FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	case KERN_RESOURCE_SHORTAGE:
		return (ENOMEM);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct minherit_args {
	void *addr;
	size_t len;
	int inherit;
};
#endif
/*
 * MPSAFE
 */
int
sys_minherit(td, uap)
	struct thread *td;
	struct minherit_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_inherit_t inherit;

	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	inherit = uap->inherit;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_inherit(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct madvise_args {
	void *addr;
	size_t len;
	int behav;
};
#endif

/*
 * MPSAFE
 */
/* ARGSUSED */
int
sys_madvise(td, uap)
	struct thread *td;
	struct madvise_args *uap;
{
	vm_offset_t start, end;
	vm_map_t map;
	struct proc *p;
	int error;

	/*
	 * Check for our special case, advising the swap pager we are
	 * "immortal."
	 */
	if (uap->behav == MADV_PROTECT) {
		error = priv_check(td, PRIV_VM_MADV_PROTECT);
		if (error == 0) {
			p = td->td_proc;
			PROC_LOCK(p);
			p->p_flag |= P_PROTECTED;
			PROC_UNLOCK(p);
		}
		return (error);
	}
	/*
	 * Check for illegal behavior
	 */
	if (uap->behav < 0 || uap->behav > MADV_CORE)
		return (EINVAL);
	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	if ((vm_offset_t)uap->addr < vm_map_min(map) ||
	    (vm_offset_t)uap->addr + uap->len > vm_map_max(map))
		return (EINVAL);
	if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr)
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	start = trunc_page((vm_offset_t) uap->addr);
	end = round_page((vm_offset_t) uap->addr + uap->len);

	if (vm_map_madvise(map, start, end, uap->behav))
		return (EINVAL);
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mincore_args {
	const void *addr;
	size_t len;
	char *vec;
};
#endif

/*
 * MPSAFE
 */
/* ARGSUSED */
int
sys_mincore(td, uap)
	struct thread *td;
	struct mincore_args *uap;
{
	vm_offset_t addr, first_addr;
	vm_offset_t end, cend;
	pmap_t pmap;
	vm_map_t map;
	char *vec;
	int error = 0;
	int vecindex, lastvecindex;
	vm_map_entry_t current;
	vm_map_entry_t entry;
	vm_object_t object;
	vm_paddr_t locked_pa;
	vm_page_t m;
	vm_pindex_t pindex;
	int mincoreinfo;
	unsigned int timestamp;
	boolean_t locked;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page((vm_offset_t) uap->addr);
	end = addr + (vm_size_t)round_page(uap->len);
	map = &td->td_proc->p_vmspace->vm_map;
	if (end > vm_map_max(map) || end < addr)
		return (ENOMEM);

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	pmap = vmspace_pmap(td->td_proc->p_vmspace);

	vm_map_lock_read(map);
RestartScan:
	timestamp = map->timestamp;

	if (!vm_map_lookup_entry(map, addr, &entry)) {
		vm_map_unlock_read(map);
		return (ENOMEM);
	}

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current process's address space, we can easily look
	 * up the pages elsewhere.
	 */
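	/*
	 * lastvecindex records how far into the user-supplied vector we
	 * have written so far; -1 means nothing has been stored yet.  Any
	 * gap that is skipped over is zero-filled before the next status
	 * byte is written.
	 */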
	lastvecindex = -1;
	for (current = entry;
	    (current != &map->header) && (current->start < end);
	    current = current->next) {

		/*
		 * check for contiguity
		 */
		if (current->end < end &&
		    (entry->next == &map->header ||
		    current->next->start > current->end)) {
			vm_map_unlock_read(map);
			return (ENOMEM);
		}

		/*
		 * ignore submaps (for now) or null objects
		 */
		if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) ||
		    current->object.vm_object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		/*
		 * scan this entry one page at a time
		 */
		while (addr < cend) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 */
			object = NULL;
			locked_pa = 0;
		retry:
			m = NULL;
			mincoreinfo = pmap_mincore(pmap, addr, &locked_pa);
			if (locked_pa != 0) {
				/*
				 * The page is mapped by this process but not
				 * both accessed and modified.  It is also
				 * managed.  Acquire the object lock so that
				 * other mappings might be examined.
				 */
				m = PHYS_TO_VM_PAGE(locked_pa);
				if (m->object != object) {
					if (object != NULL)
						VM_OBJECT_UNLOCK(object);
					object = m->object;
					locked = VM_OBJECT_TRYLOCK(object);
					vm_page_unlock(m);
					if (!locked) {
						VM_OBJECT_LOCK(object);
						vm_page_lock(m);
						goto retry;
					}
				} else
					vm_page_unlock(m);
				KASSERT(m->valid == VM_PAGE_BITS_ALL,
				    ("mincore: page %p is mapped but invalid",
				    m));
			} else if (mincoreinfo == 0) {
				/*
				 * The page is not mapped by this process.  If
				 * the object implements managed pages, then
				 * determine if the page is resident so that
				 * the mappings might be examined.
				 */
				if (current->object.vm_object != object) {
					if (object != NULL)
						VM_OBJECT_UNLOCK(object);
					object = current->object.vm_object;
					VM_OBJECT_LOCK(object);
				}
				if (object->type == OBJT_DEFAULT ||
				    object->type == OBJT_SWAP ||
				    object->type == OBJT_VNODE) {
					pindex = OFF_TO_IDX(current->offset +
					    (addr - current->start));
					m = vm_page_lookup(object, pindex);
					if (m != NULL && m->valid == 0)
						m = NULL;
					if (m != NULL)
						mincoreinfo = MINCORE_INCORE;
				}
			}
			if (m != NULL) {
				/* Examine other mappings to the page. */
				if (m->dirty == 0 && pmap_is_modified(m))
					vm_page_dirty(m);
				if (m->dirty != 0)
					mincoreinfo |= MINCORE_MODIFIED_OTHER;
				/*
				 * The first test for PGA_REFERENCED is an
				 * optimization.  The second test is
				 * required because a concurrent pmap
				 * operation could clear the last reference
				 * and set PGA_REFERENCED before the call to
				 * pmap_is_referenced().
				 */
				if ((m->aflags & PGA_REFERENCED) != 0 ||
				    pmap_is_referenced(m) ||
				    (m->aflags & PGA_REFERENCED) != 0)
					mincoreinfo |= MINCORE_REFERENCED_OTHER;
			}
			if (object != NULL)
				VM_OBJECT_UNLOCK(object);

			/*
			 * subyte may page fault.  In case it needs to modify
			 * the map, we release the lock.
			 */
			vm_map_unlock_read(map);

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = OFF_TO_IDX(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make sure that
			 * the byte vector is zeroed for those skipped entries.
			 */
			while ((lastvecindex + 1) < vecindex) {
				error = subyte(vec + lastvecindex, 0);
				if (error) {
					error = EFAULT;
					goto done2;
				}
				++lastvecindex;
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte(vec + vecindex, mincoreinfo);
			if (error) {
				error = EFAULT;
				goto done2;
			}

			/*
			 * If the map has changed, due to the subyte, the previous
			 * output may be invalid.
			 */
			vm_map_lock_read(map);
			if (timestamp != map->timestamp)
				goto RestartScan;

			lastvecindex = vecindex;
			addr += PAGE_SIZE;
		}
	}

	/*
	 * subyte may page fault.  In case it needs to modify
	 * the map, we release the lock.
	 */
	vm_map_unlock_read(map);

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = OFF_TO_IDX(end - first_addr);
	while ((lastvecindex + 1) < vecindex) {
		error = subyte(vec + lastvecindex, 0);
		if (error) {
			error = EFAULT;
			goto done2;
		}
		++lastvecindex;
	}

	/*
	 * If the map has changed, due to the subyte, the previous
	 * output may be invalid.
	 */
	vm_map_lock_read(map);
	if (timestamp != map->timestamp)
		goto RestartScan;
	vm_map_unlock_read(map);
done2:
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct mlock_args {
	const void *addr;
	size_t len;
};
#endif
/*
 * MPSAFE
 */
int
sys_mlock(td, uap)
	struct thread *td;
	struct mlock_args *uap;
{
	struct proc *proc;
	vm_offset_t addr, end, last, start;
	vm_size_t npages, size;
	unsigned long nsize;
	int error;

	error = priv_check(td, PRIV_VM_MLOCK);
	if (error)
		return (error);
	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
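	/*
	 * Several limits are checked before anything is wired: a single
	 * request may not exceed vm_page_max_wired pages, the process's
	 * total wired memory may not exceed its RLIMIT_MEMLOCK limit, the
	 * system-wide count of wired pages may not exceed
	 * vm_page_max_wired, and, when RACCT is compiled in, the racct
	 * MEMLOCK accounting must accommodate the new total as well.
	 */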
	npages = atop(end - start);
	if (npages > vm_page_max_wired)
		return (ENOMEM);
	proc = td->td_proc;
	PROC_LOCK(proc);
	nsize = ptoa(npages +
	    pmap_wired_count(vm_map_pmap(&proc->p_vmspace->vm_map)));
	if (nsize > lim_cur(proc, RLIMIT_MEMLOCK)) {
		PROC_UNLOCK(proc);
		return (ENOMEM);
	}
	PROC_UNLOCK(proc);
	if (npages + cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);
#ifdef RACCT
	PROC_LOCK(proc);
	error = racct_set(proc, RACCT_MEMLOCK, nsize);
	PROC_UNLOCK(proc);
	if (error != 0)
		return (ENOMEM);
#endif
	error = vm_map_wire(&proc->p_vmspace->vm_map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (error != KERN_SUCCESS) {
		PROC_LOCK(proc);
		racct_set(proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(vm_map_pmap(&proc->p_vmspace->vm_map))));
		PROC_UNLOCK(proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

#ifndef _SYS_SYSPROTO_H_
struct mlockall_args {
	int how;
};
#endif

/*
 * MPSAFE
 */
int
sys_mlockall(td, uap)
	struct thread *td;
	struct mlockall_args *uap;
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = 0;

	if ((uap->how == 0) || ((uap->how & ~(MCL_CURRENT|MCL_FUTURE)) != 0))
		return (EINVAL);

#if 0
	/*
	 * If wiring all pages in the process would cause it to exceed
	 * a hard resource limit, return ENOMEM.
	 */
	PROC_LOCK(td->td_proc);
	if (map->size > lim_cur(td->td_proc, RLIMIT_MEMLOCK)) {
		PROC_UNLOCK(td->td_proc);
		return (ENOMEM);
	}
	PROC_UNLOCK(td->td_proc);
#else
	error = priv_check(td, PRIV_VM_MLOCK);
	if (error)
		return (error);
#endif
#ifdef RACCT
	PROC_LOCK(td->td_proc);
	error = racct_set(td->td_proc, RACCT_MEMLOCK, map->size);
	PROC_UNLOCK(td->td_proc);
	if (error != 0)
		return (ENOMEM);
#endif

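	/*
	 * MCL_FUTURE is implemented by setting MAP_WIREFUTURE on the map;
	 * vm_mmap() checks that flag and wires each new mapping as it is
	 * created.  MCL_CURRENT wires everything that is already mapped.
	 */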
	if (uap->how & MCL_FUTURE) {
		vm_map_lock(map);
		vm_map_modflags(map, MAP_WIREFUTURE, 0);
		vm_map_unlock(map);
		error = 0;
	}

	if (uap->how & MCL_CURRENT) {
		/*
		 * P1003.1-2001 mandates that all currently mapped pages
		 * will be memory resident and locked (wired) upon return
		 * from mlockall().  vm_map_wire() will wire pages, by
		 * calling vm_fault_wire() for each page in the region.
		 */
		error = vm_map_wire(map, vm_map_min(map), vm_map_max(map),
		    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
		error = (error == KERN_SUCCESS ? 0 : EAGAIN);
	}
#ifdef RACCT
	if (error != KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(vm_map_pmap(&td->td_proc->p_vmspace->vm_map))));
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct munlockall_args {
	register_t dummy;
};
#endif

/*
 * MPSAFE
 */
int
sys_munlockall(td, uap)
	struct thread *td;
	struct munlockall_args *uap;
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);

	/* Clear the MAP_WIREFUTURE flag from this vm_map. */
	vm_map_lock(map);
	vm_map_modflags(map, 0, MAP_WIREFUTURE);
	vm_map_unlock(map);

	/* Forcibly unwire all pages. */
	error = vm_map_unwire(map, vm_map_min(map), vm_map_max(map),
	    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
#ifdef RACCT
	if (error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK, 0);
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct munlock_args {
	const void *addr;
	size_t len;
};
#endif
/*
 * MPSAFE
 */
int
sys_munlock(td, uap)
	struct thread *td;
	struct munlock_args *uap;
{
	vm_offset_t addr, end, last, start;
	vm_size_t size;
	int error;

	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);
	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	error = vm_map_unwire(&td->td_proc->p_vmspace->vm_map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_sub(td->td_proc, RACCT_MEMLOCK, ptoa(end - start));
		PROC_UNLOCK(td->td_proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * vm_mmap_vnode()
 *
 * MPSAFE
 *
 * Helper function for vm_mmap.  Performs the sanity checks specific to
 * mmap operations on vnodes.
 */
int
vm_mmap_vnode(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct vnode *vp, vm_ooffset_t *foffp, vm_object_t *objp)
{
	struct vattr va;
	vm_object_t obj;
	vm_offset_t foff;
	struct mount *mp;
	struct ucred *cred;
	int error, flags;
	int vfslocked;

	mp = vp->v_mount;
	cred = td->td_ucred;
	vfslocked = VFS_LOCK_GIANT(mp);
	if ((error = vget(vp, LK_SHARED, td)) != 0) {
		VFS_UNLOCK_GIANT(vfslocked);
		return (error);
	}
	foff = *foffp;
	flags = *flagsp;
	obj = vp->v_object;
	if (vp->v_type == VREG) {
		/*
		 * Get the proper underlying object
		 */
		if (obj == NULL) {
			error = EINVAL;
			goto done;
		}
		if (obj->handle != vp) {
			vput(vp);
			vp = (struct vnode *)obj->handle;
			vget(vp, LK_SHARED, td);
		}
	} else if (vp->v_type == VCHR) {
		error = vm_mmap_cdev(td, objsize, prot, maxprotp, flagsp,
		    vp->v_rdev, foffp, objp);
		if (error == 0)
			goto mark_atime;
		goto done;
	} else {
		error = EINVAL;
		goto done;
	}
	if ((error = VOP_GETATTR(vp, &va, cred)))
		goto done;
#ifdef MAC
	error = mac_vnode_check_mmap(cred, vp, prot, flags);
	if (error != 0)
		goto done;
#endif
	if ((flags & MAP_SHARED) != 0) {
		if ((va.va_flags & (SF_SNAPSHOT|IMMUTABLE|APPEND)) != 0) {
			if (prot & PROT_WRITE) {
				error = EPERM;
				goto done;
			}
			*maxprotp &= ~VM_PROT_WRITE;
		}
	}
	/*
	 * If it is a regular file without any references
	 * we do not need to sync it.
	 * Adjust object size to be the size of actual file.
	 */
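	/*
	 * A link count of zero means the file has been unlinked while
	 * still open; its contents are discarded once the last reference
	 * goes away, so nothing is lost by disabling the syncer for this
	 * mapping.
	 */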
	objsize = round_page(va.va_size);
	if (va.va_nlink == 0)
		flags |= MAP_NOSYNC;
	obj = vm_pager_allocate(OBJT_VNODE, vp, objsize, prot, foff, td->td_ucred);
	if (obj == NULL) {
		error = ENOMEM;
		goto done;
	}
	*objp = obj;
	*flagsp = flags;

mark_atime:
	vfs_mark_atime(vp, cred);

done:
	vput(vp);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}

/*
 * vm_mmap_cdev()
 *
 * MPSAFE
 *
 * Helper function for vm_mmap.  Performs the sanity checks specific to
 * mmap operations on cdevs.
 */
int
vm_mmap_cdev(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct cdev *cdev, vm_ooffset_t *foff, vm_object_t *objp)
{
	vm_object_t obj;
	struct cdevsw *dsw;
	int error, flags, ref;

	flags = *flagsp;

	dsw = dev_refthread(cdev, &ref);
	if (dsw == NULL)
		return (ENXIO);
	if (dsw->d_flags & D_MMAP_ANON) {
		dev_relthread(cdev, ref);
		*maxprotp = VM_PROT_ALL;
		*flagsp |= MAP_ANON;
		return (0);
	}
	/*
	 * cdevs do not provide private mappings of any kind.
	 */
	if ((*maxprotp & VM_PROT_WRITE) == 0 &&
	    (prot & PROT_WRITE) != 0) {
		dev_relthread(cdev, ref);
		return (EACCES);
	}
	if (flags & (MAP_PRIVATE|MAP_COPY)) {
		dev_relthread(cdev, ref);
		return (EINVAL);
	}
	/*
	 * Force device mappings to be shared.
	 */
	flags |= MAP_SHARED;
#ifdef MAC_XXX
	error = mac_cdev_check_mmap(td->td_ucred, cdev, prot);
	if (error != 0) {
		dev_relthread(cdev, ref);
		return (error);
	}
#endif
	/*
	 * First, try d_mmap_single().  If that is not implemented
	 * (returns ENODEV), fall back to using the device pager.
	 * Note that d_mmap_single() must return a reference to the
	 * object (it needs to bump the reference count of the object
	 * it returns somehow).
	 *
	 * XXX assumes VM_PROT_* == PROT_*
	 */
	error = dsw->d_mmap_single(cdev, foff, objsize, objp, (int)prot);
	dev_relthread(cdev, ref);
	if (error != ENODEV)
		return (error);
	obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff,
	    td->td_ucred);
	if (obj == NULL)
		return (EINVAL);
	*objp = obj;
	*flagsp = flags;
	return (0);
}

/*
 * vm_mmap_shm()
 *
 * MPSAFE
 *
 * Helper function for vm_mmap.  Performs the sanity checks specific to
 * mmap operations on shm file descriptors.
 */
int
vm_mmap_shm(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct shmfd *shmfd, vm_ooffset_t foff, vm_object_t *objp)
{
	int error;

	if ((*flagsp & MAP_SHARED) != 0 &&
	    (*maxprotp & VM_PROT_WRITE) == 0 &&
	    (prot & PROT_WRITE) != 0)
		return (EACCES);
#ifdef MAC
	error = mac_posixshm_check_mmap(td->td_ucred, shmfd, prot, *flagsp);
	if (error != 0)
		return (error);
#endif
	error = shm_mmap(shmfd, objsize, foff, objp);
	if (error)
		return (error);
	return (0);
}

/*
 * vm_mmap()
 *
 * MPSAFE
 *
 * Internal version of mmap.  Currently used by mmap, exec, and sys5
 * shared memory.  Handle is either a vnode pointer or NULL for MAP_ANON.
 */
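/*
 * handle_type selects how the backing VM object is looked up or created:
 * OBJT_VNODE goes through vm_mmap_vnode(), OBJT_DEVICE through
 * vm_mmap_cdev(), OBJT_SWAP through vm_mmap_shm(), and OBJT_DEFAULT with
 * a NULL handle describes anonymous memory.
 */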
int
vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
	vm_prot_t maxprot, int flags,
	objtype_t handle_type, void *handle,
	vm_ooffset_t foff)
{
	boolean_t fitit;
	vm_object_t object = NULL;
	int rv = KERN_SUCCESS;
	int docow, error;
	struct thread *td = curthread;

	if (size == 0)
		return (0);

	size = round_page(size);

	PROC_LOCK(td->td_proc);
	if (td->td_proc->p_vmspace->vm_map.size + size >
	    lim_cur(td->td_proc, RLIMIT_VMEM)) {
		PROC_UNLOCK(td->td_proc);
		return (ENOMEM);
	}
	if (racct_set(td->td_proc, RACCT_VMEM,
	    td->td_proc->p_vmspace->vm_map.size + size)) {
		PROC_UNLOCK(td->td_proc);
		return (ENOMEM);
	}
	PROC_UNLOCK(td->td_proc);

	/*
	 * We currently can only deal with page aligned file offsets.
	 * The check is here rather than in the syscall because the
	 * kernel calls this function internally for other mmapping
	 * operations (such as in exec) and non-aligned offsets will
	 * cause pmap inconsistencies...so we want to be sure to
	 * disallow this in all cases.
	 */
	if (foff & PAGE_MASK)
		return (EINVAL);

	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		if (*addr != trunc_page(*addr))
			return (EINVAL);
		fitit = FALSE;
	}
	/*
	 * Lookup/allocate object.
	 */
	switch (handle_type) {
	case OBJT_DEVICE:
		error = vm_mmap_cdev(td, size, prot, &maxprot, &flags,
		    handle, &foff, &object);
		break;
	case OBJT_VNODE:
		error = vm_mmap_vnode(td, size, prot, &maxprot, &flags,
		    handle, &foff, &object);
		break;
	case OBJT_SWAP:
		error = vm_mmap_shm(td, size, prot, &maxprot, &flags,
		    handle, foff, &object);
		break;
	case OBJT_DEFAULT:
		if (handle == NULL) {
			error = 0;
			break;
		}
		/* FALLTHROUGH */
	default:
		error = EINVAL;
		break;
	}
	if (error)
		return (error);
	if (flags & MAP_ANON) {
		object = NULL;
		docow = 0;
		/*
		 * Unnamed anonymous regions always start at 0.
		 */
		if (handle == 0)
			foff = 0;
	} else if (flags & MAP_PREFAULT_READ)
		docow = MAP_PREFAULT;
	else
		docow = MAP_PREFAULT_PARTIAL;

	if ((flags & (MAP_ANON|MAP_SHARED)) == 0)
		docow |= MAP_COPY_ON_WRITE;
	if (flags & MAP_NOSYNC)
		docow |= MAP_DISABLE_SYNCER;
	if (flags & MAP_NOCORE)
		docow |= MAP_DISABLE_COREDUMP;

	if (flags & MAP_STACK)
		rv = vm_map_stack(map, *addr, size, prot, maxprot,
		    docow | MAP_STACK_GROWS_DOWN);
	else if (fitit)
		rv = vm_map_find(map, object, foff, addr, size,
		    object != NULL && object->type == OBJT_DEVICE ?
		    VMFS_ALIGNED_SPACE : VMFS_ANY_SPACE, prot, maxprot, docow);
	else
		rv = vm_map_fixed(map, object, foff, *addr, size,
		    prot, maxprot, docow);

	if (rv != KERN_SUCCESS) {
		/*
		 * Lose the object reference.  Will destroy the
		 * object if it's an unnamed anonymous mapping
		 * or named anonymous without other references.
		 */
		vm_object_deallocate(object);
	} else if (flags & MAP_SHARED) {
		/*
		 * Shared memory is also shared with children.
		 */
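		/*
		 * VM_INHERIT_SHARE makes a child created by fork(2) see
		 * the same pages as the parent for this range, instead of
		 * receiving copy-on-write copies.
		 */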
		rv = vm_map_inherit(map, *addr, *addr + size, VM_INHERIT_SHARE);
		if (rv != KERN_SUCCESS)
			(void) vm_map_remove(map, *addr, *addr + size);
	}

	/*
	 * If the process has requested that all future mappings
	 * be wired, then heed this.
	 */
	if ((rv == KERN_SUCCESS) && (map->flags & MAP_WIREFUTURE))
		vm_map_wire(map, *addr, *addr + size,
		    VM_MAP_WIRE_USER|VM_MAP_WIRE_NOHOLES);

	return (vm_mmap_to_errno(rv));
}

int
vm_mmap_to_errno(int rv)
{

	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}