/*-
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_hwpmc_hooks.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/capability.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/racct.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/sysent.h>
#include <sys/vmmeter.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vm_page.h>
#include <vm/vnode_pager.h>

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

#ifndef _SYS_SYSPROTO_H_
struct sbrk_args {
	int incr;
};
#endif

static int vm_mmap_vnode(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
    int *, struct vnode *, vm_ooffset_t *, vm_object_t *, boolean_t *);
static int vm_mmap_cdev(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
    int *, struct cdev *, vm_ooffset_t *, vm_object_t *);
static int vm_mmap_shm(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
    int *, struct shmfd *, vm_ooffset_t, vm_object_t *);

/*
 * MPSAFE
 */
/* ARGSUSED */
int
sys_sbrk(td, uap)
	struct thread *td;
	struct sbrk_args *uap;
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#ifndef _SYS_SYSPROTO_H_
struct sstk_args {
	int incr;
};
#endif

/*
 * MPSAFE
 */
/* ARGSUSED */
int
sys_sstk(td, uap)
	struct thread *td;
	struct sstk_args *uap;
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43)
#ifndef _SYS_SYSPROTO_H_
struct getpagesize_args {
	int dummy;
};
#endif

int
ogetpagesize(td, uap)
	struct thread *td;
	struct getpagesize_args *uap;
{
	/* MP SAFE */
	td->td_retval[0] = PAGE_SIZE;
	return (0);
}
#endif /* COMPAT_43 */


/*
 * Memory Map (mmap) system call.  Note that the file offset
 * and address are allowed to be NOT page aligned, though if
 * the MAP_FIXED flag is set, both must have the same remainder
 * modulo the PAGE_SIZE (POSIX 1003.1b).  If the address is not
 * page-aligned, the actual mapping starts at trunc_page(addr)
 * and the return value is adjusted up by the page offset.
 *
 * Generally speaking, only character devices which are themselves
 * memory-based, such as a video framebuffer, can be mmap'd.  Otherwise
 * there would be no cache coherency between a descriptor and a VM mapping
 * both to the same character device.
 */
#ifndef _SYS_SYSPROTO_H_
struct mmap_args {
	void *addr;
	size_t len;
	int prot;
	int flags;
	int fd;
	long pad;
	off_t pos;
};
#endif

/*
 * MPSAFE
 */
int
sys_mmap(td, uap)
	struct thread *td;
	struct mmap_args *uap;
{
#ifdef HWPMC_HOOKS
	struct pmckern_map_in pkm;
#endif
	struct file *fp;
	struct vnode *vp;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t cap_maxprot, prot, maxprot;
	void *handle;
	objtype_t handle_type;
	int flags, error;
	off_t pos;
	struct vmspace *vms = td->td_proc->p_vmspace;
	cap_rights_t rights;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;
	flags = uap->flags;
	pos = uap->pos;

	fp = NULL;

	/*
	 * Enforce the constraints.
	 * Mapping of length 0 is only allowed for old binaries.
	 * Anonymous mapping shall specify -1 as the file descriptor and
	 * zero position for new code.  Be nice to ancient a.out
	 * binaries and correct pos for anonymous mapping, since old
	 * ld.so sometimes issues anonymous map requests with non-zero
	 * pos.
	 */
	if (!SV_CURPROC_FLAG(SV_AOUT)) {
		if ((uap->len == 0 && curproc->p_osrel >= P_OSREL_MAP_ANON) ||
		    ((flags & MAP_ANON) != 0 && (uap->fd != -1 || pos != 0)))
			return (EINVAL);
	} else {
		if ((flags & MAP_ANON) != 0)
			pos = 0;
	}

	if (flags & MAP_STACK) {
		if ((uap->fd != -1) ||
		    ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE)))
			return (EINVAL);
		flags |= MAP_ANON;
		pos = 0;
	}

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;

	/* Adjust size for rounding (on both ends). */
	size += pageoff;			/* low end... */
	size = (vm_size_t) round_page(size);	/* hi end */

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		/* Address range must be all in user VM space. */
		if (addr < vm_map_min(&vms->vm_map) ||
		    addr + size > vm_map_max(&vms->vm_map))
			return (EINVAL);
		if (addr + size < addr)
			return (EINVAL);
	} else {
		/*
		 * XXX for non-fixed mappings where no hint is provided or
		 * the hint would fall in the potential heap space,
		 * place it after the end of the largest possible heap.
		 *
		 * There should really be a pmap call to determine a reasonable
		 * location.
		 */
		PROC_LOCK(td->td_proc);
		if (addr == 0 ||
		    (addr >= round_page((vm_offset_t)vms->vm_taddr) &&
		    addr < round_page((vm_offset_t)vms->vm_daddr +
		    lim_max(td->td_proc, RLIMIT_DATA))))
			addr = round_page((vm_offset_t)vms->vm_daddr +
			    lim_max(td->td_proc, RLIMIT_DATA));
		PROC_UNLOCK(td->td_proc);
	}
	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.
		 */
		handle = NULL;
		handle_type = OBJT_DEFAULT;
		maxprot = VM_PROT_ALL;
		cap_maxprot = VM_PROT_ALL;
	} else {
		/*
		 * Mapping file, get fp for validation and don't let the
		 * descriptor disappear on us if we block.  Check capability
		 * rights, but also return the maximum rights to be combined
		 * with maxprot later.
		 */
		rights = CAP_MMAP;
		if (prot & PROT_READ)
			rights |= CAP_READ;
		if ((flags & MAP_SHARED) != 0) {
			if (prot & PROT_WRITE)
				rights |= CAP_WRITE;
		}
		if (prot & PROT_EXEC)
			rights |= CAP_MAPEXEC;
		if ((error = fget_mmap(td, uap->fd, rights, &cap_maxprot,
		    &fp)) != 0)
			goto done;
		if (fp->f_type == DTYPE_SHM) {
			handle = fp->f_data;
			handle_type = OBJT_SWAP;
			maxprot = VM_PROT_NONE;

			/* FREAD should always be set. */
			if (fp->f_flag & FREAD)
				maxprot |= VM_PROT_EXECUTE | VM_PROT_READ;
			if (fp->f_flag & FWRITE)
				maxprot |= VM_PROT_WRITE;
			goto map;
		}
		if (fp->f_type != DTYPE_VNODE) {
			error = ENODEV;
			goto done;
		}
#if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \
    defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4)
		/*
		 * POSIX shared-memory objects are defined to have
		 * kernel persistence, and are not defined to support
		 * read(2)/write(2) -- or even open(2).  Thus, we can
		 * use MAP_ASYNC to trade on-disk coherence for speed.
		 * The shm_open(3) library routine turns on the FPOSIXSHM
		 * flag to request this behavior.
		 */
		if (fp->f_flag & FPOSIXSHM)
			flags |= MAP_NOSYNC;
#endif
		vp = fp->f_vnode;
		/*
		 * Ensure that file and memory protections are
		 * compatible.  Note that we only worry about
		 * writability if mapping is shared; in this case,
		 * current and max prot are dictated by the open file.
		 * XXX use the vnode instead?  Problem is: what
		 * credentials do we use for determination?  What if
		 * proc does a setuid?
		 */
		if (vp->v_mount != NULL && vp->v_mount->mnt_flag & MNT_NOEXEC)
			maxprot = VM_PROT_NONE;
		else
			maxprot = VM_PROT_EXECUTE;
		if (fp->f_flag & FREAD) {
			maxprot |= VM_PROT_READ;
		} else if (prot & PROT_READ) {
			error = EACCES;
			goto done;
		}
		/*
		 * If we are sharing potential changes (either via
		 * MAP_SHARED or via the implicit sharing of character
		 * device mappings), and we are trying to get write
		 * permission although we opened it without asking
		 * for it, bail out.
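		 *
		 * For a private mapping of a regular file, writes only
		 * touch anonymous copy-on-write pages, so write access
		 * is granted below even when the file was opened
		 * read-only.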
		 */
		if ((flags & MAP_SHARED) != 0) {
			if ((fp->f_flag & FWRITE) != 0) {
				maxprot |= VM_PROT_WRITE;
			} else if ((prot & PROT_WRITE) != 0) {
				error = EACCES;
				goto done;
			}
		} else if (vp->v_type != VCHR || (fp->f_flag & FWRITE) != 0) {
			maxprot |= VM_PROT_WRITE;
			cap_maxprot |= VM_PROT_WRITE;
		}
		handle = (void *)vp;
		handle_type = OBJT_VNODE;
	}
map:
	td->td_fpop = fp;
	maxprot &= cap_maxprot;
	error = vm_mmap(&vms->vm_map, &addr, size, prot, maxprot,
	    flags, handle_type, handle, pos);
	td->td_fpop = NULL;
#ifdef HWPMC_HOOKS
	/* inform hwpmc(4) if an executable is being mapped */
	if (error == 0 && handle_type == OBJT_VNODE &&
	    (prot & PROT_EXEC)) {
		pkm.pm_file = handle;
		pkm.pm_address = (uintptr_t) addr;
		PMC_CALL_HOOK(td, PMC_FN_MMAP, (void *) &pkm);
	}
#endif
	if (error == 0)
		td->td_retval[0] = (register_t) (addr + pageoff);
done:
	if (fp)
		fdrop(fp, td);

	return (error);
}

int
freebsd6_mmap(struct thread *td, struct freebsd6_mmap_args *uap)
{
	struct mmap_args oargs;

	oargs.addr = uap->addr;
	oargs.len = uap->len;
	oargs.prot = uap->prot;
	oargs.flags = uap->flags;
	oargs.fd = uap->fd;
	oargs.pos = uap->pos;
	return (sys_mmap(td, &oargs));
}

#ifdef COMPAT_43
#ifndef _SYS_SYSPROTO_H_
struct ommap_args {
	caddr_t addr;
	int len;
	int prot;
	int flags;
	int fd;
	long pos;
};
#endif
int
ommap(td, uap)
	struct thread *td;
	struct ommap_args *uap;
{
	struct mmap_args nargs;
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC | PROT_WRITE,
		PROT_READ,
		PROT_EXEC | PROT_READ,
		PROT_WRITE | PROT_READ,
		PROT_EXEC | PROT_WRITE | PROT_READ,
	};

#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100

	nargs.addr = uap->addr;
	nargs.len = uap->len;
	nargs.prot = cvtbsdprot[uap->prot & 0x7];
#ifdef COMPAT_FREEBSD32
#if defined(__amd64__) || defined(__ia64__)
	if (i386_read_exec && SV_PROC_FLAG(td->td_proc, SV_ILP32) &&
	    nargs.prot != 0)
		nargs.prot |= PROT_EXEC;
#endif
#endif
	nargs.flags = 0;
	if (uap->flags & OMAP_ANON)
		nargs.flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		nargs.flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		nargs.flags |= MAP_SHARED;
	else
		nargs.flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		nargs.flags |= MAP_FIXED;
	nargs.fd = uap->fd;
	nargs.pos = uap->pos;
	return (sys_mmap(td, &nargs));
}
#endif /* COMPAT_43 */


#ifndef _SYS_SYSPROTO_H_
struct msync_args {
	void *addr;
	size_t len;
	int flags;
};
#endif
/*
 * MPSAFE
 */
int
sys_msync(td, uap)
	struct thread *td;
	struct msync_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int flags;
	vm_map_t map;
	int rv;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	flags = uap->flags;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	map = &td->td_proc->p_vmspace->vm_map;

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_sync(map, addr, addr + size, (flags & MS_ASYNC) == 0,
	    (flags & MS_INVALIDATE) != 0);
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
		return (EINVAL);	/* Sun returns ENOMEM? */
	case KERN_INVALID_ARGUMENT:
		return (EBUSY);
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
}

#ifndef _SYS_SYSPROTO_H_
struct munmap_args {
	void *addr;
	size_t len;
};
#endif
/*
 * MPSAFE
 */
int
sys_munmap(td, uap)
	struct thread *td;
	struct munmap_args *uap;
{
#ifdef HWPMC_HOOKS
	struct pmckern_map_out pkm;
	vm_map_entry_t entry;
#endif
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_map_t map;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	if (size == 0)
		return (EINVAL);

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	if (addr < vm_map_min(map) || addr + size > vm_map_max(map))
		return (EINVAL);
	vm_map_lock(map);
#ifdef HWPMC_HOOKS
	/*
	 * Inform hwpmc if the address range being unmapped contains
	 * an executable region.
	 */
	pkm.pm_address = (uintptr_t) NULL;
	if (vm_map_lookup_entry(map, addr, &entry)) {
		for (;
		    entry != &map->header && entry->start < addr + size;
		    entry = entry->next) {
			if (vm_map_check_protection(map, entry->start,
			    entry->end, VM_PROT_EXECUTE) == TRUE) {
				pkm.pm_address = (uintptr_t) addr;
				pkm.pm_size = (size_t) size;
				break;
			}
		}
	}
#endif
	vm_map_delete(map, addr, addr + size);

#ifdef HWPMC_HOOKS
	/* downgrade the lock to prevent a LOR with the pmc-sx lock */
	vm_map_lock_downgrade(map);
	if (pkm.pm_address != (uintptr_t) NULL)
		PMC_CALL_HOOK(td, PMC_FN_MUNMAP, (void *) &pkm);
	vm_map_unlock_read(map);
#else
	vm_map_unlock(map);
#endif
	/* vm_map_delete returns nothing but KERN_SUCCESS anyway */
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mprotect_args {
	const void *addr;
	size_t len;
	int prot;
};
#endif
/*
 * MPSAFE
 */
int
sys_mprotect(td, uap)
	struct thread *td;
	struct mprotect_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t prot;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_protect(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, prot, FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	case KERN_RESOURCE_SHORTAGE:
		return (ENOMEM);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct minherit_args {
	void *addr;
	size_t len;
	int inherit;
};
#endif
/*
 * MPSAFE
 */
int
sys_minherit(td, uap)
	struct thread *td;
	struct minherit_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_inherit_t inherit;

	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	inherit = uap->inherit;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_inherit(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct madvise_args {
	void *addr;
	size_t len;
	int behav;
};
#endif

/*
 * MPSAFE
 */
int
sys_madvise(td, uap)
	struct thread *td;
	struct madvise_args *uap;
{
	vm_offset_t start, end;
	vm_map_t map;
	struct proc *p;
	int error;

	/*
	 * Check for our special case, advising the swap pager we are
	 * "immortal."
	 */
	if (uap->behav == MADV_PROTECT) {
		error = priv_check(td, PRIV_VM_MADV_PROTECT);
		if (error == 0) {
			p = td->td_proc;
			PROC_LOCK(p);
			p->p_flag |= P_PROTECTED;
			PROC_UNLOCK(p);
		}
		return (error);
	}
	/*
	 * Check for illegal behavior
	 */
	if (uap->behav < 0 || uap->behav > MADV_CORE)
		return (EINVAL);
	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	if ((vm_offset_t)uap->addr < vm_map_min(map) ||
	    (vm_offset_t)uap->addr + uap->len > vm_map_max(map))
		return (EINVAL);
	if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr)
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	start = trunc_page((vm_offset_t) uap->addr);
	end = round_page((vm_offset_t) uap->addr + uap->len);

	if (vm_map_madvise(map, start, end, uap->behav))
		return (EINVAL);
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mincore_args {
	const void *addr;
	size_t len;
	char *vec;
};
#endif

/*
 * MPSAFE
 */
int
sys_mincore(td, uap)
	struct thread *td;
	struct mincore_args *uap;
{
	vm_offset_t addr, first_addr;
	vm_offset_t end, cend;
	pmap_t pmap;
	vm_map_t map;
	char *vec;
	int error = 0;
	int vecindex, lastvecindex;
	vm_map_entry_t current;
	vm_map_entry_t entry;
	vm_object_t object;
	vm_paddr_t locked_pa;
	vm_page_t m;
	vm_pindex_t pindex;
	int mincoreinfo;
	unsigned int timestamp;
	boolean_t locked;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page((vm_offset_t) uap->addr);
	end = addr + (vm_size_t)round_page(uap->len);
	map = &td->td_proc->p_vmspace->vm_map;
	if (end > vm_map_max(map) || end < addr)
		return (ENOMEM);

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	pmap = vmspace_pmap(td->td_proc->p_vmspace);

	vm_map_lock_read(map);
RestartScan:
	timestamp = map->timestamp;

	if (!vm_map_lookup_entry(map, addr, &entry)) {
		vm_map_unlock_read(map);
		return (ENOMEM);
	}

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current process's address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for (current = entry;
	    (current != &map->header) && (current->start < end);
	    current = current->next) {

		/*
		 * check for contiguity
		 */
		if (current->end < end &&
		    (current->next == &map->header ||
		    current->next->start > current->end)) {
			vm_map_unlock_read(map);
			return (ENOMEM);
		}

		/*
		 * ignore submaps (for now) or null objects
		 */
		if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) ||
		    current->object.vm_object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		/*
		 * scan this entry one page at a time
		 */
		while (addr < cend) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 */
			object = NULL;
			locked_pa = 0;
		retry:
			m = NULL;
			mincoreinfo = pmap_mincore(pmap, addr, &locked_pa);
			if (locked_pa != 0) {
				/*
				 * The page is mapped by this process but not
				 * both accessed and modified.  It is also
				 * managed.  Acquire the object lock so that
				 * other mappings might be examined.
				 */
				m = PHYS_TO_VM_PAGE(locked_pa);
				if (m->object != object) {
					if (object != NULL)
						VM_OBJECT_UNLOCK(object);
					object = m->object;
					locked = VM_OBJECT_TRYLOCK(object);
					vm_page_unlock(m);
					if (!locked) {
						VM_OBJECT_LOCK(object);
						vm_page_lock(m);
						goto retry;
					}
				} else
					vm_page_unlock(m);
				KASSERT(m->valid == VM_PAGE_BITS_ALL,
				    ("mincore: page %p is mapped but invalid",
				    m));
			} else if (mincoreinfo == 0) {
				/*
				 * The page is not mapped by this process.  If
				 * the object implements managed pages, then
				 * determine if the page is resident so that
				 * the mappings might be examined.
				 */
				if (current->object.vm_object != object) {
					if (object != NULL)
						VM_OBJECT_UNLOCK(object);
					object = current->object.vm_object;
					VM_OBJECT_LOCK(object);
				}
				if (object->type == OBJT_DEFAULT ||
				    object->type == OBJT_SWAP ||
				    object->type == OBJT_VNODE) {
					pindex = OFF_TO_IDX(current->offset +
					    (addr - current->start));
					m = vm_page_lookup(object, pindex);
					if (m == NULL &&
					    vm_page_is_cached(object, pindex))
						mincoreinfo = MINCORE_INCORE;
					if (m != NULL && m->valid == 0)
						m = NULL;
					if (m != NULL)
						mincoreinfo = MINCORE_INCORE;
				}
			}
			if (m != NULL) {
				/* Examine other mappings to the page. */
				if (m->dirty == 0 && pmap_is_modified(m))
					vm_page_dirty(m);
				if (m->dirty != 0)
					mincoreinfo |= MINCORE_MODIFIED_OTHER;
				/*
				 * The first test for PGA_REFERENCED is an
				 * optimization.  The second test is
				 * required because a concurrent pmap
				 * operation could clear the last reference
				 * and set PGA_REFERENCED before the call to
				 * pmap_is_referenced().
				 */
				if ((m->aflags & PGA_REFERENCED) != 0 ||
				    pmap_is_referenced(m) ||
				    (m->aflags & PGA_REFERENCED) != 0)
					mincoreinfo |= MINCORE_REFERENCED_OTHER;
			}
			if (object != NULL)
				VM_OBJECT_UNLOCK(object);

			/*
			 * subyte may page fault.  In case it needs to modify
			 * the map, we release the lock.
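			 * After the read lock is re-acquired below, the
			 * saved map timestamp is compared with the current
			 * one and the whole scan restarts if the map has
			 * changed in the meantime.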
			 */
			vm_map_unlock_read(map);

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = OFF_TO_IDX(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make sure that
			 * the byte vector is zeroed for those skipped entries.
			 */
			while ((lastvecindex + 1) < vecindex) {
				error = subyte(vec + lastvecindex, 0);
				if (error) {
					error = EFAULT;
					goto done2;
				}
				++lastvecindex;
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte(vec + vecindex, mincoreinfo);
			if (error) {
				error = EFAULT;
				goto done2;
			}

			/*
			 * If the map has changed, due to the subyte, the previous
			 * output may be invalid.
			 */
			vm_map_lock_read(map);
			if (timestamp != map->timestamp)
				goto RestartScan;

			lastvecindex = vecindex;
			addr += PAGE_SIZE;
		}
	}

	/*
	 * subyte may page fault.  In case it needs to modify
	 * the map, we release the lock.
	 */
	vm_map_unlock_read(map);

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = OFF_TO_IDX(end - first_addr);
	while ((lastvecindex + 1) < vecindex) {
		error = subyte(vec + lastvecindex, 0);
		if (error) {
			error = EFAULT;
			goto done2;
		}
		++lastvecindex;
	}

	/*
	 * If the map has changed, due to the subyte, the previous
	 * output may be invalid.
	 */
	vm_map_lock_read(map);
	if (timestamp != map->timestamp)
		goto RestartScan;
	vm_map_unlock_read(map);
done2:
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct mlock_args {
	const void *addr;
	size_t len;
};
#endif
/*
 * MPSAFE
 */
int
sys_mlock(td, uap)
	struct thread *td;
	struct mlock_args *uap;
{
	struct proc *proc;
	vm_offset_t addr, end, last, start;
	vm_size_t npages, size;
	unsigned long nsize;
	int error;

	error = priv_check(td, PRIV_VM_MLOCK);
	if (error)
		return (error);
	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	npages = atop(end - start);
	if (npages > vm_page_max_wired)
		return (ENOMEM);
	proc = td->td_proc;
	PROC_LOCK(proc);
	nsize = ptoa(npages +
	    pmap_wired_count(vm_map_pmap(&proc->p_vmspace->vm_map)));
	if (nsize > lim_cur(proc, RLIMIT_MEMLOCK)) {
		PROC_UNLOCK(proc);
		return (ENOMEM);
	}
	PROC_UNLOCK(proc);
	if (npages + cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);
#ifdef RACCT
	PROC_LOCK(proc);
	error = racct_set(proc, RACCT_MEMLOCK, nsize);
	PROC_UNLOCK(proc);
	if (error != 0)
		return (ENOMEM);
#endif
	error = vm_map_wire(&proc->p_vmspace->vm_map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (error != KERN_SUCCESS) {
		PROC_LOCK(proc);
		racct_set(proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(vm_map_pmap(&proc->p_vmspace->vm_map))));
		PROC_UNLOCK(proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

#ifndef _SYS_SYSPROTO_H_
struct mlockall_args {
	int how;
};
#endif

/*
 * MPSAFE
 */
int
sys_mlockall(td, uap)
	struct thread *td;
	struct mlockall_args *uap;
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = 0;

	if ((uap->how == 0) || ((uap->how & ~(MCL_CURRENT|MCL_FUTURE)) != 0))
		return (EINVAL);

#if 0
	/*
	 * If wiring all pages in the process would cause it to exceed
	 * a hard resource limit, return ENOMEM.
	 */
	PROC_LOCK(td->td_proc);
	if (map->size > lim_cur(td->td_proc, RLIMIT_MEMLOCK)) {
		PROC_UNLOCK(td->td_proc);
		return (ENOMEM);
	}
	PROC_UNLOCK(td->td_proc);
#else
	error = priv_check(td, PRIV_VM_MLOCK);
	if (error)
		return (error);
#endif
#ifdef RACCT
	PROC_LOCK(td->td_proc);
	error = racct_set(td->td_proc, RACCT_MEMLOCK, map->size);
	PROC_UNLOCK(td->td_proc);
	if (error != 0)
		return (ENOMEM);
#endif

	if (uap->how & MCL_FUTURE) {
		vm_map_lock(map);
		vm_map_modflags(map, MAP_WIREFUTURE, 0);
		vm_map_unlock(map);
		error = 0;
	}

	if (uap->how & MCL_CURRENT) {
		/*
		 * P1003.1-2001 mandates that all currently mapped pages
		 * will be memory resident and locked (wired) upon return
		 * from mlockall().  vm_map_wire() will wire pages, by
		 * calling vm_fault_wire() for each page in the region.
		 */
		error = vm_map_wire(map, vm_map_min(map), vm_map_max(map),
		    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
		error = (error == KERN_SUCCESS ? 0 : EAGAIN);
	}
#ifdef RACCT
	if (error != KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(vm_map_pmap(&td->td_proc->p_vmspace->vm_map))));
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct munlockall_args {
	register_t dummy;
};
#endif

/*
 * MPSAFE
 */
int
sys_munlockall(td, uap)
	struct thread *td;
	struct munlockall_args *uap;
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);

	/* Clear the MAP_WIREFUTURE flag from this vm_map. */
	vm_map_lock(map);
	vm_map_modflags(map, 0, MAP_WIREFUTURE);
	vm_map_unlock(map);

	/* Forcibly unwire all pages. */
	error = vm_map_unwire(map, vm_map_min(map), vm_map_max(map),
	    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
#ifdef RACCT
	if (error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK, 0);
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct munlock_args {
	const void *addr;
	size_t len;
};
#endif
/*
 * MPSAFE
 */
int
sys_munlock(td, uap)
	struct thread *td;
	struct munlock_args *uap;
{
	vm_offset_t addr, end, last, start;
	vm_size_t size;
	int error;

	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);
	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	error = vm_map_unwire(&td->td_proc->p_vmspace->vm_map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_sub(td->td_proc, RACCT_MEMLOCK, ptoa(end - start));
		PROC_UNLOCK(td->td_proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * vm_mmap_vnode()
 *
 * Helper function for vm_mmap.  Performs sanity checks specific to mmap
 * operations on vnodes.
 *
 * For VCHR vnodes, the vnode lock is held over the call to
 * vm_mmap_cdev() to keep vp->v_rdev valid.
 */
int
vm_mmap_vnode(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct vnode *vp, vm_ooffset_t *foffp, vm_object_t *objp,
    boolean_t *writecounted)
{
	struct vattr va;
	vm_object_t obj;
	vm_offset_t foff;
	struct mount *mp;
	struct ucred *cred;
	int error, flags, locktype, vfslocked;

	mp = vp->v_mount;
	cred = td->td_ucred;
	if ((*maxprotp & VM_PROT_WRITE) && (*flagsp & MAP_SHARED))
		locktype = LK_EXCLUSIVE;
	else
		locktype = LK_SHARED;
	vfslocked = VFS_LOCK_GIANT(mp);
	if ((error = vget(vp, locktype, td)) != 0) {
		VFS_UNLOCK_GIANT(vfslocked);
		return (error);
	}
	foff = *foffp;
	flags = *flagsp;
	obj = vp->v_object;
	if (vp->v_type == VREG) {
		/*
		 * Get the proper underlying object
		 */
		if (obj == NULL) {
			error = EINVAL;
			goto done;
		}
		if (obj->handle != vp) {
			vput(vp);
			vp = (struct vnode *)obj->handle;
			/*
			 * Bypass filesystems obey the mpsafety of the
			 * underlying fs.
			 */
			error = vget(vp, locktype, td);
			if (error != 0) {
				VFS_UNLOCK_GIANT(vfslocked);
				return (error);
			}
		}
		if (locktype == LK_EXCLUSIVE) {
			*writecounted = TRUE;
			vnode_pager_update_writecount(obj, 0, objsize);
		}
	} else if (vp->v_type == VCHR) {
		error = vm_mmap_cdev(td, objsize, prot, maxprotp, flagsp,
		    vp->v_rdev, foffp, objp);
		if (error == 0)
			goto mark_atime;
		goto done;
	} else {
		error = EINVAL;
		goto done;
	}
	if ((error = VOP_GETATTR(vp, &va, cred)))
		goto done;
#ifdef MAC
	error = mac_vnode_check_mmap(cred, vp, prot, flags);
	if (error != 0)
		goto done;
#endif
	if ((flags & MAP_SHARED) != 0) {
		if ((va.va_flags & (SF_SNAPSHOT|IMMUTABLE|APPEND)) != 0) {
			if (prot & PROT_WRITE) {
				error = EPERM;
				goto done;
			}
			*maxprotp &= ~VM_PROT_WRITE;
		}
	}
	/*
	 * If it is a regular file without any references
	 * we do not need to sync it.
	 * Adjust object size to be the size of the actual file.
	 */
	objsize = round_page(va.va_size);
	if (va.va_nlink == 0)
		flags |= MAP_NOSYNC;
	obj = vm_pager_allocate(OBJT_VNODE, vp, objsize, prot, foff, cred);
	if (obj == NULL) {
		error = ENOMEM;
		goto done;
	}
	*objp = obj;
	*flagsp = flags;

mark_atime:
	vfs_mark_atime(vp, cred);

done:
	vput(vp);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}

/*
 * vm_mmap_cdev()
 *
 * MPSAFE
 *
 * Helper function for vm_mmap.  Performs sanity checks specific to mmap
 * operations on cdevs.
 */
int
vm_mmap_cdev(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct cdev *cdev, vm_ooffset_t *foff, vm_object_t *objp)
{
	vm_object_t obj;
	struct cdevsw *dsw;
	int error, flags, ref;

	flags = *flagsp;

	dsw = dev_refthread(cdev, &ref);
	if (dsw == NULL)
		return (ENXIO);
	if (dsw->d_flags & D_MMAP_ANON) {
		dev_relthread(cdev, ref);
		*maxprotp = VM_PROT_ALL;
		*flagsp |= MAP_ANON;
		return (0);
	}
	/*
	 * cdevs do not provide private mappings of any kind.
	 */
	if ((*maxprotp & VM_PROT_WRITE) == 0 &&
	    (prot & PROT_WRITE) != 0) {
		dev_relthread(cdev, ref);
		return (EACCES);
	}
	if (flags & (MAP_PRIVATE|MAP_COPY)) {
		dev_relthread(cdev, ref);
		return (EINVAL);
	}
	/*
	 * Force device mappings to be shared.
	 */
	flags |= MAP_SHARED;
#ifdef MAC_XXX
	error = mac_cdev_check_mmap(td->td_ucred, cdev, prot);
	if (error != 0) {
		dev_relthread(cdev, ref);
		return (error);
	}
#endif
	/*
	 * First, try d_mmap_single().  If that is not implemented
	 * (returns ENODEV), fall back to using the device pager.
	 * Note that d_mmap_single() must return a reference to the
	 * object (it needs to bump the reference count of the object
	 * it returns somehow).
1409 * 1410 * XXX assumes VM_PROT_* == PROT_* 1411 */ 1412 error = dsw->d_mmap_single(cdev, foff, objsize, objp, (int)prot); 1413 dev_relthread(cdev, ref); 1414 if (error != ENODEV) 1415 return (error); 1416 obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff, 1417 td->td_ucred); 1418 if (obj == NULL) 1419 return (EINVAL); 1420 *objp = obj; 1421 *flagsp = flags; 1422 return (0); 1423 } 1424 1425 /* 1426 * vm_mmap_shm() 1427 * 1428 * MPSAFE 1429 * 1430 * Helper function for vm_mmap. Perform sanity check specific for mmap 1431 * operations on shm file descriptors. 1432 */ 1433 int 1434 vm_mmap_shm(struct thread *td, vm_size_t objsize, 1435 vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp, 1436 struct shmfd *shmfd, vm_ooffset_t foff, vm_object_t *objp) 1437 { 1438 int error; 1439 1440 if ((*flagsp & MAP_SHARED) != 0 && 1441 (*maxprotp & VM_PROT_WRITE) == 0 && 1442 (prot & PROT_WRITE) != 0) 1443 return (EACCES); 1444 #ifdef MAC 1445 error = mac_posixshm_check_mmap(td->td_ucred, shmfd, prot, *flagsp); 1446 if (error != 0) 1447 return (error); 1448 #endif 1449 error = shm_mmap(shmfd, objsize, foff, objp); 1450 if (error) 1451 return (error); 1452 return (0); 1453 } 1454 1455 /* 1456 * vm_mmap() 1457 * 1458 * MPSAFE 1459 * 1460 * Internal version of mmap. Currently used by mmap, exec, and sys5 1461 * shared memory. Handle is either a vnode pointer or NULL for MAP_ANON. 1462 */ 1463 int 1464 vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, 1465 vm_prot_t maxprot, int flags, 1466 objtype_t handle_type, void *handle, 1467 vm_ooffset_t foff) 1468 { 1469 boolean_t fitit; 1470 vm_object_t object = NULL; 1471 struct thread *td = curthread; 1472 int docow, error, rv; 1473 boolean_t writecounted; 1474 1475 if (size == 0) 1476 return (0); 1477 1478 size = round_page(size); 1479 1480 if (map == &td->td_proc->p_vmspace->vm_map) { 1481 PROC_LOCK(td->td_proc); 1482 if (map->size + size > lim_cur(td->td_proc, RLIMIT_VMEM)) { 1483 PROC_UNLOCK(td->td_proc); 1484 return (ENOMEM); 1485 } 1486 if (racct_set(td->td_proc, RACCT_VMEM, map->size + size)) { 1487 PROC_UNLOCK(td->td_proc); 1488 return (ENOMEM); 1489 } 1490 PROC_UNLOCK(td->td_proc); 1491 } 1492 1493 /* 1494 * We currently can only deal with page aligned file offsets. 1495 * The check is here rather than in the syscall because the 1496 * kernel calls this function internally for other mmaping 1497 * operations (such as in exec) and non-aligned offsets will 1498 * cause pmap inconsistencies...so we want to be sure to 1499 * disallow this in all cases. 1500 */ 1501 if (foff & PAGE_MASK) 1502 return (EINVAL); 1503 1504 if ((flags & MAP_FIXED) == 0) { 1505 fitit = TRUE; 1506 *addr = round_page(*addr); 1507 } else { 1508 if (*addr != trunc_page(*addr)) 1509 return (EINVAL); 1510 fitit = FALSE; 1511 } 1512 writecounted = FALSE; 1513 1514 /* 1515 * Lookup/allocate object. 
1516 */ 1517 switch (handle_type) { 1518 case OBJT_DEVICE: 1519 error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, 1520 handle, &foff, &object); 1521 break; 1522 case OBJT_VNODE: 1523 error = vm_mmap_vnode(td, size, prot, &maxprot, &flags, 1524 handle, &foff, &object, &writecounted); 1525 break; 1526 case OBJT_SWAP: 1527 error = vm_mmap_shm(td, size, prot, &maxprot, &flags, 1528 handle, foff, &object); 1529 break; 1530 case OBJT_DEFAULT: 1531 if (handle == NULL) { 1532 error = 0; 1533 break; 1534 } 1535 /* FALLTHROUGH */ 1536 default: 1537 error = EINVAL; 1538 break; 1539 } 1540 if (error) 1541 return (error); 1542 if (flags & MAP_ANON) { 1543 object = NULL; 1544 docow = 0; 1545 /* 1546 * Unnamed anonymous regions always start at 0. 1547 */ 1548 if (handle == 0) 1549 foff = 0; 1550 } else if (flags & MAP_PREFAULT_READ) 1551 docow = MAP_PREFAULT; 1552 else 1553 docow = MAP_PREFAULT_PARTIAL; 1554 1555 if ((flags & (MAP_ANON|MAP_SHARED)) == 0) 1556 docow |= MAP_COPY_ON_WRITE; 1557 if (flags & MAP_NOSYNC) 1558 docow |= MAP_DISABLE_SYNCER; 1559 if (flags & MAP_NOCORE) 1560 docow |= MAP_DISABLE_COREDUMP; 1561 /* Shared memory is also shared with children. */ 1562 if (flags & MAP_SHARED) 1563 docow |= MAP_INHERIT_SHARE; 1564 if (writecounted) 1565 docow |= MAP_VN_WRITECOUNT; 1566 1567 if (flags & MAP_STACK) 1568 rv = vm_map_stack(map, *addr, size, prot, maxprot, 1569 docow | MAP_STACK_GROWS_DOWN); 1570 else if (fitit) 1571 rv = vm_map_find(map, object, foff, addr, size, 1572 object != NULL && object->type == OBJT_DEVICE ? 1573 VMFS_ALIGNED_SPACE : VMFS_ANY_SPACE, prot, maxprot, docow); 1574 else 1575 rv = vm_map_fixed(map, object, foff, *addr, size, 1576 prot, maxprot, docow); 1577 1578 if (rv == KERN_SUCCESS) { 1579 /* 1580 * If the process has requested that all future mappings 1581 * be wired, then heed this. 1582 */ 1583 if (map->flags & MAP_WIREFUTURE) { 1584 vm_map_wire(map, *addr, *addr + size, 1585 VM_MAP_WIRE_USER | ((flags & MAP_STACK) ? 1586 VM_MAP_WIRE_HOLESOK : VM_MAP_WIRE_NOHOLES)); 1587 } 1588 } else { 1589 /* 1590 * If this mapping was accounted for in the vnode's 1591 * writecount, then undo that now. 1592 */ 1593 if (writecounted) 1594 vnode_pager_release_writecount(object, 0, size); 1595 /* 1596 * Lose the object reference. Will destroy the 1597 * object if it's an unnamed anonymous mapping 1598 * or named anonymous without other references. 1599 */ 1600 vm_object_deallocate(object); 1601 } 1602 return (vm_mmap_to_errno(rv)); 1603 } 1604 1605 /* 1606 * Translate a Mach VM return code to zero on success or the appropriate errno 1607 * on failure. 1608 */ 1609 int 1610 vm_mmap_to_errno(int rv) 1611 { 1612 1613 switch (rv) { 1614 case KERN_SUCCESS: 1615 return (0); 1616 case KERN_INVALID_ADDRESS: 1617 case KERN_NO_SPACE: 1618 return (ENOMEM); 1619 case KERN_PROTECTION_FAILURE: 1620 return (EACCES); 1621 default: 1622 return (EINVAL); 1623 } 1624 } 1625