/*-
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_hwpmc_hooks.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/capability.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/racct.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/sysent.h>
#include <sys/vmmeter.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vnode_pager.h>

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

#ifndef _SYS_SYSPROTO_H_
struct sbrk_args {
	int incr;
};
#endif

static int vm_mmap_vnode(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
    int *, struct vnode *, vm_ooffset_t *, vm_object_t *, boolean_t *);
static int vm_mmap_cdev(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
    int *, struct cdev *, vm_ooffset_t *, vm_object_t *);
static int vm_mmap_shm(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
    int *, struct shmfd *, vm_ooffset_t, vm_object_t *);

/*
 * MPSAFE
 */
/* ARGSUSED */
int
sys_sbrk(td, uap)
	struct thread *td;
	struct sbrk_args *uap;
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#ifndef _SYS_SYSPROTO_H_
struct sstk_args {
	int incr;
};
#endif

/*
 * MPSAFE
 */
/* ARGSUSED */
int
sys_sstk(td, uap)
	struct thread *td;
	struct sstk_args *uap;
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43)
#ifndef _SYS_SYSPROTO_H_
struct getpagesize_args {
	int dummy;
};
#endif

int
ogetpagesize(td, uap)
	struct thread *td;
	struct getpagesize_args *uap;
{
	/* MP SAFE */
	td->td_retval[0] = PAGE_SIZE;
	return (0);
}
#endif /* COMPAT_43 */

/*
 * Memory Map (mmap) system call.  Note that the file offset
 * and address are allowed to be NOT page aligned, though if
 * the MAP_FIXED flag is set, both must have the same remainder
 * modulo the PAGE_SIZE (POSIX 1003.1b).  If the address is not
 * page-aligned, the actual mapping starts at trunc_page(addr)
 * and the return value is adjusted up by the page offset.
 *
 * Generally speaking, only character devices which are themselves
 * memory-based, such as a video framebuffer, can be mmap'd.  Otherwise
 * there would be no cache coherency between a descriptor and a VM mapping
 * both to the same character device.
 */
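/*
 * Worked example of the rounding described above (editorial illustration,
 * not part of the original comment; the constants are hypothetical):
 * with 4KB pages, a call such as mmap(NULL, 0x100, prot, flags, fd, 0x1234)
 * is treated as a request at file offset trunc_page(0x1234) = 0x1000 with
 * the length grown by the page offset 0x234; the mapping itself is placed
 * at a page-aligned address, and the caller receives that address plus
 * 0x234, so the returned pointer still corresponds to file offset 0x1234.
 */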
#ifndef _SYS_SYSPROTO_H_
struct mmap_args {
	void *addr;
	size_t len;
	int prot;
	int flags;
	int fd;
	long pad;
	off_t pos;
};
#endif

/*
 * MPSAFE
 */
int
sys_mmap(td, uap)
	struct thread *td;
	struct mmap_args *uap;
{
#ifdef HWPMC_HOOKS
	struct pmckern_map_in pkm;
#endif
	struct file *fp;
	struct vnode *vp;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t cap_maxprot, prot, maxprot;
	void *handle;
	objtype_t handle_type;
	int flags, error;
	off_t pos;
	struct vmspace *vms = td->td_proc->p_vmspace;
	cap_rights_t rights;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;
	flags = uap->flags;
	pos = uap->pos;

	fp = NULL;

	/*
	 * Enforce the constraints.
	 * Mapping of length 0 is only allowed for old binaries.
	 * Anonymous mapping shall specify -1 as file descriptor and
	 * zero position for new code.  Be nice to ancient a.out
	 * binaries and correct pos for anonymous mapping, since old
	 * ld.so sometimes issues anonymous map requests with non-zero
	 * pos.
	 */
	if (!SV_CURPROC_FLAG(SV_AOUT)) {
		if ((uap->len == 0 && curproc->p_osrel >= P_OSREL_MAP_ANON) ||
		    ((flags & MAP_ANON) != 0 && (uap->fd != -1 || pos != 0)))
			return (EINVAL);
	} else {
		if ((flags & MAP_ANON) != 0)
			pos = 0;
	}

	if (flags & MAP_STACK) {
		if ((uap->fd != -1) ||
		    ((prot & (PROT_READ | PROT_WRITE)) !=
		    (PROT_READ | PROT_WRITE)))
			return (EINVAL);
		flags |= MAP_ANON;
		pos = 0;
	}

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;

	/* Adjust size for rounding (on both ends). */
	size += pageoff;			/* low end... */
	size = (vm_size_t) round_page(size);	/* hi end */

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		/* Address range must be all in user VM space. */
		if (addr < vm_map_min(&vms->vm_map) ||
		    addr + size > vm_map_max(&vms->vm_map))
			return (EINVAL);
		if (addr + size < addr)
			return (EINVAL);
	} else {
		/*
		 * XXX for non-fixed mappings where no hint is provided or
		 * the hint would fall in the potential heap space,
		 * place it after the end of the largest possible heap.
		 *
		 * There should really be a pmap call to determine a reasonable
		 * location.
		 */
		PROC_LOCK(td->td_proc);
		if (addr == 0 ||
		    (addr >= round_page((vm_offset_t)vms->vm_taddr) &&
		    addr < round_page((vm_offset_t)vms->vm_daddr +
		    lim_max(td->td_proc, RLIMIT_DATA))))
			addr = round_page((vm_offset_t)vms->vm_daddr +
			    lim_max(td->td_proc, RLIMIT_DATA));
		PROC_UNLOCK(td->td_proc);
	}
	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.
		 */
		handle = NULL;
		handle_type = OBJT_DEFAULT;
		maxprot = VM_PROT_ALL;
		cap_maxprot = VM_PROT_ALL;
	} else {
		/*
		 * Mapping file, get fp for validation and don't let the
		 * descriptor disappear on us if we block.
		 * Check capability
		 * rights, but also return the maximum rights to be combined
		 * with maxprot later.
		 */
		rights = CAP_MMAP;
		if (prot & PROT_READ)
			rights |= CAP_READ;
		if ((flags & MAP_SHARED) != 0) {
			if (prot & PROT_WRITE)
				rights |= CAP_WRITE;
		}
		if (prot & PROT_EXEC)
			rights |= CAP_MAPEXEC;
		if ((error = fget_mmap(td, uap->fd, rights, &cap_maxprot,
		    &fp)) != 0)
			goto done;
		if (fp->f_type == DTYPE_SHM) {
			handle = fp->f_data;
			handle_type = OBJT_SWAP;
			maxprot = VM_PROT_NONE;

			/* FREAD should always be set. */
			if (fp->f_flag & FREAD)
				maxprot |= VM_PROT_EXECUTE | VM_PROT_READ;
			if (fp->f_flag & FWRITE)
				maxprot |= VM_PROT_WRITE;
			goto map;
		}
		if (fp->f_type != DTYPE_VNODE) {
			error = ENODEV;
			goto done;
		}
#if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \
    defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4)
		/*
		 * POSIX shared-memory objects are defined to have
		 * kernel persistence, and are not defined to support
		 * read(2)/write(2) -- or even open(2).  Thus, we can
		 * use MAP_NOSYNC to trade on-disk coherence for speed.
		 * The shm_open(3) library routine turns on the FPOSIXSHM
		 * flag to request this behavior.
		 */
		if (fp->f_flag & FPOSIXSHM)
			flags |= MAP_NOSYNC;
#endif
		vp = fp->f_vnode;
		/*
		 * Ensure that file and memory protections are
		 * compatible.  Note that we only worry about
		 * writability if mapping is shared; in this case,
		 * current and max prot are dictated by the open file.
		 * XXX use the vnode instead?  Problem is: what
		 * credentials do we use for determination?  What if
		 * proc does a setuid?
		 */
		if (vp->v_mount != NULL && vp->v_mount->mnt_flag & MNT_NOEXEC)
			maxprot = VM_PROT_NONE;
		else
			maxprot = VM_PROT_EXECUTE;
		if (fp->f_flag & FREAD) {
			maxprot |= VM_PROT_READ;
		} else if (prot & PROT_READ) {
			error = EACCES;
			goto done;
		}
		/*
		 * If we are sharing potential changes (either via
		 * MAP_SHARED or via the implicit sharing of character
		 * device mappings), and we are trying to get write
		 * permission although we opened it without asking
		 * for it, bail out.
		 */
		if ((flags & MAP_SHARED) != 0) {
			if ((fp->f_flag & FWRITE) != 0) {
				maxprot |= VM_PROT_WRITE;
			} else if ((prot & PROT_WRITE) != 0) {
				error = EACCES;
				goto done;
			}
		} else if (vp->v_type != VCHR || (fp->f_flag & FWRITE) != 0) {
			maxprot |= VM_PROT_WRITE;
			cap_maxprot |= VM_PROT_WRITE;
		}
		handle = (void *)vp;
		handle_type = OBJT_VNODE;
	}
map:
	td->td_fpop = fp;
	maxprot &= cap_maxprot;
	error = vm_mmap(&vms->vm_map, &addr, size, prot, maxprot,
	    flags, handle_type, handle, pos);
	td->td_fpop = NULL;
#ifdef HWPMC_HOOKS
	/* inform hwpmc(4) if an executable is being mapped */
	if (error == 0 && handle_type == OBJT_VNODE &&
	    (prot & PROT_EXEC)) {
		pkm.pm_file = handle;
		pkm.pm_address = (uintptr_t) addr;
		PMC_CALL_HOOK(td, PMC_FN_MMAP, (void *) &pkm);
	}
#endif
	if (error == 0)
		td->td_retval[0] = (register_t) (addr + pageoff);
done:
	if (fp)
		fdrop(fp, td);

	return (error);
}

int
freebsd6_mmap(struct thread *td, struct freebsd6_mmap_args *uap)
{
	struct mmap_args oargs;

	oargs.addr = uap->addr;
	oargs.len = uap->len;
	oargs.prot = uap->prot;
	oargs.flags = uap->flags;
	oargs.fd = uap->fd;
	oargs.pos = uap->pos;
	return (sys_mmap(td, &oargs));
}

#ifdef COMPAT_43
#ifndef _SYS_SYSPROTO_H_
struct ommap_args {
	caddr_t addr;
	int len;
	int prot;
	int flags;
	int fd;
	long pos;
};
#endif
int
ommap(td, uap)
	struct thread *td;
	struct ommap_args *uap;
{
	struct mmap_args nargs;
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC | PROT_WRITE,
		PROT_READ,
		PROT_EXEC | PROT_READ,
		PROT_WRITE | PROT_READ,
		PROT_EXEC | PROT_WRITE | PROT_READ,
	};

#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100

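	/*
	 * Illustrative note (editorial; inferred from the cvtbsdprot[] table
	 * above rather than stated in the original source): the historic
	 * 4.3BSD protection value used bit 0 for execute, bit 1 for write
	 * and bit 2 for read, so e.g. an old prot of 6 (read|write)
	 * converts to PROT_WRITE | PROT_READ below.
	 */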
	nargs.addr = uap->addr;
	nargs.len = uap->len;
	nargs.prot = cvtbsdprot[uap->prot & 0x7];
#ifdef COMPAT_FREEBSD32
#if defined(__amd64__) || defined(__ia64__)
	if (i386_read_exec && SV_PROC_FLAG(td->td_proc, SV_ILP32) &&
	    nargs.prot != 0)
		nargs.prot |= PROT_EXEC;
#endif
#endif
	nargs.flags = 0;
	if (uap->flags & OMAP_ANON)
		nargs.flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		nargs.flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		nargs.flags |= MAP_SHARED;
	else
		nargs.flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		nargs.flags |= MAP_FIXED;
	nargs.fd = uap->fd;
	nargs.pos = uap->pos;
	return (sys_mmap(td, &nargs));
}
#endif /* COMPAT_43 */

#ifndef _SYS_SYSPROTO_H_
struct msync_args {
	void *addr;
	size_t len;
	int flags;
};
#endif
/*
 * MPSAFE
 */
int
sys_msync(td, uap)
	struct thread *td;
	struct msync_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int flags;
	vm_map_t map;
	int rv;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	flags = uap->flags;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	map = &td->td_proc->p_vmspace->vm_map;

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_sync(map, addr, addr + size, (flags & MS_ASYNC) == 0,
	    (flags & MS_INVALIDATE) != 0);
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
		return (EINVAL);	/* Sun returns ENOMEM? */
	case KERN_INVALID_ARGUMENT:
		return (EBUSY);
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
}

#ifndef _SYS_SYSPROTO_H_
struct munmap_args {
	void *addr;
	size_t len;
};
#endif
/*
 * MPSAFE
 */
int
sys_munmap(td, uap)
	struct thread *td;
	struct munmap_args *uap;
{
#ifdef HWPMC_HOOKS
	struct pmckern_map_out pkm;
	vm_map_entry_t entry;
#endif
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_map_t map;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	if (size == 0)
		return (EINVAL);

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	if (addr < vm_map_min(map) || addr + size > vm_map_max(map))
		return (EINVAL);
	vm_map_lock(map);
#ifdef HWPMC_HOOKS
	/*
	 * Inform hwpmc if the address range being unmapped contains
	 * an executable region.
	 */
	pkm.pm_address = (uintptr_t) NULL;
	if (vm_map_lookup_entry(map, addr, &entry)) {
		for (;
		    entry != &map->header && entry->start < addr + size;
		    entry = entry->next) {
			if (vm_map_check_protection(map, entry->start,
			    entry->end, VM_PROT_EXECUTE) == TRUE) {
				pkm.pm_address = (uintptr_t) addr;
				pkm.pm_size = (size_t) size;
				break;
			}
		}
	}
#endif
	vm_map_delete(map, addr, addr + size);

#ifdef HWPMC_HOOKS
	/* downgrade the lock to prevent a LOR with the pmc-sx lock */
	vm_map_lock_downgrade(map);
	if (pkm.pm_address != (uintptr_t) NULL)
		PMC_CALL_HOOK(td, PMC_FN_MUNMAP, (void *) &pkm);
	vm_map_unlock_read(map);
#else
	vm_map_unlock(map);
#endif
	/* vm_map_delete returns nothing but KERN_SUCCESS anyway */
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mprotect_args {
	const void *addr;
	size_t len;
	int prot;
};
#endif
/*
 * MPSAFE
 */
int
sys_mprotect(td, uap)
	struct thread *td;
	struct mprotect_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t prot;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_protect(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, prot, FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	case KERN_RESOURCE_SHORTAGE:
		return (ENOMEM);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct minherit_args {
	void *addr;
	size_t len;
	int inherit;
};
#endif
/*
 * MPSAFE
 */
int
sys_minherit(td, uap)
	struct thread *td;
	struct minherit_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_inherit_t inherit;

	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	inherit = uap->inherit;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_inherit(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct madvise_args {
	void *addr;
	size_t len;
	int behav;
};
#endif

/*
 * MPSAFE
 */
int
sys_madvise(td, uap)
	struct thread *td;
	struct madvise_args *uap;
{
	vm_offset_t start, end;
	vm_map_t map;
	struct proc *p;
	int error;

	/*
	 * Check for our special case, advising the swap pager we are
	 * "immortal."
	 */
	if (uap->behav == MADV_PROTECT) {
		error = priv_check(td, PRIV_VM_MADV_PROTECT);
		if (error == 0) {
			p = td->td_proc;
			PROC_LOCK(p);
			p->p_flag |= P_PROTECTED;
			PROC_UNLOCK(p);
		}
		return (error);
	}
	/*
	 * Check for illegal behavior
	 */
	if (uap->behav < 0 || uap->behav > MADV_CORE)
		return (EINVAL);
	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	if ((vm_offset_t)uap->addr < vm_map_min(map) ||
	    (vm_offset_t)uap->addr + uap->len > vm_map_max(map))
		return (EINVAL);
	if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr)
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	start = trunc_page((vm_offset_t) uap->addr);
	end = round_page((vm_offset_t) uap->addr + uap->len);

	if (vm_map_madvise(map, start, end, uap->behav))
		return (EINVAL);
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mincore_args {
	const void *addr;
	size_t len;
	char *vec;
};
#endif

/*
 * MPSAFE
 */
int
sys_mincore(td, uap)
	struct thread *td;
	struct mincore_args *uap;
{
	vm_offset_t addr, first_addr;
	vm_offset_t end, cend;
	pmap_t pmap;
	vm_map_t map;
	char *vec;
	int error = 0;
	int vecindex, lastvecindex;
	vm_map_entry_t current;
	vm_map_entry_t entry;
	vm_object_t object;
	vm_paddr_t locked_pa;
	vm_page_t m;
	vm_pindex_t pindex;
	int mincoreinfo;
	unsigned int timestamp;
	boolean_t locked;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page((vm_offset_t) uap->addr);
	end = addr + (vm_size_t)round_page(uap->len);
	map = &td->td_proc->p_vmspace->vm_map;
	if (end > vm_map_max(map) || end < addr)
		return (ENOMEM);

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	pmap = vmspace_pmap(td->td_proc->p_vmspace);

	vm_map_lock_read(map);
RestartScan:
	timestamp = map->timestamp;

	if (!vm_map_lookup_entry(map, addr, &entry)) {
		vm_map_unlock_read(map);
		return (ENOMEM);
	}

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current process's address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for (current = entry;
	    (current != &map->header) && (current->start < end);
	    current = current->next) {

		/*
		 * check for contiguity
		 */
		if (current->end < end &&
		    (entry->next == &map->header ||
		    current->next->start > current->end)) {
			vm_map_unlock_read(map);
			return (ENOMEM);
		}

		/*
		 * ignore submaps (for now) or null objects
		 */
		if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) ||
		    current->object.vm_object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		/*
		 * scan this entry one page at a time
		 */
		while (addr < cend) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 */
			object = NULL;
			locked_pa = 0;
		retry:
			m = NULL;
			mincoreinfo = pmap_mincore(pmap, addr, &locked_pa);
			if (locked_pa != 0) {
				/*
				 * The page is mapped by this process but not
				 * both accessed and modified.  It is also
				 * managed.  Acquire the object lock so that
				 * other mappings might be examined.
				 */
				m = PHYS_TO_VM_PAGE(locked_pa);
				if (m->object != object) {
					if (object != NULL)
						VM_OBJECT_UNLOCK(object);
					object = m->object;
					locked = VM_OBJECT_TRYLOCK(object);
					vm_page_unlock(m);
					if (!locked) {
						VM_OBJECT_LOCK(object);
						vm_page_lock(m);
						goto retry;
					}
				} else
					vm_page_unlock(m);
				KASSERT(m->valid == VM_PAGE_BITS_ALL,
				    ("mincore: page %p is mapped but invalid",
				    m));
			} else if (mincoreinfo == 0) {
				/*
				 * The page is not mapped by this process.  If
				 * the object implements managed pages, then
				 * determine if the page is resident so that
				 * the mappings might be examined.
				 */
				if (current->object.vm_object != object) {
					if (object != NULL)
						VM_OBJECT_UNLOCK(object);
					object = current->object.vm_object;
					VM_OBJECT_LOCK(object);
				}
				if (object->type == OBJT_DEFAULT ||
				    object->type == OBJT_SWAP ||
				    object->type == OBJT_VNODE) {
					pindex = OFF_TO_IDX(current->offset +
					    (addr - current->start));
					m = vm_page_lookup(object, pindex);
					if (m == NULL &&
					    vm_page_is_cached(object, pindex))
						mincoreinfo = MINCORE_INCORE;
					if (m != NULL && m->valid == 0)
						m = NULL;
					if (m != NULL)
						mincoreinfo = MINCORE_INCORE;
				}
			}
			if (m != NULL) {
				/* Examine other mappings to the page. */
				if (m->dirty == 0 && pmap_is_modified(m))
					vm_page_dirty(m);
				if (m->dirty != 0)
					mincoreinfo |= MINCORE_MODIFIED_OTHER;
				/*
				 * The first test for PGA_REFERENCED is an
				 * optimization.  The second test is
				 * required because a concurrent pmap
				 * operation could clear the last reference
				 * and set PGA_REFERENCED before the call to
				 * pmap_is_referenced().
				 */
				if ((m->aflags & PGA_REFERENCED) != 0 ||
				    pmap_is_referenced(m) ||
				    (m->aflags & PGA_REFERENCED) != 0)
					mincoreinfo |= MINCORE_REFERENCED_OTHER;
			}
			if (object != NULL)
				VM_OBJECT_UNLOCK(object);

			/*
			 * subyte may page fault.  In case it needs to modify
			 * the map, we release the lock.
			 */
			vm_map_unlock_read(map);

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = OFF_TO_IDX(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make sure
			 * that the byte vector is zeroed for those skipped
			 * entries.
			 */
			while ((lastvecindex + 1) < vecindex) {
				error = subyte(vec + lastvecindex, 0);
				if (error) {
					error = EFAULT;
					goto done2;
				}
				++lastvecindex;
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte(vec + vecindex, mincoreinfo);
			if (error) {
				error = EFAULT;
				goto done2;
			}

			/*
			 * If the map has changed, due to the subyte, the
			 * previous output may be invalid.
			 */
			vm_map_lock_read(map);
			if (timestamp != map->timestamp)
				goto RestartScan;

			lastvecindex = vecindex;
			addr += PAGE_SIZE;
		}
	}

	/*
	 * subyte may page fault.  In case it needs to modify
	 * the map, we release the lock.
	 */
	vm_map_unlock_read(map);

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = OFF_TO_IDX(end - first_addr);
	while ((lastvecindex + 1) < vecindex) {
		error = subyte(vec + lastvecindex, 0);
		if (error) {
			error = EFAULT;
			goto done2;
		}
		++lastvecindex;
	}

	/*
	 * If the map has changed, due to the subyte, the previous
	 * output may be invalid.
	 */
	vm_map_lock_read(map);
	if (timestamp != map->timestamp)
		goto RestartScan;
	vm_map_unlock_read(map);
done2:
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct mlock_args {
	const void *addr;
	size_t len;
};
#endif
/*
 * MPSAFE
 */
int
sys_mlock(td, uap)
	struct thread *td;
	struct mlock_args *uap;
{
	struct proc *proc;
	vm_offset_t addr, end, last, start;
	vm_size_t npages, size;
	unsigned long nsize;
	int error;

	error = priv_check(td, PRIV_VM_MLOCK);
	if (error)
		return (error);
	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	npages = atop(end - start);
	if (npages > vm_page_max_wired)
		return (ENOMEM);
	proc = td->td_proc;
	PROC_LOCK(proc);
	nsize = ptoa(npages + vmspace_wired_count(proc->p_vmspace));
	if (nsize > lim_cur(proc, RLIMIT_MEMLOCK)) {
		PROC_UNLOCK(proc);
		return (ENOMEM);
	}
	PROC_UNLOCK(proc);
	if (npages + cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);
#ifdef RACCT
	PROC_LOCK(proc);
	error = racct_set(proc, RACCT_MEMLOCK, nsize);
	PROC_UNLOCK(proc);
	if (error != 0)
		return (ENOMEM);
#endif
	error = vm_map_wire(&proc->p_vmspace->vm_map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (error != KERN_SUCCESS) {
		PROC_LOCK(proc);
		racct_set(proc, RACCT_MEMLOCK,
		    ptoa(vmspace_wired_count(proc->p_vmspace)));
		PROC_UNLOCK(proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

#ifndef _SYS_SYSPROTO_H_
struct mlockall_args {
	int how;
};
#endif

/*
 * MPSAFE
 */
int
sys_mlockall(td, uap)
	struct thread *td;
	struct mlockall_args *uap;
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = 0;

	if ((uap->how == 0) || ((uap->how & ~(MCL_CURRENT|MCL_FUTURE)) != 0))
		return (EINVAL);

#if 0
	/*
	 * If wiring all pages in the process would cause it to exceed
	 * a hard resource limit, return ENOMEM.
	 */
	PROC_LOCK(td->td_proc);
	if (map->size > lim_cur(td->td_proc, RLIMIT_MEMLOCK)) {
		PROC_UNLOCK(td->td_proc);
		return (ENOMEM);
	}
	PROC_UNLOCK(td->td_proc);
#else
	error = priv_check(td, PRIV_VM_MLOCK);
	if (error)
		return (error);
#endif
#ifdef RACCT
	PROC_LOCK(td->td_proc);
	error = racct_set(td->td_proc, RACCT_MEMLOCK, map->size);
	PROC_UNLOCK(td->td_proc);
	if (error != 0)
		return (ENOMEM);
#endif

	if (uap->how & MCL_FUTURE) {
		vm_map_lock(map);
		vm_map_modflags(map, MAP_WIREFUTURE, 0);
		vm_map_unlock(map);
		error = 0;
	}

	if (uap->how & MCL_CURRENT) {
		/*
		 * P1003.1-2001 mandates that all currently mapped pages
		 * will be memory resident and locked (wired) upon return
		 * from mlockall().  vm_map_wire() will wire pages, by
		 * calling vm_fault_wire() for each page in the region.
		 */
		error = vm_map_wire(map, vm_map_min(map), vm_map_max(map),
		    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
		error = (error == KERN_SUCCESS ? 0 : EAGAIN);
	}
#ifdef RACCT
	if (error != KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK,
		    ptoa(vmspace_wired_count(td->td_proc->p_vmspace)));
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct munlockall_args {
	register_t dummy;
};
#endif

/*
 * MPSAFE
 */
int
sys_munlockall(td, uap)
	struct thread *td;
	struct munlockall_args *uap;
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);

	/* Clear the MAP_WIREFUTURE flag from this vm_map. */
	vm_map_lock(map);
	vm_map_modflags(map, 0, MAP_WIREFUTURE);
	vm_map_unlock(map);

	/* Forcibly unwire all pages. */
	error = vm_map_unwire(map, vm_map_min(map), vm_map_max(map),
	    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
#ifdef RACCT
	if (error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK, 0);
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct munlock_args {
	const void *addr;
	size_t len;
};
#endif
/*
 * MPSAFE
 */
int
sys_munlock(td, uap)
	struct thread *td;
	struct munlock_args *uap;
{
	vm_offset_t addr, end, last, start;
	vm_size_t size;
	int error;

	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);
	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	error = vm_map_unwire(&td->td_proc->p_vmspace->vm_map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_sub(td->td_proc, RACCT_MEMLOCK, ptoa(end - start));
		PROC_UNLOCK(td->td_proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * vm_mmap_vnode()
 *
 * Helper function for vm_mmap.  Perform the sanity checks specific to
 * mmap operations on vnodes.
 *
 * For VCHR vnodes, the vnode lock is held over the call to
 * vm_mmap_cdev() to keep vp->v_rdev valid.
 */
int
vm_mmap_vnode(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct vnode *vp, vm_ooffset_t *foffp, vm_object_t *objp,
    boolean_t *writecounted)
{
	struct vattr va;
	vm_object_t obj;
	vm_offset_t foff;
	struct mount *mp;
	struct ucred *cred;
	int error, flags, locktype;

	mp = vp->v_mount;
	cred = td->td_ucred;
	if ((*maxprotp & VM_PROT_WRITE) && (*flagsp & MAP_SHARED))
		locktype = LK_EXCLUSIVE;
	else
		locktype = LK_SHARED;
	if ((error = vget(vp, locktype, td)) != 0)
		return (error);
	foff = *foffp;
	flags = *flagsp;
	obj = vp->v_object;
	if (vp->v_type == VREG) {
		/*
		 * Get the proper underlying object
		 */
		if (obj == NULL) {
			error = EINVAL;
			goto done;
		}
		if (obj->handle != vp) {
			vput(vp);
			vp = (struct vnode *)obj->handle;
			/*
			 * Bypass filesystems obey the mpsafety of the
			 * underlying fs.
			 */
			error = vget(vp, locktype, td);
			if (error != 0)
				return (error);
		}
		if (locktype == LK_EXCLUSIVE) {
			*writecounted = TRUE;
			vnode_pager_update_writecount(obj, 0, objsize);
		}
	} else if (vp->v_type == VCHR) {
		error = vm_mmap_cdev(td, objsize, prot, maxprotp, flagsp,
		    vp->v_rdev, foffp, objp);
		if (error == 0)
			goto mark_atime;
		goto done;
	} else {
		error = EINVAL;
		goto done;
	}
	if ((error = VOP_GETATTR(vp, &va, cred)))
		goto done;
#ifdef MAC
	error = mac_vnode_check_mmap(cred, vp, prot, flags);
	if (error != 0)
		goto done;
#endif
	if ((flags & MAP_SHARED) != 0) {
		if ((va.va_flags & (SF_SNAPSHOT|IMMUTABLE|APPEND)) != 0) {
			if (prot & PROT_WRITE) {
				error = EPERM;
				goto done;
			}
			*maxprotp &= ~VM_PROT_WRITE;
		}
	}
	/*
	 * If it is a regular file without any references
	 * we do not need to sync it.
	 * Adjust object size to be the size of actual file.
	 */
	objsize = round_page(va.va_size);
	if (va.va_nlink == 0)
		flags |= MAP_NOSYNC;
	obj = vm_pager_allocate(OBJT_VNODE, vp, objsize, prot, foff, cred);
	if (obj == NULL) {
		error = ENOMEM;
		goto done;
	}
	*objp = obj;
	*flagsp = flags;

mark_atime:
	vfs_mark_atime(vp, cred);

done:
	vput(vp);
	return (error);
}

/*
 * vm_mmap_cdev()
 *
 * MPSAFE
 *
 * Helper function for vm_mmap.  Perform the sanity checks specific to
 * mmap operations on cdevs.
 */
int
vm_mmap_cdev(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct cdev *cdev, vm_ooffset_t *foff, vm_object_t *objp)
{
	vm_object_t obj;
	struct cdevsw *dsw;
	int error, flags, ref;

	flags = *flagsp;

	dsw = dev_refthread(cdev, &ref);
	if (dsw == NULL)
		return (ENXIO);
	if (dsw->d_flags & D_MMAP_ANON) {
		dev_relthread(cdev, ref);
		*maxprotp = VM_PROT_ALL;
		*flagsp |= MAP_ANON;
		return (0);
	}
	/*
	 * cdevs do not provide private mappings of any kind.
	 */
	if ((*maxprotp & VM_PROT_WRITE) == 0 &&
	    (prot & PROT_WRITE) != 0) {
		dev_relthread(cdev, ref);
		return (EACCES);
	}
	if (flags & (MAP_PRIVATE|MAP_COPY)) {
		dev_relthread(cdev, ref);
		return (EINVAL);
	}
	/*
	 * Force device mappings to be shared.
	 */
	flags |= MAP_SHARED;
#ifdef MAC_XXX
	error = mac_cdev_check_mmap(td->td_ucred, cdev, prot);
	if (error != 0) {
		dev_relthread(cdev, ref);
		return (error);
	}
#endif
	/*
	 * First, try d_mmap_single().  If that is not implemented
	 * (returns ENODEV), fall back to using the device pager.
	 * Note that d_mmap_single() must return a reference to the
	 * object (it needs to bump the reference count of the object
	 * it returns somehow).
	 *
	 * XXX assumes VM_PROT_* == PROT_*
	 */
	error = dsw->d_mmap_single(cdev, foff, objsize, objp, (int)prot);
	dev_relthread(cdev, ref);
	if (error != ENODEV)
		return (error);
	obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff,
	    td->td_ucred);
	if (obj == NULL)
		return (EINVAL);
	*objp = obj;
	*flagsp = flags;
	return (0);
}

/*
 * vm_mmap_shm()
 *
 * MPSAFE
 *
 * Helper function for vm_mmap.
 * Perform the sanity checks specific
 * to mmap operations on shm file descriptors.
 */
int
vm_mmap_shm(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct shmfd *shmfd, vm_ooffset_t foff, vm_object_t *objp)
{
	int error;

	if ((*flagsp & MAP_SHARED) != 0 &&
	    (*maxprotp & VM_PROT_WRITE) == 0 &&
	    (prot & PROT_WRITE) != 0)
		return (EACCES);
#ifdef MAC
	error = mac_posixshm_check_mmap(td->td_ucred, shmfd, prot, *flagsp);
	if (error != 0)
		return (error);
#endif
	error = shm_mmap(shmfd, objsize, foff, objp);
	if (error)
		return (error);
	return (0);
}

/*
 * vm_mmap()
 *
 * MPSAFE
 *
 * Internal version of mmap.  Currently used by mmap, exec, and sys5
 * shared memory.  Handle is either a vnode pointer or NULL for MAP_ANON.
 */
int
vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags,
    objtype_t handle_type, void *handle,
    vm_ooffset_t foff)
{
	boolean_t fitit;
	vm_object_t object = NULL;
	struct thread *td = curthread;
	int docow, error, rv;
	boolean_t writecounted;

	if (size == 0)
		return (0);

	size = round_page(size);

	if (map == &td->td_proc->p_vmspace->vm_map) {
		PROC_LOCK(td->td_proc);
		if (map->size + size > lim_cur(td->td_proc, RLIMIT_VMEM)) {
			PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		if (racct_set(td->td_proc, RACCT_VMEM, map->size + size)) {
			PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		PROC_UNLOCK(td->td_proc);
	}

	/*
	 * We currently can only deal with page aligned file offsets.
	 * The check is here rather than in the syscall because the
	 * kernel calls this function internally for other mmapping
	 * operations (such as in exec) and non-aligned offsets will
	 * cause pmap inconsistencies...so we want to be sure to
	 * disallow this in all cases.
	 */
	if (foff & PAGE_MASK)
		return (EINVAL);

	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		if (*addr != trunc_page(*addr))
			return (EINVAL);
		fitit = FALSE;
	}
	writecounted = FALSE;

	/*
	 * Lookup/allocate object.
	 */
	switch (handle_type) {
	case OBJT_DEVICE:
		error = vm_mmap_cdev(td, size, prot, &maxprot, &flags,
		    handle, &foff, &object);
		break;
	case OBJT_VNODE:
		error = vm_mmap_vnode(td, size, prot, &maxprot, &flags,
		    handle, &foff, &object, &writecounted);
		break;
	case OBJT_SWAP:
		error = vm_mmap_shm(td, size, prot, &maxprot, &flags,
		    handle, foff, &object);
		break;
	case OBJT_DEFAULT:
		if (handle == NULL) {
			error = 0;
			break;
		}
		/* FALLTHROUGH */
	default:
		error = EINVAL;
		break;
	}
	if (error)
		return (error);
	if (flags & MAP_ANON) {
		object = NULL;
		docow = 0;
		/*
		 * Unnamed anonymous regions always start at 0.
		 */
		if (handle == 0)
			foff = 0;
	} else if (flags & MAP_PREFAULT_READ)
		docow = MAP_PREFAULT;
	else
		docow = MAP_PREFAULT_PARTIAL;

	if ((flags & (MAP_ANON|MAP_SHARED)) == 0)
		docow |= MAP_COPY_ON_WRITE;
	if (flags & MAP_NOSYNC)
		docow |= MAP_DISABLE_SYNCER;
	if (flags & MAP_NOCORE)
		docow |= MAP_DISABLE_COREDUMP;
	/* Shared memory is also shared with children. */
	if (flags & MAP_SHARED)
		docow |= MAP_INHERIT_SHARE;
	if (writecounted)
		docow |= MAP_VN_WRITECOUNT;

	if (flags & MAP_STACK)
		rv = vm_map_stack(map, *addr, size, prot, maxprot,
		    docow | MAP_STACK_GROWS_DOWN);
	else if (fitit)
		rv = vm_map_find(map, object, foff, addr, size,
		    object != NULL && object->type == OBJT_DEVICE ?
		    VMFS_ALIGNED_SPACE : VMFS_ANY_SPACE, prot, maxprot, docow);
	else
		rv = vm_map_fixed(map, object, foff, *addr, size,
		    prot, maxprot, docow);

	if (rv == KERN_SUCCESS) {
		/*
		 * If the process has requested that all future mappings
		 * be wired, then heed this.
		 */
		if (map->flags & MAP_WIREFUTURE) {
			vm_map_wire(map, *addr, *addr + size,
			    VM_MAP_WIRE_USER | ((flags & MAP_STACK) ?
			    VM_MAP_WIRE_HOLESOK : VM_MAP_WIRE_NOHOLES));
		}
	} else {
		/*
		 * If this mapping was accounted for in the vnode's
		 * writecount, then undo that now.
		 */
		if (writecounted)
			vnode_pager_release_writecount(object, 0, size);
		/*
		 * Lose the object reference.  Will destroy the
		 * object if it's an unnamed anonymous mapping
		 * or named anonymous without other references.
		 */
		vm_object_deallocate(object);
	}
	return (vm_mmap_to_errno(rv));
}

/*
 * Translate a Mach VM return code to zero on success or the appropriate errno
 * on failure.
 */
int
vm_mmap_to_errno(int rv)
{

	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}