/*-
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_hwpmc_hooks.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/capability.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/procctl.h>
#include <sys/racct.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/vmmeter.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vnode_pager.h>

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

int old_mlock = 0;
SYSCTL_INT(_vm, OID_AUTO, old_mlock, CTLFLAG_RW | CTLFLAG_TUN, &old_mlock, 0,
    "Do not apply RLIMIT_MEMLOCK on mlockall");
TUNABLE_INT("vm.old_mlock", &old_mlock);

#ifdef MAP_32BIT
#define	MAP_32BIT_MAX_ADDR	((vm_offset_t)1 << 31)
#endif

static int vm_mmap_vnode(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
    int *, struct vnode *, vm_ooffset_t *, vm_object_t *, boolean_t *);
static int vm_mmap_cdev(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
    int *, struct cdev *, vm_ooffset_t *, vm_object_t *);
static int vm_mmap_shm(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
    int *, struct shmfd *, vm_ooffset_t, vm_object_t *);

#ifndef _SYS_SYSPROTO_H_
struct sbrk_args {
	int incr;
};
#endif

/*
 * MPSAFE
 */
/* ARGSUSED */
int
sys_sbrk(td, uap)
	struct thread *td;
	struct sbrk_args *uap;
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#ifndef _SYS_SYSPROTO_H_
struct sstk_args {
	int incr;
};
#endif

/*
 * MPSAFE
 */
/* ARGSUSED */
int
sys_sstk(td, uap)
	struct thread *td;
	struct sstk_args *uap;
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43)
#ifndef _SYS_SYSPROTO_H_
struct getpagesize_args {
	int dummy;
};
#endif

int
ogetpagesize(td, uap)
	struct thread *td;
	struct getpagesize_args *uap;
{
	/* MP SAFE */
	td->td_retval[0] = PAGE_SIZE;
	return (0);
}
#endif				/* COMPAT_43 */


/*
 * Memory Map (mmap) system call.  Note that the file offset
 * and address are allowed to be NOT page aligned, though if
 * the MAP_FIXED flag is set, both must have the same remainder
 * modulo the PAGE_SIZE (POSIX 1003.1b).  If the address is not
 * page-aligned, the actual mapping starts at trunc_page(addr)
 * and the return value is adjusted up by the page offset.
 *
 * Generally speaking, only character devices which are themselves
 * memory-based, such as a video framebuffer, can be mmap'd.  Otherwise
 * there would be no cache coherency between a descriptor and a VM mapping
 * both to the same character device.
 */
#ifndef _SYS_SYSPROTO_H_
struct mmap_args {
	void *addr;
	size_t len;
	int prot;
	int flags;
	int fd;
	long pad;
	off_t pos;
};
#endif

/*
 * MPSAFE
 */
int
sys_mmap(td, uap)
	struct thread *td;
	struct mmap_args *uap;
{
#ifdef HWPMC_HOOKS
	struct pmckern_map_in pkm;
#endif
	struct file *fp;
	struct vnode *vp;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t cap_maxprot, prot, maxprot;
	void *handle;
	objtype_t handle_type;
	int align, error, flags;
	off_t pos;
	struct vmspace *vms = td->td_proc->p_vmspace;
	cap_rights_t rights;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;
	flags = uap->flags;
	pos = uap->pos;

	fp = NULL;

	/*
	 * Enforce the constraints.
	 * A mapping of length 0 is only allowed for old binaries.
	 * An anonymous mapping must specify a file descriptor of -1
	 * and a zero position for new code.  Be nice to ancient a.out
	 * binaries and correct pos for anonymous mappings, since old
	 * ld.so sometimes issues anonymous map requests with non-zero
	 * pos.
	 */
	if (!SV_CURPROC_FLAG(SV_AOUT)) {
		if ((uap->len == 0 && curproc->p_osrel >= P_OSREL_MAP_ANON) ||
		    ((flags & MAP_ANON) != 0 && (uap->fd != -1 || pos != 0)))
			return (EINVAL);
	} else {
		if ((flags & MAP_ANON) != 0)
			pos = 0;
	}

	if (flags & MAP_STACK) {
		if ((uap->fd != -1) ||
		    ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE)))
			return (EINVAL);
		flags |= MAP_ANON;
		pos = 0;
	}

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;

	/* Adjust size for rounding (on both ends). */
	size += pageoff;			/* low end... */
	size = (vm_size_t) round_page(size);	/* hi end */

	/* Ensure alignment is at least a page and fits in a pointer. */
	align = flags & MAP_ALIGNMENT_MASK;
	if (align != 0 && align != MAP_ALIGNED_SUPER &&
	    (align >> MAP_ALIGNMENT_SHIFT >= sizeof(void *) * NBBY ||
	    align >> MAP_ALIGNMENT_SHIFT < PAGE_SHIFT))
		return (EINVAL);

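	/*
	 * Illustrative userland sketch (not kernel code): the alignment
	 * check above admits requests such as
	 *
	 *	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
	 *	    MAP_ANON | MAP_ALIGNED(21), -1, 0);
	 *
	 * which asks for a 2 MB (1 << 21) aligned anonymous mapping;
	 * shifts below PAGE_SHIFT, or too wide to fit in a pointer,
	 * fail with EINVAL.
	 */
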
	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		/* Address range must be all in user VM space. */
		if (addr < vm_map_min(&vms->vm_map) ||
		    addr + size > vm_map_max(&vms->vm_map))
			return (EINVAL);
		if (addr + size < addr)
			return (EINVAL);
#ifdef MAP_32BIT
		if (flags & MAP_32BIT && addr + size > MAP_32BIT_MAX_ADDR)
			return (EINVAL);
	} else if (flags & MAP_32BIT) {
		/*
		 * For MAP_32BIT, override the hint if it is too high and
		 * do not bother moving the mapping past the heap (since
		 * the heap is usually above 2GB).
		 */
		if (addr + size > MAP_32BIT_MAX_ADDR)
			addr = 0;
#endif
	} else {
		/*
		 * XXX for non-fixed mappings where no hint is provided or
		 * the hint would fall in the potential heap space,
		 * place it after the end of the largest possible heap.
		 *
		 * There should really be a pmap call to determine a reasonable
		 * location.
		 */
		PROC_LOCK(td->td_proc);
		if (addr == 0 ||
		    (addr >= round_page((vm_offset_t)vms->vm_taddr) &&
		    addr < round_page((vm_offset_t)vms->vm_daddr +
		    lim_max(td->td_proc, RLIMIT_DATA))))
			addr = round_page((vm_offset_t)vms->vm_daddr +
			    lim_max(td->td_proc, RLIMIT_DATA));
		PROC_UNLOCK(td->td_proc);
	}
	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.
		 */
		handle = NULL;
		handle_type = OBJT_DEFAULT;
		maxprot = VM_PROT_ALL;
		cap_maxprot = VM_PROT_ALL;
	} else {
		/*
		 * Mapping file, get fp for validation and don't let the
		 * descriptor disappear on us if we block. Check capability
		 * rights, but also return the maximum rights to be combined
		 * with maxprot later.
		 */
		cap_rights_init(&rights, CAP_MMAP);
		if (prot & PROT_READ)
			cap_rights_set(&rights, CAP_MMAP_R);
		if ((flags & MAP_SHARED) != 0) {
			if (prot & PROT_WRITE)
				cap_rights_set(&rights, CAP_MMAP_W);
		}
		if (prot & PROT_EXEC)
			cap_rights_set(&rights, CAP_MMAP_X);
		error = fget_mmap(td, uap->fd, &rights, &cap_maxprot, &fp);
		if (error != 0)
			goto done;
		if (fp->f_type == DTYPE_SHM) {
			handle = fp->f_data;
			handle_type = OBJT_SWAP;
			maxprot = VM_PROT_NONE;

			/* FREAD should always be set. */
			if (fp->f_flag & FREAD)
				maxprot |= VM_PROT_EXECUTE | VM_PROT_READ;
			if (fp->f_flag & FWRITE)
				maxprot |= VM_PROT_WRITE;
			goto map;
		}
		if (fp->f_type != DTYPE_VNODE) {
			error = ENODEV;
			goto done;
		}
#if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \
    defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4)
		/*
		 * POSIX shared-memory objects are defined to have
		 * kernel persistence, and are not defined to support
		 * read(2)/write(2) -- or even open(2).  Thus, we can
		 * use MAP_ASYNC to trade on-disk coherence for speed.
		 * The shm_open(3) library routine turns on the FPOSIXSHM
		 * flag to request this behavior.
		 */
		if (fp->f_flag & FPOSIXSHM)
			flags |= MAP_NOSYNC;
#endif
		vp = fp->f_vnode;
		/*
		 * Ensure that file and memory protections are
		 * compatible.  Note that we only worry about
		 * writability if mapping is shared; in this case,
		 * current and max prot are dictated by the open file.
		 * XXX use the vnode instead?  Problem is: what
		 * credentials do we use for determination? What if
		 * proc does a setuid?
		 */
		if (vp->v_mount != NULL && vp->v_mount->mnt_flag & MNT_NOEXEC)
			maxprot = VM_PROT_NONE;
		else
			maxprot = VM_PROT_EXECUTE;
		if (fp->f_flag & FREAD) {
			maxprot |= VM_PROT_READ;
		} else if (prot & PROT_READ) {
			error = EACCES;
			goto done;
		}
		/*
		 * If we are sharing potential changes (either via
		 * MAP_SHARED or via the implicit sharing of character
		 * device mappings), and we are trying to get write
		 * permission although we opened it without asking
		 * for it, bail out.
		 */
		if ((flags & MAP_SHARED) != 0) {
			if ((fp->f_flag & FWRITE) != 0) {
				maxprot |= VM_PROT_WRITE;
			} else if ((prot & PROT_WRITE) != 0) {
				error = EACCES;
				goto done;
			}
		} else if (vp->v_type != VCHR || (fp->f_flag & FWRITE) != 0) {
			maxprot |= VM_PROT_WRITE;
			cap_maxprot |= VM_PROT_WRITE;
		}
		handle = (void *)vp;
		handle_type = OBJT_VNODE;
	}
map:
	td->td_fpop = fp;
	maxprot &= cap_maxprot;
	error = vm_mmap(&vms->vm_map, &addr, size, prot, maxprot,
	    flags, handle_type, handle, pos);
	td->td_fpop = NULL;
#ifdef HWPMC_HOOKS
	/* inform hwpmc(4) if an executable is being mapped */
	if (error == 0 && handle_type == OBJT_VNODE &&
	    (prot & PROT_EXEC)) {
		pkm.pm_file = handle;
		pkm.pm_address = (uintptr_t) addr;
		PMC_CALL_HOOK(td, PMC_FN_MMAP, (void *) &pkm);
	}
#endif
	if (error == 0)
		td->td_retval[0] = (register_t) (addr + pageoff);
done:
	if (fp)
		fdrop(fp, td);

	return (error);
}
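
/*
 * Illustrative userland sketch (not kernel code) of the non-page-aligned
 * offset handling described above sys_mmap():
 *
 *	char *p = mmap(NULL, 100, PROT_READ, MAP_PRIVATE, fd, 0x1234);
 *
 * maps the file starting at trunc_page(0x1234) == 0x1000 (with 4 KB pages)
 * and the returned pointer is adjusted up by the page offset, so *p reads
 * the byte at file offset 0x1234.
 */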
Grimes 438c2815ad5SPeter Wemm int 439c2815ad5SPeter Wemm freebsd6_mmap(struct thread *td, struct freebsd6_mmap_args *uap) 440c2815ad5SPeter Wemm { 441c2815ad5SPeter Wemm struct mmap_args oargs; 442c2815ad5SPeter Wemm 443c2815ad5SPeter Wemm oargs.addr = uap->addr; 444c2815ad5SPeter Wemm oargs.len = uap->len; 445c2815ad5SPeter Wemm oargs.prot = uap->prot; 446c2815ad5SPeter Wemm oargs.flags = uap->flags; 447c2815ad5SPeter Wemm oargs.fd = uap->fd; 448c2815ad5SPeter Wemm oargs.pos = uap->pos; 4498451d0ddSKip Macy return (sys_mmap(td, &oargs)); 450c2815ad5SPeter Wemm } 451c2815ad5SPeter Wemm 45205f0fdd2SPoul-Henning Kamp #ifdef COMPAT_43 453d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 45405f0fdd2SPoul-Henning Kamp struct ommap_args { 45505f0fdd2SPoul-Henning Kamp caddr_t addr; 45605f0fdd2SPoul-Henning Kamp int len; 45705f0fdd2SPoul-Henning Kamp int prot; 45805f0fdd2SPoul-Henning Kamp int flags; 45905f0fdd2SPoul-Henning Kamp int fd; 46005f0fdd2SPoul-Henning Kamp long pos; 46105f0fdd2SPoul-Henning Kamp }; 462d2d3e875SBruce Evans #endif 46305f0fdd2SPoul-Henning Kamp int 464b40ce416SJulian Elischer ommap(td, uap) 465b40ce416SJulian Elischer struct thread *td; 46654d92145SMatthew Dillon struct ommap_args *uap; 46705f0fdd2SPoul-Henning Kamp { 46805f0fdd2SPoul-Henning Kamp struct mmap_args nargs; 46905f0fdd2SPoul-Henning Kamp static const char cvtbsdprot[8] = { 47005f0fdd2SPoul-Henning Kamp 0, 47105f0fdd2SPoul-Henning Kamp PROT_EXEC, 47205f0fdd2SPoul-Henning Kamp PROT_WRITE, 47305f0fdd2SPoul-Henning Kamp PROT_EXEC | PROT_WRITE, 47405f0fdd2SPoul-Henning Kamp PROT_READ, 47505f0fdd2SPoul-Henning Kamp PROT_EXEC | PROT_READ, 47605f0fdd2SPoul-Henning Kamp PROT_WRITE | PROT_READ, 47705f0fdd2SPoul-Henning Kamp PROT_EXEC | PROT_WRITE | PROT_READ, 47805f0fdd2SPoul-Henning Kamp }; 4790d94caffSDavid Greenman 48005f0fdd2SPoul-Henning Kamp #define OMAP_ANON 0x0002 48105f0fdd2SPoul-Henning Kamp #define OMAP_COPY 0x0020 48205f0fdd2SPoul-Henning Kamp #define OMAP_SHARED 0x0010 48305f0fdd2SPoul-Henning Kamp #define OMAP_FIXED 0x0100 48405f0fdd2SPoul-Henning Kamp 48505f0fdd2SPoul-Henning Kamp nargs.addr = uap->addr; 48605f0fdd2SPoul-Henning Kamp nargs.len = uap->len; 48705f0fdd2SPoul-Henning Kamp nargs.prot = cvtbsdprot[uap->prot & 0x7]; 488ee4116b8SKonstantin Belousov #ifdef COMPAT_FREEBSD32 489ee4116b8SKonstantin Belousov #if defined(__amd64__) || defined(__ia64__) 490ee4116b8SKonstantin Belousov if (i386_read_exec && SV_PROC_FLAG(td->td_proc, SV_ILP32) && 491ee4116b8SKonstantin Belousov nargs.prot != 0) 492ee4116b8SKonstantin Belousov nargs.prot |= PROT_EXEC; 493ee4116b8SKonstantin Belousov #endif 494ee4116b8SKonstantin Belousov #endif 49505f0fdd2SPoul-Henning Kamp nargs.flags = 0; 49605f0fdd2SPoul-Henning Kamp if (uap->flags & OMAP_ANON) 49705f0fdd2SPoul-Henning Kamp nargs.flags |= MAP_ANON; 49805f0fdd2SPoul-Henning Kamp if (uap->flags & OMAP_COPY) 49905f0fdd2SPoul-Henning Kamp nargs.flags |= MAP_COPY; 50005f0fdd2SPoul-Henning Kamp if (uap->flags & OMAP_SHARED) 50105f0fdd2SPoul-Henning Kamp nargs.flags |= MAP_SHARED; 50205f0fdd2SPoul-Henning Kamp else 50305f0fdd2SPoul-Henning Kamp nargs.flags |= MAP_PRIVATE; 50405f0fdd2SPoul-Henning Kamp if (uap->flags & OMAP_FIXED) 50505f0fdd2SPoul-Henning Kamp nargs.flags |= MAP_FIXED; 50605f0fdd2SPoul-Henning Kamp nargs.fd = uap->fd; 50705f0fdd2SPoul-Henning Kamp nargs.pos = uap->pos; 5088451d0ddSKip Macy return (sys_mmap(td, &nargs)); 50905f0fdd2SPoul-Henning Kamp } 51005f0fdd2SPoul-Henning Kamp #endif /* COMPAT_43 */ 51105f0fdd2SPoul-Henning Kamp 51205f0fdd2SPoul-Henning Kamp 
#ifndef _SYS_SYSPROTO_H_
struct msync_args {
	void *addr;
	size_t len;
	int flags;
};
#endif
/*
 * MPSAFE
 */
int
sys_msync(td, uap)
	struct thread *td;
	struct msync_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int flags;
	vm_map_t map;
	int rv;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	flags = uap->flags;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	map = &td->td_proc->p_vmspace->vm_map;

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_sync(map, addr, addr + size, (flags & MS_ASYNC) == 0,
	    (flags & MS_INVALIDATE) != 0);
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
		return (EINVAL);	/* Sun returns ENOMEM? */
	case KERN_INVALID_ARGUMENT:
		return (EBUSY);
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
}

#ifndef _SYS_SYSPROTO_H_
struct munmap_args {
	void *addr;
	size_t len;
};
#endif
/*
 * MPSAFE
 */
int
sys_munmap(td, uap)
	struct thread *td;
	struct munmap_args *uap;
{
#ifdef HWPMC_HOOKS
	struct pmckern_map_out pkm;
	vm_map_entry_t entry;
#endif
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_map_t map;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	if (size == 0)
		return (EINVAL);

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	if (addr < vm_map_min(map) || addr + size > vm_map_max(map))
		return (EINVAL);
	vm_map_lock(map);
#ifdef HWPMC_HOOKS
	/*
	 * Inform hwpmc if the address range being unmapped contains
	 * an executable region.
	 */
	pkm.pm_address = (uintptr_t) NULL;
	if (vm_map_lookup_entry(map, addr, &entry)) {
		for (;
		    entry != &map->header && entry->start < addr + size;
		    entry = entry->next) {
			if (vm_map_check_protection(map, entry->start,
			    entry->end, VM_PROT_EXECUTE) == TRUE) {
				pkm.pm_address = (uintptr_t) addr;
				pkm.pm_size = (size_t) size;
				break;
			}
		}
	}
#endif
	vm_map_delete(map, addr, addr + size);

#ifdef HWPMC_HOOKS
	/* downgrade the lock to prevent a LOR with the pmc-sx lock */
	vm_map_lock_downgrade(map);
	if (pkm.pm_address != (uintptr_t) NULL)
		PMC_CALL_HOOK(td, PMC_FN_MUNMAP, (void *) &pkm);
	vm_map_unlock_read(map);
#else
	vm_map_unlock(map);
#endif
	/* vm_map_delete returns nothing but KERN_SUCCESS anyway */
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mprotect_args {
	const void *addr;
	size_t len;
	int prot;
};
#endif
/*
 * MPSAFE
 */
int
sys_mprotect(td, uap)
	struct thread *td;
	struct mprotect_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t prot;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;
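
	/*
	 * Worked example of the page rounding below (illustrative): with
	 * 4 KB pages, addr 0x2345 and len 0x100 become addr 0x2000 and
	 * size 0x1000, so the protection change covers the entire page
	 * containing the requested range.
	 */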

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_protect(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, prot, FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	case KERN_RESOURCE_SHORTAGE:
		return (ENOMEM);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct minherit_args {
	void *addr;
	size_t len;
	int inherit;
};
#endif
/*
 * MPSAFE
 */
int
sys_minherit(td, uap)
	struct thread *td;
	struct minherit_args *uap;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_inherit_t inherit;

	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	inherit = uap->inherit;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_inherit(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct madvise_args {
	void *addr;
	size_t len;
	int behav;
};
#endif

/*
 * MPSAFE
 */
int
sys_madvise(td, uap)
	struct thread *td;
	struct madvise_args *uap;
{
	vm_offset_t start, end;
	vm_map_t map;
	int flags;

	/*
	 * Check for our special case, advising the swap pager we are
	 * "immortal."
	 */
	if (uap->behav == MADV_PROTECT) {
		flags = PPROT_SET;
		return (kern_procctl(td, P_PID, td->td_proc->p_pid,
		    PROC_SPROTECT, &flags));
	}

	/*
	 * Check for illegal behavior
	 */
	if (uap->behav < 0 || uap->behav > MADV_CORE)
		return (EINVAL);
	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	if ((vm_offset_t)uap->addr < vm_map_min(map) ||
	    (vm_offset_t)uap->addr + uap->len > vm_map_max(map))
		return (EINVAL);
	if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr)
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	start = trunc_page((vm_offset_t) uap->addr);
	end = round_page((vm_offset_t) uap->addr + uap->len);

	if (vm_map_madvise(map, start, end, uap->behav))
		return (EINVAL);
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mincore_args {
	const void *addr;
	size_t len;
	char *vec;
};
#endif

/*
 * MPSAFE
 */
int
sys_mincore(td, uap)
	struct thread *td;
	struct mincore_args *uap;
{
	vm_offset_t addr, first_addr;
	vm_offset_t end, cend;
	pmap_t pmap;
	vm_map_t map;
	char *vec;
	int error = 0;
	int vecindex, lastvecindex;
	vm_map_entry_t current;
	vm_map_entry_t entry;
	vm_object_t object;
	vm_paddr_t locked_pa;
	vm_page_t m;
	vm_pindex_t pindex;
	int mincoreinfo;
	unsigned int timestamp;
	boolean_t locked;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page((vm_offset_t) uap->addr);
	end = addr + (vm_size_t)round_page(uap->len);
	map = &td->td_proc->p_vmspace->vm_map;
	if (end > vm_map_max(map) || end < addr)
		return (ENOMEM);

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	pmap = vmspace_pmap(td->td_proc->p_vmspace);

	vm_map_lock_read(map);
RestartScan:
	timestamp = map->timestamp;

	if (!vm_map_lookup_entry(map, addr, &entry)) {
		vm_map_unlock_read(map);
		return (ENOMEM);
	}

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current process's address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for (current = entry;
	    (current != &map->header) && (current->start < end);
	    current = current->next) {

		/*
		 * check for contiguity
		 */
		if (current->end < end &&
		    (current->next == &map->header ||
		    current->next->start > current->end)) {
			vm_map_unlock_read(map);
			return (ENOMEM);
		}

		/*
		 * ignore submaps (for now) or null objects
		 */
		if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) ||
		    current->object.vm_object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		/*
		 * scan this entry one page at a time
		 */
		while (addr < cend) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 */
			object = NULL;
			locked_pa = 0;
		retry:
			m = NULL;
			mincoreinfo = pmap_mincore(pmap, addr, &locked_pa);
			if (locked_pa != 0) {
				/*
				 * The page is mapped by this process but not
				 * both accessed and modified.  It is also
				 * managed.  Acquire the object lock so that
				 * other mappings might be examined.
				 */
				m = PHYS_TO_VM_PAGE(locked_pa);
				if (m->object != object) {
					if (object != NULL)
						VM_OBJECT_WUNLOCK(object);
					object = m->object;
					locked = VM_OBJECT_TRYWLOCK(object);
					vm_page_unlock(m);
					if (!locked) {
						VM_OBJECT_WLOCK(object);
						vm_page_lock(m);
						goto retry;
					}
				} else
					vm_page_unlock(m);
				KASSERT(m->valid == VM_PAGE_BITS_ALL,
				    ("mincore: page %p is mapped but invalid",
				    m));
			} else if (mincoreinfo == 0) {
				/*
				 * The page is not mapped by this process.  If
				 * the object implements managed pages, then
				 * determine if the page is resident so that
				 * the mappings might be examined.
				 */
				if (current->object.vm_object != object) {
					if (object != NULL)
						VM_OBJECT_WUNLOCK(object);
					object = current->object.vm_object;
					VM_OBJECT_WLOCK(object);
				}
				if (object->type == OBJT_DEFAULT ||
				    object->type == OBJT_SWAP ||
				    object->type == OBJT_VNODE) {
					pindex = OFF_TO_IDX(current->offset +
					    (addr - current->start));
					m = vm_page_lookup(object, pindex);
					if (m == NULL &&
					    vm_page_is_cached(object, pindex))
						mincoreinfo = MINCORE_INCORE;
					if (m != NULL && m->valid == 0)
						m = NULL;
					if (m != NULL)
						mincoreinfo = MINCORE_INCORE;
				}
			}
			if (m != NULL) {
				/* Examine other mappings to the page. */
				if (m->dirty == 0 && pmap_is_modified(m))
					vm_page_dirty(m);
				if (m->dirty != 0)
					mincoreinfo |= MINCORE_MODIFIED_OTHER;
				/*
				 * The first test for PGA_REFERENCED is an
				 * optimization.  The second test is
				 * required because a concurrent pmap
				 * operation could clear the last reference
				 * and set PGA_REFERENCED before the call to
				 * pmap_is_referenced().
				 */
				if ((m->aflags & PGA_REFERENCED) != 0 ||
				    pmap_is_referenced(m) ||
				    (m->aflags & PGA_REFERENCED) != 0)
					mincoreinfo |= MINCORE_REFERENCED_OTHER;
			}
			if (object != NULL)
				VM_OBJECT_WUNLOCK(object);

			/*
			 * subyte may page fault.  In case it needs to modify
			 * the map, we release the lock.
			 */
			vm_map_unlock_read(map);

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = OFF_TO_IDX(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make sure that
			 * the byte vector is zeroed for those skipped entries.
			 */
			while ((lastvecindex + 1) < vecindex) {
				++lastvecindex;
				error = subyte(vec + lastvecindex, 0);
				if (error) {
					error = EFAULT;
					goto done2;
				}
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte(vec + vecindex, mincoreinfo);
			if (error) {
				error = EFAULT;
				goto done2;
			}

			/*
			 * If the map has changed, due to the subyte, the previous
			 * output may be invalid.
			 */
			vm_map_lock_read(map);
			if (timestamp != map->timestamp)
				goto RestartScan;

			lastvecindex = vecindex;
			addr += PAGE_SIZE;
		}
	}

	/*
	 * subyte may page fault.  In case it needs to modify
	 * the map, we release the lock.
	 */
	vm_map_unlock_read(map);

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = OFF_TO_IDX(end - first_addr);
	while ((lastvecindex + 1) < vecindex) {
		++lastvecindex;
		error = subyte(vec + lastvecindex, 0);
		if (error) {
			error = EFAULT;
			goto done2;
		}
	}

	/*
	 * If the map has changed, due to the subyte, the previous
	 * output may be invalid.
	 */
	vm_map_lock_read(map);
	if (timestamp != map->timestamp)
		goto RestartScan;
	vm_map_unlock_read(map);
done2:
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct mlock_args {
	const void *addr;
	size_t len;
};
#endif
/*
 * MPSAFE
 */
int
sys_mlock(td, uap)
	struct thread *td;
	struct mlock_args *uap;
{

	return (vm_mlock(td->td_proc, td->td_ucred, uap->addr, uap->len));
}

int
vm_mlock(struct proc *proc, struct ucred *cred, const void *addr0, size_t len)
{
	vm_offset_t addr, end, last, start;
	vm_size_t npages, size;
	vm_map_t map;
	unsigned long nsize;
	int error;

	error = priv_check_cred(cred, PRIV_VM_MLOCK, 0);
	if (error)
		return (error);
	addr = (vm_offset_t)addr0;
	size = len;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	npages = atop(end - start);
	if (npages > vm_page_max_wired)
		return (ENOMEM);
	map = &proc->p_vmspace->vm_map;
	PROC_LOCK(proc);
	nsize = ptoa(npages + pmap_wired_count(map->pmap));
	if (nsize > lim_cur(proc, RLIMIT_MEMLOCK)) {
		PROC_UNLOCK(proc);
		return (ENOMEM);
	}
	PROC_UNLOCK(proc);
	if (npages + cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);
#ifdef RACCT
	PROC_LOCK(proc);
	error = racct_set(proc, RACCT_MEMLOCK, nsize);
	PROC_UNLOCK(proc);
	if (error != 0)
		return (ENOMEM);
#endif
	error = vm_map_wire(map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (error != KERN_SUCCESS) {
		PROC_LOCK(proc);
		racct_set(proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}
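
/*
 * Illustrative userland sketch (not kernel code): a process that needs a
 * buffer resident in RAM might do
 *
 *	buf = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_ANON, -1, 0);
 *	if (mlock(buf, len) != 0)
 *		err(1, "mlock");
 *
 * vm_mlock() above enforces both the per-process RLIMIT_MEMLOCK limit
 * and the global vm_page_max_wired cap on such requests.
 */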

#ifndef _SYS_SYSPROTO_H_
struct mlockall_args {
	int how;
};
#endif

/*
 * MPSAFE
 */
int
sys_mlockall(td, uap)
	struct thread *td;
	struct mlockall_args *uap;
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = priv_check(td, PRIV_VM_MLOCK);
	if (error)
		return (error);

	if ((uap->how == 0) || ((uap->how & ~(MCL_CURRENT|MCL_FUTURE)) != 0))
		return (EINVAL);

	/*
	 * If wiring all pages in the process would cause it to exceed
	 * a hard resource limit, return ENOMEM.
	 */
	if (!old_mlock && uap->how & MCL_CURRENT) {
		PROC_LOCK(td->td_proc);
		if (map->size > lim_cur(td->td_proc, RLIMIT_MEMLOCK)) {
			PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		PROC_UNLOCK(td->td_proc);
	}
#ifdef RACCT
	PROC_LOCK(td->td_proc);
	error = racct_set(td->td_proc, RACCT_MEMLOCK, map->size);
	PROC_UNLOCK(td->td_proc);
	if (error != 0)
		return (ENOMEM);
#endif

	if (uap->how & MCL_FUTURE) {
		vm_map_lock(map);
		vm_map_modflags(map, MAP_WIREFUTURE, 0);
		vm_map_unlock(map);
		error = 0;
	}

	if (uap->how & MCL_CURRENT) {
		/*
		 * P1003.1-2001 mandates that all currently mapped pages
		 * will be memory resident and locked (wired) upon return
		 * from mlockall().  vm_map_wire() will wire pages, by
		 * calling vm_fault_wire() for each page in the region.
		 */
		error = vm_map_wire(map, vm_map_min(map), vm_map_max(map),
		    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
		error = (error == KERN_SUCCESS ? 0 : EAGAIN);
	}
#ifdef RACCT
	if (error != KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct munlockall_args {
	register_t dummy;
};
#endif

/*
 * MPSAFE
 */
int
sys_munlockall(td, uap)
	struct thread *td;
	struct munlockall_args *uap;
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);

	/* Clear the MAP_WIREFUTURE flag from this vm_map. */
	vm_map_lock(map);
	vm_map_modflags(map, 0, MAP_WIREFUTURE);
	vm_map_unlock(map);

	/* Forcibly unwire all pages. */
	error = vm_map_unwire(map, vm_map_min(map), vm_map_max(map),
	    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
#ifdef RACCT
	if (error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK, 0);
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct munlock_args {
	const void *addr;
	size_t len;
};
#endif
/*
 * MPSAFE
 */
int
sys_munlock(td, uap)
	struct thread *td;
	struct munlock_args *uap;
{
	vm_offset_t addr, end, last, start;
	vm_size_t size;
#ifdef RACCT
	vm_map_t map;
#endif
	int error;

#ifndef _SYS_SYSPROTO_H_
struct munlock_args {
	const void *addr;
	size_t len;
};
#endif

/*
 * MPSAFE
 */
int
sys_munlock(td, uap)
	struct thread *td;
	struct munlock_args *uap;
{
	vm_offset_t addr, end, last, start;
	vm_size_t size;
#ifdef RACCT
	vm_map_t map;
#endif
	int error;

	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);
	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	error = vm_map_unwire(&td->td_proc->p_vmspace->vm_map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		map = &td->td_proc->p_vmspace->vm_map;
		racct_set(td->td_proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(td->td_proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}
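
/*
 * Worked example: with 4 KB pages, munlock((void *)0x1234, 0x100)
 * yields last = 0x1334, start = 0x1000 and end = 0x2000, so the whole
 * containing page range [0x1000, 0x2000) is unwired.  The
 * "last < addr || end < addr" test rejects lengths whose end wraps
 * past the top of the address space.
 */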

/*
 * vm_mmap_vnode()
 *
 * Helper function for vm_mmap.  Perform sanity check specific for mmap
 * operations on vnodes.
 *
 * For VCHR vnodes, the vnode lock is held over the call to
 * vm_mmap_cdev() to keep vp->v_rdev valid.
 */
int
vm_mmap_vnode(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct vnode *vp, vm_ooffset_t *foffp, vm_object_t *objp,
    boolean_t *writecounted)
{
	struct vattr va;
	vm_object_t obj;
	vm_offset_t foff;
	struct mount *mp;
	struct ucred *cred;
	int error, flags, locktype;

	mp = vp->v_mount;
	cred = td->td_ucred;
	if ((*maxprotp & VM_PROT_WRITE) && (*flagsp & MAP_SHARED))
		locktype = LK_EXCLUSIVE;
	else
		locktype = LK_SHARED;
	if ((error = vget(vp, locktype, td)) != 0)
		return (error);
	foff = *foffp;
	flags = *flagsp;
	obj = vp->v_object;
	if (vp->v_type == VREG) {
		/*
		 * Get the proper underlying object
		 */
		if (obj == NULL) {
			error = EINVAL;
			goto done;
		}
		if (obj->type == OBJT_VNODE && obj->handle != vp) {
			vput(vp);
			vp = (struct vnode *)obj->handle;
			/*
			 * Bypass filesystems obey the mpsafety of the
			 * underlying fs.  Tmpfs never bypasses.
			 */
			error = vget(vp, locktype, td);
			if (error != 0)
				return (error);
		}
		if (locktype == LK_EXCLUSIVE) {
			*writecounted = TRUE;
			vnode_pager_update_writecount(obj, 0, objsize);
		}
	} else if (vp->v_type == VCHR) {
		error = vm_mmap_cdev(td, objsize, prot, maxprotp, flagsp,
		    vp->v_rdev, foffp, objp);
		if (error == 0)
			goto mark_atime;
		goto done;
	} else {
		error = EINVAL;
		goto done;
	}
	if ((error = VOP_GETATTR(vp, &va, cred)))
		goto done;
#ifdef MAC
	error = mac_vnode_check_mmap(cred, vp, prot, flags);
	if (error != 0)
		goto done;
#endif
	if ((flags & MAP_SHARED) != 0) {
		if ((va.va_flags & (SF_SNAPSHOT|IMMUTABLE|APPEND)) != 0) {
			if (prot & PROT_WRITE) {
				error = EPERM;
				goto done;
			}
			*maxprotp &= ~VM_PROT_WRITE;
		}
	}
	/*
	 * If it is a regular file without any references
	 * we do not need to sync it.
	 * Adjust object size to be the size of actual file.
	 */
	objsize = round_page(va.va_size);
	if (va.va_nlink == 0)
		flags |= MAP_NOSYNC;
	if (obj->type == OBJT_VNODE)
		obj = vm_pager_allocate(OBJT_VNODE, vp, objsize, prot, foff,
		    cred);
	else {
		KASSERT(obj->type == OBJT_DEFAULT || obj->type == OBJT_SWAP,
		    ("wrong object type"));
		vm_object_reference(obj);
	}
	if (obj == NULL) {
		error = ENOMEM;
		goto done;
	}
	*objp = obj;
	*flagsp = flags;

mark_atime:
	vfs_mark_atime(vp, cred);

done:
	if (error != 0 && *writecounted) {
		*writecounted = FALSE;
		vnode_pager_update_writecount(obj, objsize, 0);
	}
	vput(vp);
	return (error);
}
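
/*
 * A note on the writecount handling above: a shared, writable mapping
 * must be reflected in the vnode's write count (so, for instance, the
 * filesystem cannot be downgraded to read-only underneath a live
 * mapping).  The count is charged up front via
 * vnode_pager_update_writecount(), and is undone by the error path
 * here or, for established mappings, by
 * vnode_pager_release_writecount() when the map entry goes away.
 */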

/*
 * vm_mmap_cdev()
 *
 * MPSAFE
 *
 * Helper function for vm_mmap.  Perform sanity check specific for mmap
 * operations on cdevs.
 */
int
vm_mmap_cdev(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct cdev *cdev, vm_ooffset_t *foff, vm_object_t *objp)
{
	vm_object_t obj;
	struct cdevsw *dsw;
	int error, flags, ref;

	flags = *flagsp;

	dsw = dev_refthread(cdev, &ref);
	if (dsw == NULL)
		return (ENXIO);
	if (dsw->d_flags & D_MMAP_ANON) {
		dev_relthread(cdev, ref);
		*maxprotp = VM_PROT_ALL;
		*flagsp |= MAP_ANON;
		return (0);
	}
	/*
	 * cdevs do not provide private mappings of any kind.
	 */
	if ((*maxprotp & VM_PROT_WRITE) == 0 &&
	    (prot & PROT_WRITE) != 0) {
		dev_relthread(cdev, ref);
		return (EACCES);
	}
	if (flags & (MAP_PRIVATE|MAP_COPY)) {
		dev_relthread(cdev, ref);
		return (EINVAL);
	}
	/*
	 * Force device mappings to be shared.
	 */
	flags |= MAP_SHARED;
#ifdef MAC_XXX
	error = mac_cdev_check_mmap(td->td_ucred, cdev, prot);
	if (error != 0) {
		dev_relthread(cdev, ref);
		return (error);
	}
#endif
	/*
	 * First, try d_mmap_single().  If that is not implemented
	 * (returns ENODEV), fall back to using the device pager.
	 * Note that d_mmap_single() must return a reference to the
	 * object (it needs to bump the reference count of the object
	 * it returns somehow).
	 *
	 * XXX	assumes VM_PROT_* == PROT_*
	 */
	error = dsw->d_mmap_single(cdev, foff, objsize, objp, (int)prot);
	dev_relthread(cdev, ref);
	if (error != ENODEV)
		return (error);
	obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff,
	    td->td_ucred);
	if (obj == NULL)
		return (EINVAL);
	*objp = obj;
	*flagsp = flags;
	return (0);
}
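
/*
 * Illustrative sketch of a driver-side d_mmap_single() implementation
 * that satisfies the contract noted above ("foo_softc", "mem_size" and
 * "mem_obj" are hypothetical):
 *
 *	static int
 *	foo_mmap_single(struct cdev *cdev, vm_ooffset_t *offset,
 *	    vm_size_t size, vm_object_t *object, int nprot)
 *	{
 *		struct foo_softc *sc = cdev->si_drv1;
 *
 *		if (*offset + size > sc->mem_size)
 *			return (EINVAL);
 *		vm_object_reference(sc->mem_obj);
 *		*object = sc->mem_obj;
 *		return (0);
 *	}
 *
 * The key point is that the returned object carries its own reference,
 * which vm_mmap() later drops on failure via vm_object_deallocate().
 */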

/*
 * vm_mmap_shm()
 *
 * MPSAFE
 *
 * Helper function for vm_mmap.  Perform sanity check specific for mmap
 * operations on shm file descriptors.
 */
int
vm_mmap_shm(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct shmfd *shmfd, vm_ooffset_t foff, vm_object_t *objp)
{
	int error;

	if ((*flagsp & MAP_SHARED) != 0 &&
	    (*maxprotp & VM_PROT_WRITE) == 0 &&
	    (prot & PROT_WRITE) != 0)
		return (EACCES);
#ifdef MAC
	error = mac_posixshm_check_mmap(td->td_ucred, shmfd, prot, *flagsp);
	if (error != 0)
		return (error);
#endif
	error = shm_mmap(shmfd, objsize, foff, objp);
	if (error)
		return (error);
	return (0);
}
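
/*
 * Illustrative only: this is the path taken by userspace sequences
 * such as
 *
 *	fd = shm_open(SHM_ANON, O_RDWR, 0600);
 *	ftruncate(fd, len);
 *	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *
 * where the descriptor's shmfd reaches vm_mmap() below as an
 * OBJT_SWAP handle.
 */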

/*
 * vm_mmap()
 *
 * MPSAFE
 *
 * Internal version of mmap.  Currently used by mmap, exec, and sys5
 * shared memory.  Handle is either a vnode pointer or NULL for MAP_ANON.
 */
int
vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags,
    objtype_t handle_type, void *handle,
    vm_ooffset_t foff)
{
	boolean_t fitit;
	vm_object_t object = NULL;
	struct thread *td = curthread;
	int docow, error, findspace, rv;
	boolean_t writecounted;

	if (size == 0)
		return (0);

	size = round_page(size);

	if (map == &td->td_proc->p_vmspace->vm_map) {
		PROC_LOCK(td->td_proc);
		if (map->size + size > lim_cur(td->td_proc, RLIMIT_VMEM)) {
			PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		if (racct_set(td->td_proc, RACCT_VMEM, map->size + size)) {
			PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		if (!old_mlock && map->flags & MAP_WIREFUTURE) {
			if (ptoa(pmap_wired_count(map->pmap)) + size >
			    lim_cur(td->td_proc, RLIMIT_MEMLOCK)) {
				racct_set_force(td->td_proc, RACCT_VMEM,
				    map->size);
				PROC_UNLOCK(td->td_proc);
				return (ENOMEM);
			}
			error = racct_set(td->td_proc, RACCT_MEMLOCK,
			    ptoa(pmap_wired_count(map->pmap)) + size);
			if (error != 0) {
				racct_set_force(td->td_proc, RACCT_VMEM,
				    map->size);
				PROC_UNLOCK(td->td_proc);
				return (error);
			}
		}
		PROC_UNLOCK(td->td_proc);
	}

	/*
	 * We currently can only deal with page aligned file offsets.
	 * The check is here rather than in the syscall because the
	 * kernel calls this function internally for other mmaping
	 * operations (such as in exec) and non-aligned offsets will
	 * cause pmap inconsistencies...so we want to be sure to
	 * disallow this in all cases.
	 */
	if (foff & PAGE_MASK)
		return (EINVAL);
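
	/*
	 * For example, a file offset of 0x1000 passes this check on a
	 * machine with 4 KB pages, while 0x1001 fails with EINVAL
	 * before the map is touched.
	 */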

	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		if (*addr != trunc_page(*addr))
			return (EINVAL);
		fitit = FALSE;
	}
	writecounted = FALSE;

	/*
	 * Lookup/allocate object.
	 */
	switch (handle_type) {
	case OBJT_DEVICE:
		error = vm_mmap_cdev(td, size, prot, &maxprot, &flags,
		    handle, &foff, &object);
		break;
	case OBJT_VNODE:
		error = vm_mmap_vnode(td, size, prot, &maxprot, &flags,
		    handle, &foff, &object, &writecounted);
		break;
	case OBJT_SWAP:
		error = vm_mmap_shm(td, size, prot, &maxprot, &flags,
		    handle, foff, &object);
		break;
	case OBJT_DEFAULT:
		if (handle == NULL) {
			error = 0;
			break;
		}
		/* FALLTHROUGH */
	default:
		error = EINVAL;
		break;
	}
	if (error)
		return (error);
	if (flags & MAP_ANON) {
		object = NULL;
		docow = 0;
		/*
		 * Unnamed anonymous regions always start at 0.
		 */
		if (handle == NULL)
			foff = 0;
	} else if (flags & MAP_PREFAULT_READ)
		docow = MAP_PREFAULT;
	else
		docow = MAP_PREFAULT_PARTIAL;

	if ((flags & (MAP_ANON|MAP_SHARED)) == 0)
		docow |= MAP_COPY_ON_WRITE;
	if (flags & MAP_NOSYNC)
		docow |= MAP_DISABLE_SYNCER;
	if (flags & MAP_NOCORE)
		docow |= MAP_DISABLE_COREDUMP;
	/* Shared memory is also shared with children. */
	if (flags & MAP_SHARED)
		docow |= MAP_INHERIT_SHARE;
	if (writecounted)
		docow |= MAP_VN_WRITECOUNT;
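
	/*
	 * At this point docow holds the request translated into vm_map
	 * control bits, roughly:
	 *
	 *	private file mapping	-> MAP_COPY_ON_WRITE
	 *	MAP_NOSYNC		-> MAP_DISABLE_SYNCER
	 *	MAP_NOCORE		-> MAP_DISABLE_COREDUMP
	 *	MAP_SHARED		-> MAP_INHERIT_SHARE
	 *	writecounted vnode	-> MAP_VN_WRITECOUNT
	 */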

	if (flags & MAP_STACK)
		rv = vm_map_stack(map, *addr, size, prot, maxprot,
		    docow | MAP_STACK_GROWS_DOWN);
	else if (fitit) {
		if ((flags & MAP_ALIGNMENT_MASK) == MAP_ALIGNED_SUPER)
			findspace = VMFS_SUPER_SPACE;
		else if ((flags & MAP_ALIGNMENT_MASK) != 0)
			findspace = VMFS_ALIGNED_SPACE(flags >>
			    MAP_ALIGNMENT_SHIFT);
		else
			findspace = VMFS_OPTIMAL_SPACE;
		rv = vm_map_find(map, object, foff, addr, size,
#ifdef MAP_32BIT
		    flags & MAP_32BIT ? MAP_32BIT_MAX_ADDR :
#endif
		    0, findspace, prot, maxprot, docow);
	} else
		rv = vm_map_fixed(map, object, foff, *addr, size,
		    prot, maxprot, docow);

	if (rv == KERN_SUCCESS) {
		/*
		 * If the process has requested that all future mappings
		 * be wired, then heed this.
		 */
		if (map->flags & MAP_WIREFUTURE) {
			vm_map_wire(map, *addr, *addr + size,
			    VM_MAP_WIRE_USER | ((flags & MAP_STACK) ?
			    VM_MAP_WIRE_HOLESOK : VM_MAP_WIRE_NOHOLES));
		}
	} else {
		/*
		 * If this mapping was accounted for in the vnode's
		 * writecount, then undo that now.
		 */
		if (writecounted)
			vnode_pager_release_writecount(object, 0, size);
		/*
		 * Lose the object reference.  Will destroy the
		 * object if it's an unnamed anonymous mapping
		 * or named anonymous without other references.
		 */
		vm_object_deallocate(object);
	}
	return (vm_mmap_to_errno(rv));
}
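
/*
 * Illustrative note on the findspace selection above: a caller passing
 * MAP_ALIGNED(21) in flags requests 2^21-byte (2 MB) alignment and
 * reaches vm_map_find() with VMFS_ALIGNED_SPACE(21), while a plain
 * non-fixed mmap() request takes the VMFS_OPTIMAL_SPACE path.
 */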

/*
 * Translate a Mach VM return code to zero on success or the appropriate
 * errno on failure.
 */
int
vm_mmap_to_errno(int rv)
{

	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}