160727d8bSWarner Losh /*- 2df8bae1dSRodney W. Grimes * Copyright (c) 1988 University of Utah. 3df8bae1dSRodney W. Grimes * Copyright (c) 1991, 1993 4df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 5df8bae1dSRodney W. Grimes * 6df8bae1dSRodney W. Grimes * This code is derived from software contributed to Berkeley by 7df8bae1dSRodney W. Grimes * the Systems Programming Group of the University of Utah Computer 8df8bae1dSRodney W. Grimes * Science Department. 9df8bae1dSRodney W. Grimes * 10df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 11df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 12df8bae1dSRodney W. Grimes * are met: 13df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 14df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 15df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 16df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 17df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 18df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 19df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 20df8bae1dSRodney W. Grimes * without specific prior written permission. 21df8bae1dSRodney W. Grimes * 22df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26df8bae1dSRodney W. 
Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32df8bae1dSRodney W. Grimes * SUCH DAMAGE. 33df8bae1dSRodney W. Grimes * 34df8bae1dSRodney W. Grimes * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$ 35df8bae1dSRodney W. Grimes * 36df8bae1dSRodney W. Grimes * @(#)vm_mmap.c 8.4 (Berkeley) 1/12/94 37df8bae1dSRodney W. Grimes */ 38df8bae1dSRodney W. Grimes 39df8bae1dSRodney W. Grimes /* 40df8bae1dSRodney W. Grimes * Mapped file (mmap) interface to VM 41df8bae1dSRodney W. Grimes */ 42df8bae1dSRodney W. Grimes 43874651b1SDavid E. O'Brien #include <sys/cdefs.h> 44874651b1SDavid E. O'Brien __FBSDID("$FreeBSD$"); 45874651b1SDavid E. O'Brien 465591b823SEivind Eklund #include "opt_compat.h" 4749874f6eSJoseph Koshy #include "opt_hwpmc_hooks.h" 48e9822d92SJoerg Wunsch 49df8bae1dSRodney W. Grimes #include <sys/param.h> 50df8bae1dSRodney W. Grimes #include <sys/systm.h> 51a9d2f8d8SRobert Watson #include <sys/capability.h> 52a9d2f8d8SRobert Watson #include <sys/kernel.h> 53fb919e4dSMark Murray #include <sys/lock.h> 5423955314SAlfred Perlstein #include <sys/mutex.h> 55d2d3e875SBruce Evans #include <sys/sysproto.h> 56df8bae1dSRodney W. Grimes #include <sys/filedesc.h> 57acd3428bSRobert Watson #include <sys/priv.h> 58df8bae1dSRodney W. 
Grimes #include <sys/proc.h> 591ba5ad42SEdward Tomasz Napierala #include <sys/racct.h> 60070f64feSMatthew Dillon #include <sys/resource.h> 61070f64feSMatthew Dillon #include <sys/resourcevar.h> 62*7e19eda4SAndrey Zonov #include <sys/sysctl.h> 63df8bae1dSRodney W. Grimes #include <sys/vnode.h> 643ac4d1efSBruce Evans #include <sys/fcntl.h> 65df8bae1dSRodney W. Grimes #include <sys/file.h> 66df8bae1dSRodney W. Grimes #include <sys/mman.h> 67b483c7f6SGuido van Rooij #include <sys/mount.h> 68df8bae1dSRodney W. Grimes #include <sys/conf.h> 694183b6b6SPeter Wemm #include <sys/stat.h> 70497a8238SKonstantin Belousov #include <sys/sysent.h> 71efeaf95aSDavid Greenman #include <sys/vmmeter.h> 72df8bae1dSRodney W. Grimes 73aed55708SRobert Watson #include <security/mac/mac_framework.h> 74aed55708SRobert Watson 75df8bae1dSRodney W. Grimes #include <vm/vm.h> 76efeaf95aSDavid Greenman #include <vm/vm_param.h> 77efeaf95aSDavid Greenman #include <vm/pmap.h> 78efeaf95aSDavid Greenman #include <vm/vm_map.h> 79efeaf95aSDavid Greenman #include <vm/vm_object.h> 801c7c3c6aSMatthew Dillon #include <vm/vm_page.h> 81df8bae1dSRodney W. Grimes #include <vm/vm_pager.h> 82b5e8ce9fSBruce Evans #include <vm/vm_pageout.h> 83efeaf95aSDavid Greenman #include <vm/vm_extern.h> 84867a482dSJohn Dyson #include <vm/vm_page.h> 8584110e7eSKonstantin Belousov #include <vm/vnode_pager.h> 86df8bae1dSRodney W. Grimes 8749874f6eSJoseph Koshy #ifdef HWPMC_HOOKS 8849874f6eSJoseph Koshy #include <sys/pmckern.h> 8949874f6eSJoseph Koshy #endif 9049874f6eSJoseph Koshy 91*7e19eda4SAndrey Zonov int old_mlock = 0; 92*7e19eda4SAndrey Zonov SYSCTL_INT(_vm, OID_AUTO, old_mlock, CTLFLAG_RW | CTLFLAG_TUN, &old_mlock, 0, 93*7e19eda4SAndrey Zonov "Do not apply RLIMIT_MEMLOCK on mlockall"); 94*7e19eda4SAndrey Zonov TUNABLE_INT("vm.old_mlock", &old_mlock); 95*7e19eda4SAndrey Zonov 96d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 97df8bae1dSRodney W. Grimes struct sbrk_args { 98df8bae1dSRodney W. Grimes int incr; 99df8bae1dSRodney W. 
Grimes }; 100d2d3e875SBruce Evans #endif 1010d94caffSDavid Greenman 102c8daea13SAlexander Kabaev static int vm_mmap_vnode(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, 10384110e7eSKonstantin Belousov int *, struct vnode *, vm_ooffset_t *, vm_object_t *, boolean_t *); 10498df9218SJohn Baldwin static int vm_mmap_cdev(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, 10564345f0bSJohn Baldwin int *, struct cdev *, vm_ooffset_t *, vm_object_t *); 1068e38aeffSJohn Baldwin static int vm_mmap_shm(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, 1078e38aeffSJohn Baldwin int *, struct shmfd *, vm_ooffset_t, vm_object_t *); 108c8daea13SAlexander Kabaev 109d2c60af8SMatthew Dillon /* 110d2c60af8SMatthew Dillon * MPSAFE 111d2c60af8SMatthew Dillon */ 112df8bae1dSRodney W. Grimes /* ARGSUSED */ 113df8bae1dSRodney W. Grimes int 1148451d0ddSKip Macy sys_sbrk(td, uap) 115b40ce416SJulian Elischer struct thread *td; 116df8bae1dSRodney W. Grimes struct sbrk_args *uap; 117df8bae1dSRodney W. Grimes { 118df8bae1dSRodney W. Grimes /* Not yet implemented */ 119df8bae1dSRodney W. Grimes return (EOPNOTSUPP); 120df8bae1dSRodney W. Grimes } 121df8bae1dSRodney W. Grimes 122d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 123df8bae1dSRodney W. Grimes struct sstk_args { 124df8bae1dSRodney W. Grimes int incr; 125df8bae1dSRodney W. Grimes }; 126d2d3e875SBruce Evans #endif 1270d94caffSDavid Greenman 128d2c60af8SMatthew Dillon /* 129d2c60af8SMatthew Dillon * MPSAFE 130d2c60af8SMatthew Dillon */ 131df8bae1dSRodney W. Grimes /* ARGSUSED */ 132df8bae1dSRodney W. Grimes int 1338451d0ddSKip Macy sys_sstk(td, uap) 134b40ce416SJulian Elischer struct thread *td; 135df8bae1dSRodney W. Grimes struct sstk_args *uap; 136df8bae1dSRodney W. Grimes { 137df8bae1dSRodney W. Grimes /* Not yet implemented */ 138df8bae1dSRodney W. Grimes return (EOPNOTSUPP); 139df8bae1dSRodney W. Grimes } 140df8bae1dSRodney W. 
Grimes 1411930e303SPoul-Henning Kamp #if defined(COMPAT_43) 142d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 143df8bae1dSRodney W. Grimes struct getpagesize_args { 144df8bae1dSRodney W. Grimes int dummy; 145df8bae1dSRodney W. Grimes }; 146d2d3e875SBruce Evans #endif 1470d94caffSDavid Greenman 148df8bae1dSRodney W. Grimes int 149b40ce416SJulian Elischer ogetpagesize(td, uap) 150b40ce416SJulian Elischer struct thread *td; 151df8bae1dSRodney W. Grimes struct getpagesize_args *uap; 152df8bae1dSRodney W. Grimes { 1530cddd8f0SMatthew Dillon /* MP SAFE */ 154b40ce416SJulian Elischer td->td_retval[0] = PAGE_SIZE; 155df8bae1dSRodney W. Grimes return (0); 156df8bae1dSRodney W. Grimes } 1571930e303SPoul-Henning Kamp #endif /* COMPAT_43 */ 158df8bae1dSRodney W. Grimes 15954f42e4bSPeter Wemm 16054f42e4bSPeter Wemm /* 16154f42e4bSPeter Wemm * Memory Map (mmap) system call. Note that the file offset 16254f42e4bSPeter Wemm * and address are allowed to be NOT page aligned, though if 16354f42e4bSPeter Wemm * the MAP_FIXED flag it set, both must have the same remainder 16454f42e4bSPeter Wemm * modulo the PAGE_SIZE (POSIX 1003.1b). If the address is not 16554f42e4bSPeter Wemm * page-aligned, the actual mapping starts at trunc_page(addr) 16654f42e4bSPeter Wemm * and the return value is adjusted up by the page offset. 167b4309055SMatthew Dillon * 168b4309055SMatthew Dillon * Generally speaking, only character devices which are themselves 169b4309055SMatthew Dillon * memory-based, such as a video framebuffer, can be mmap'd. Otherwise 170b4309055SMatthew Dillon * there would be no cache coherency between a descriptor and a VM mapping 171b4309055SMatthew Dillon * both to the same character device. 17254f42e4bSPeter Wemm */ 173d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 174df8bae1dSRodney W. Grimes struct mmap_args { 175651bb817SAlexander Langer void *addr; 176df8bae1dSRodney W. Grimes size_t len; 177df8bae1dSRodney W. Grimes int prot; 178df8bae1dSRodney W. 
Grimes int flags; 179df8bae1dSRodney W. Grimes int fd; 180df8bae1dSRodney W. Grimes long pad; 181df8bae1dSRodney W. Grimes off_t pos; 182df8bae1dSRodney W. Grimes }; 183d2d3e875SBruce Evans #endif 184df8bae1dSRodney W. Grimes 185d2c60af8SMatthew Dillon /* 186d2c60af8SMatthew Dillon * MPSAFE 187d2c60af8SMatthew Dillon */ 188df8bae1dSRodney W. Grimes int 1898451d0ddSKip Macy sys_mmap(td, uap) 190b40ce416SJulian Elischer struct thread *td; 19154d92145SMatthew Dillon struct mmap_args *uap; 192df8bae1dSRodney W. Grimes { 19349874f6eSJoseph Koshy #ifdef HWPMC_HOOKS 19449874f6eSJoseph Koshy struct pmckern_map_in pkm; 19549874f6eSJoseph Koshy #endif 196c8daea13SAlexander Kabaev struct file *fp; 197df8bae1dSRodney W. Grimes struct vnode *vp; 198df8bae1dSRodney W. Grimes vm_offset_t addr; 1999154ee6aSPeter Wemm vm_size_t size, pageoff; 200a9d2f8d8SRobert Watson vm_prot_t cap_maxprot, prot, maxprot; 201651bb817SAlexander Langer void *handle; 20298df9218SJohn Baldwin objtype_t handle_type; 203df8bae1dSRodney W. Grimes int flags, error; 20454f42e4bSPeter Wemm off_t pos; 205b40ce416SJulian Elischer struct vmspace *vms = td->td_proc->p_vmspace; 206a9d2f8d8SRobert Watson cap_rights_t rights; 207df8bae1dSRodney W. Grimes 20854f42e4bSPeter Wemm addr = (vm_offset_t) uap->addr; 20954f42e4bSPeter Wemm size = uap->len; 210df8bae1dSRodney W. Grimes prot = uap->prot & VM_PROT_ALL; 211df8bae1dSRodney W. Grimes flags = uap->flags; 21254f42e4bSPeter Wemm pos = uap->pos; 21354f42e4bSPeter Wemm 214426da3bcSAlfred Perlstein fp = NULL; 21527bfa958SSimon L. B. Nielsen 2167707ccabSKonstantin Belousov /* 2177707ccabSKonstantin Belousov * Enforce the constraints. 2187707ccabSKonstantin Belousov * Mapping of length 0 is only allowed for old binaries. 2197707ccabSKonstantin Belousov * Anonymous mapping shall specify -1 as filedescriptor and 2207707ccabSKonstantin Belousov * zero position for new code. 
Be nice to ancient a.out 2217707ccabSKonstantin Belousov * binaries and correct pos for anonymous mapping, since old 2227707ccabSKonstantin Belousov * ld.so sometimes issues anonymous map requests with non-zero 2237707ccabSKonstantin Belousov * pos. 2247707ccabSKonstantin Belousov */ 2257707ccabSKonstantin Belousov if (!SV_CURPROC_FLAG(SV_AOUT)) { 2267707ccabSKonstantin Belousov if ((uap->len == 0 && curproc->p_osrel >= P_OSREL_MAP_ANON) || 2277707ccabSKonstantin Belousov ((flags & MAP_ANON) != 0 && (uap->fd != -1 || pos != 0))) 228df8bae1dSRodney W. Grimes return (EINVAL); 2297707ccabSKonstantin Belousov } else { 2307707ccabSKonstantin Belousov if ((flags & MAP_ANON) != 0) 2317707ccabSKonstantin Belousov pos = 0; 2327707ccabSKonstantin Belousov } 2339154ee6aSPeter Wemm 2342267af78SJulian Elischer if (flags & MAP_STACK) { 2352267af78SJulian Elischer if ((uap->fd != -1) || 2362267af78SJulian Elischer ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE))) 2372267af78SJulian Elischer return (EINVAL); 2382267af78SJulian Elischer flags |= MAP_ANON; 2392267af78SJulian Elischer pos = 0; 2402907af2aSJulian Elischer } 2412907af2aSJulian Elischer 2429154ee6aSPeter Wemm /* 24354f42e4bSPeter Wemm * Align the file position to a page boundary, 24454f42e4bSPeter Wemm * and save its page offset component. 2459154ee6aSPeter Wemm */ 24654f42e4bSPeter Wemm pageoff = (pos & PAGE_MASK); 24754f42e4bSPeter Wemm pos -= pageoff; 24854f42e4bSPeter Wemm 24954f42e4bSPeter Wemm /* Adjust size for rounding (on both ends). */ 25054f42e4bSPeter Wemm size += pageoff; /* low end... */ 25154f42e4bSPeter Wemm size = (vm_size_t) round_page(size); /* hi end */ 2529154ee6aSPeter Wemm 253df8bae1dSRodney W. Grimes /* 2540d94caffSDavid Greenman * Check for illegal addresses. Watch out for address wrap... Note 2550d94caffSDavid Greenman * that VM_*_ADDRESS are not constants due to casts (argh). 256df8bae1dSRodney W. Grimes */ 257df8bae1dSRodney W. 
Grimes if (flags & MAP_FIXED) { 25854f42e4bSPeter Wemm /* 25954f42e4bSPeter Wemm * The specified address must have the same remainder 26054f42e4bSPeter Wemm * as the file offset taken modulo PAGE_SIZE, so it 26154f42e4bSPeter Wemm * should be aligned after adjustment by pageoff. 26254f42e4bSPeter Wemm */ 26354f42e4bSPeter Wemm addr -= pageoff; 26454f42e4bSPeter Wemm if (addr & PAGE_MASK) 26554f42e4bSPeter Wemm return (EINVAL); 26627bfa958SSimon L. B. Nielsen 26754f42e4bSPeter Wemm /* Address range must be all in user VM space. */ 26805ba50f5SJake Burkholder if (addr < vm_map_min(&vms->vm_map) || 26905ba50f5SJake Burkholder addr + size > vm_map_max(&vms->vm_map)) 270df8bae1dSRodney W. Grimes return (EINVAL); 271bbc0ec52SDavid Greenman if (addr + size < addr) 272df8bae1dSRodney W. Grimes return (EINVAL); 27391d5354aSJohn Baldwin } else { 274df8bae1dSRodney W. Grimes /* 27554f42e4bSPeter Wemm * XXX for non-fixed mappings where no hint is provided or 27654f42e4bSPeter Wemm * the hint would fall in the potential heap space, 27754f42e4bSPeter Wemm * place it after the end of the largest possible heap. 278df8bae1dSRodney W. Grimes * 27954f42e4bSPeter Wemm * There should really be a pmap call to determine a reasonable 28054f42e4bSPeter Wemm * location. 281df8bae1dSRodney W. Grimes */ 28291d5354aSJohn Baldwin PROC_LOCK(td->td_proc); 28391d5354aSJohn Baldwin if (addr == 0 || 2841f6889a1SMatthew Dillon (addr >= round_page((vm_offset_t)vms->vm_taddr) && 285c460ac3aSPeter Wemm addr < round_page((vm_offset_t)vms->vm_daddr + 28691d5354aSJohn Baldwin lim_max(td->td_proc, RLIMIT_DATA)))) 287c460ac3aSPeter Wemm addr = round_page((vm_offset_t)vms->vm_daddr + 28891d5354aSJohn Baldwin lim_max(td->td_proc, RLIMIT_DATA)); 28991d5354aSJohn Baldwin PROC_UNLOCK(td->td_proc); 29091d5354aSJohn Baldwin } 291df8bae1dSRodney W. Grimes if (flags & MAP_ANON) { 292df8bae1dSRodney W. Grimes /* 293df8bae1dSRodney W. Grimes * Mapping blank space is trivial. 294df8bae1dSRodney W. 
Grimes */ 295df8bae1dSRodney W. Grimes handle = NULL; 29698df9218SJohn Baldwin handle_type = OBJT_DEFAULT; 297df8bae1dSRodney W. Grimes maxprot = VM_PROT_ALL; 298a9d2f8d8SRobert Watson cap_maxprot = VM_PROT_ALL; 29930d4dd7eSAlexander Kabaev } else { 300df8bae1dSRodney W. Grimes /* 301a9d2f8d8SRobert Watson * Mapping file, get fp for validation and don't let the 302a9d2f8d8SRobert Watson * descriptor disappear on us if we block. Check capability 303a9d2f8d8SRobert Watson * rights, but also return the maximum rights to be combined 304a9d2f8d8SRobert Watson * with maxprot later. 305df8bae1dSRodney W. Grimes */ 306a9d2f8d8SRobert Watson rights = CAP_MMAP; 307a9d2f8d8SRobert Watson if (prot & PROT_READ) 308a9d2f8d8SRobert Watson rights |= CAP_READ; 309a9d2f8d8SRobert Watson if ((flags & MAP_SHARED) != 0) { 310a9d2f8d8SRobert Watson if (prot & PROT_WRITE) 311a9d2f8d8SRobert Watson rights |= CAP_WRITE; 312a9d2f8d8SRobert Watson } 313a9d2f8d8SRobert Watson if (prot & PROT_EXEC) 314a9d2f8d8SRobert Watson rights |= CAP_MAPEXEC; 315a9d2f8d8SRobert Watson if ((error = fget_mmap(td, uap->fd, rights, &cap_maxprot, 316a9d2f8d8SRobert Watson &fp)) != 0) 317426da3bcSAlfred Perlstein goto done; 3188e38aeffSJohn Baldwin if (fp->f_type == DTYPE_SHM) { 3198e38aeffSJohn Baldwin handle = fp->f_data; 3208e38aeffSJohn Baldwin handle_type = OBJT_SWAP; 3218e38aeffSJohn Baldwin maxprot = VM_PROT_NONE; 3228e38aeffSJohn Baldwin 3238e38aeffSJohn Baldwin /* FREAD should always be set. 
*/ 3248e38aeffSJohn Baldwin if (fp->f_flag & FREAD) 3258e38aeffSJohn Baldwin maxprot |= VM_PROT_EXECUTE | VM_PROT_READ; 3268e38aeffSJohn Baldwin if (fp->f_flag & FWRITE) 3278e38aeffSJohn Baldwin maxprot |= VM_PROT_WRITE; 3288e38aeffSJohn Baldwin goto map; 3298e38aeffSJohn Baldwin } 330e4ca250dSJohn Baldwin if (fp->f_type != DTYPE_VNODE) { 33189eae00bSTom Rhodes error = ENODEV; 332426da3bcSAlfred Perlstein goto done; 333e4ca250dSJohn Baldwin } 3348e38aeffSJohn Baldwin #if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \ 3358e38aeffSJohn Baldwin defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) 336279d7226SMatthew Dillon /* 337aa543039SGarrett Wollman * POSIX shared-memory objects are defined to have 338aa543039SGarrett Wollman * kernel persistence, and are not defined to support 339aa543039SGarrett Wollman * read(2)/write(2) -- or even open(2). Thus, we can 340aa543039SGarrett Wollman * use MAP_ASYNC to trade on-disk coherence for speed. 341aa543039SGarrett Wollman * The shm_open(3) library routine turns on the FPOSIXSHM 342aa543039SGarrett Wollman * flag to request this behavior. 343aa543039SGarrett Wollman */ 344aa543039SGarrett Wollman if (fp->f_flag & FPOSIXSHM) 345aa543039SGarrett Wollman flags |= MAP_NOSYNC; 3468e38aeffSJohn Baldwin #endif 3473b6d9652SPoul-Henning Kamp vp = fp->f_vnode; 348c8bdd56bSGuido van Rooij /* 349df8bae1dSRodney W. Grimes * Ensure that file and memory protections are 350df8bae1dSRodney W. Grimes * compatible. Note that we only worry about 351df8bae1dSRodney W. Grimes * writability if mapping is shared; in this case, 352df8bae1dSRodney W. Grimes * current and max prot are dictated by the open file. 353df8bae1dSRodney W. Grimes * XXX use the vnode instead? Problem is: what 3540d94caffSDavid Greenman * credentials do we use for determination? What if 3550d94caffSDavid Greenman * proc does a setuid? 356df8bae1dSRodney W. Grimes */ 3578eec77b0STim J. 
Robbins if (vp->v_mount != NULL && vp->v_mount->mnt_flag & MNT_NOEXEC) 358b483c7f6SGuido van Rooij maxprot = VM_PROT_NONE; 359b483c7f6SGuido van Rooij else 360b483c7f6SGuido van Rooij maxprot = VM_PROT_EXECUTE; 361279d7226SMatthew Dillon if (fp->f_flag & FREAD) { 362df8bae1dSRodney W. Grimes maxprot |= VM_PROT_READ; 363279d7226SMatthew Dillon } else if (prot & PROT_READ) { 364279d7226SMatthew Dillon error = EACCES; 365279d7226SMatthew Dillon goto done; 366279d7226SMatthew Dillon } 367c8bdd56bSGuido van Rooij /* 368c8bdd56bSGuido van Rooij * If we are sharing potential changes (either via 369c8bdd56bSGuido van Rooij * MAP_SHARED or via the implicit sharing of character 370c8bdd56bSGuido van Rooij * device mappings), and we are trying to get write 371c8bdd56bSGuido van Rooij * permission although we opened it without asking 372c8daea13SAlexander Kabaev * for it, bail out. 373c8bdd56bSGuido van Rooij */ 374ce7a036dSAlexander Kabaev if ((flags & MAP_SHARED) != 0) { 37505feb99fSGuido van Rooij if ((fp->f_flag & FWRITE) != 0) { 376df8bae1dSRodney W. 
Grimes maxprot |= VM_PROT_WRITE; 377279d7226SMatthew Dillon } else if ((prot & PROT_WRITE) != 0) { 378279d7226SMatthew Dillon error = EACCES; 379279d7226SMatthew Dillon goto done; 380279d7226SMatthew Dillon } 381ce7a036dSAlexander Kabaev } else if (vp->v_type != VCHR || (fp->f_flag & FWRITE) != 0) { 38205feb99fSGuido van Rooij maxprot |= VM_PROT_WRITE; 383a9d2f8d8SRobert Watson cap_maxprot |= VM_PROT_WRITE; 384279d7226SMatthew Dillon } 385651bb817SAlexander Langer handle = (void *)vp; 38698df9218SJohn Baldwin handle_type = OBJT_VNODE; 38730d4dd7eSAlexander Kabaev } 3888e38aeffSJohn Baldwin map: 38936b90789SKonstantin Belousov td->td_fpop = fp; 390a9d2f8d8SRobert Watson maxprot &= cap_maxprot; 3911f6889a1SMatthew Dillon error = vm_mmap(&vms->vm_map, &addr, size, prot, maxprot, 39298df9218SJohn Baldwin flags, handle_type, handle, pos); 39336b90789SKonstantin Belousov td->td_fpop = NULL; 39449874f6eSJoseph Koshy #ifdef HWPMC_HOOKS 39549874f6eSJoseph Koshy /* inform hwpmc(4) if an executable is being mapped */ 39649874f6eSJoseph Koshy if (error == 0 && handle_type == OBJT_VNODE && 39749874f6eSJoseph Koshy (prot & PROT_EXEC)) { 39849874f6eSJoseph Koshy pkm.pm_file = handle; 39949874f6eSJoseph Koshy pkm.pm_address = (uintptr_t) addr; 40049874f6eSJoseph Koshy PMC_CALL_HOOK(td, PMC_FN_MMAP, (void *) &pkm); 40149874f6eSJoseph Koshy } 40249874f6eSJoseph Koshy #endif 403df8bae1dSRodney W. Grimes if (error == 0) 404b40ce416SJulian Elischer td->td_retval[0] = (register_t) (addr + pageoff); 405279d7226SMatthew Dillon done: 406279d7226SMatthew Dillon if (fp) 407b40ce416SJulian Elischer fdrop(fp, td); 408f6b5b182SJeff Roberson 409df8bae1dSRodney W. Grimes return (error); 410df8bae1dSRodney W. Grimes } 411df8bae1dSRodney W. 
Grimes 412c2815ad5SPeter Wemm int 413c2815ad5SPeter Wemm freebsd6_mmap(struct thread *td, struct freebsd6_mmap_args *uap) 414c2815ad5SPeter Wemm { 415c2815ad5SPeter Wemm struct mmap_args oargs; 416c2815ad5SPeter Wemm 417c2815ad5SPeter Wemm oargs.addr = uap->addr; 418c2815ad5SPeter Wemm oargs.len = uap->len; 419c2815ad5SPeter Wemm oargs.prot = uap->prot; 420c2815ad5SPeter Wemm oargs.flags = uap->flags; 421c2815ad5SPeter Wemm oargs.fd = uap->fd; 422c2815ad5SPeter Wemm oargs.pos = uap->pos; 4238451d0ddSKip Macy return (sys_mmap(td, &oargs)); 424c2815ad5SPeter Wemm } 425c2815ad5SPeter Wemm 42605f0fdd2SPoul-Henning Kamp #ifdef COMPAT_43 427d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 42805f0fdd2SPoul-Henning Kamp struct ommap_args { 42905f0fdd2SPoul-Henning Kamp caddr_t addr; 43005f0fdd2SPoul-Henning Kamp int len; 43105f0fdd2SPoul-Henning Kamp int prot; 43205f0fdd2SPoul-Henning Kamp int flags; 43305f0fdd2SPoul-Henning Kamp int fd; 43405f0fdd2SPoul-Henning Kamp long pos; 43505f0fdd2SPoul-Henning Kamp }; 436d2d3e875SBruce Evans #endif 43705f0fdd2SPoul-Henning Kamp int 438b40ce416SJulian Elischer ommap(td, uap) 439b40ce416SJulian Elischer struct thread *td; 44054d92145SMatthew Dillon struct ommap_args *uap; 44105f0fdd2SPoul-Henning Kamp { 44205f0fdd2SPoul-Henning Kamp struct mmap_args nargs; 44305f0fdd2SPoul-Henning Kamp static const char cvtbsdprot[8] = { 44405f0fdd2SPoul-Henning Kamp 0, 44505f0fdd2SPoul-Henning Kamp PROT_EXEC, 44605f0fdd2SPoul-Henning Kamp PROT_WRITE, 44705f0fdd2SPoul-Henning Kamp PROT_EXEC | PROT_WRITE, 44805f0fdd2SPoul-Henning Kamp PROT_READ, 44905f0fdd2SPoul-Henning Kamp PROT_EXEC | PROT_READ, 45005f0fdd2SPoul-Henning Kamp PROT_WRITE | PROT_READ, 45105f0fdd2SPoul-Henning Kamp PROT_EXEC | PROT_WRITE | PROT_READ, 45205f0fdd2SPoul-Henning Kamp }; 4530d94caffSDavid Greenman 45405f0fdd2SPoul-Henning Kamp #define OMAP_ANON 0x0002 45505f0fdd2SPoul-Henning Kamp #define OMAP_COPY 0x0020 45605f0fdd2SPoul-Henning Kamp #define OMAP_SHARED 0x0010 
45705f0fdd2SPoul-Henning Kamp #define OMAP_FIXED 0x0100 45805f0fdd2SPoul-Henning Kamp 45905f0fdd2SPoul-Henning Kamp nargs.addr = uap->addr; 46005f0fdd2SPoul-Henning Kamp nargs.len = uap->len; 46105f0fdd2SPoul-Henning Kamp nargs.prot = cvtbsdprot[uap->prot & 0x7]; 462ee4116b8SKonstantin Belousov #ifdef COMPAT_FREEBSD32 463ee4116b8SKonstantin Belousov #if defined(__amd64__) || defined(__ia64__) 464ee4116b8SKonstantin Belousov if (i386_read_exec && SV_PROC_FLAG(td->td_proc, SV_ILP32) && 465ee4116b8SKonstantin Belousov nargs.prot != 0) 466ee4116b8SKonstantin Belousov nargs.prot |= PROT_EXEC; 467ee4116b8SKonstantin Belousov #endif 468ee4116b8SKonstantin Belousov #endif 46905f0fdd2SPoul-Henning Kamp nargs.flags = 0; 47005f0fdd2SPoul-Henning Kamp if (uap->flags & OMAP_ANON) 47105f0fdd2SPoul-Henning Kamp nargs.flags |= MAP_ANON; 47205f0fdd2SPoul-Henning Kamp if (uap->flags & OMAP_COPY) 47305f0fdd2SPoul-Henning Kamp nargs.flags |= MAP_COPY; 47405f0fdd2SPoul-Henning Kamp if (uap->flags & OMAP_SHARED) 47505f0fdd2SPoul-Henning Kamp nargs.flags |= MAP_SHARED; 47605f0fdd2SPoul-Henning Kamp else 47705f0fdd2SPoul-Henning Kamp nargs.flags |= MAP_PRIVATE; 47805f0fdd2SPoul-Henning Kamp if (uap->flags & OMAP_FIXED) 47905f0fdd2SPoul-Henning Kamp nargs.flags |= MAP_FIXED; 48005f0fdd2SPoul-Henning Kamp nargs.fd = uap->fd; 48105f0fdd2SPoul-Henning Kamp nargs.pos = uap->pos; 4828451d0ddSKip Macy return (sys_mmap(td, &nargs)); 48305f0fdd2SPoul-Henning Kamp } 48405f0fdd2SPoul-Henning Kamp #endif /* COMPAT_43 */ 48505f0fdd2SPoul-Henning Kamp 48605f0fdd2SPoul-Henning Kamp 487d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 488df8bae1dSRodney W. Grimes struct msync_args { 489651bb817SAlexander Langer void *addr; 490c899450bSPeter Wemm size_t len; 491e6c6af11SDavid Greenman int flags; 492df8bae1dSRodney W. Grimes }; 493d2d3e875SBruce Evans #endif 494d2c60af8SMatthew Dillon /* 495d2c60af8SMatthew Dillon * MPSAFE 496d2c60af8SMatthew Dillon */ 497df8bae1dSRodney W. 
Grimes int 4988451d0ddSKip Macy sys_msync(td, uap) 499b40ce416SJulian Elischer struct thread *td; 500df8bae1dSRodney W. Grimes struct msync_args *uap; 501df8bae1dSRodney W. Grimes { 502df8bae1dSRodney W. Grimes vm_offset_t addr; 503dabee6feSPeter Wemm vm_size_t size, pageoff; 504e6c6af11SDavid Greenman int flags; 505df8bae1dSRodney W. Grimes vm_map_t map; 506df8bae1dSRodney W. Grimes int rv; 507df8bae1dSRodney W. Grimes 508df8bae1dSRodney W. Grimes addr = (vm_offset_t) uap->addr; 5099154ee6aSPeter Wemm size = uap->len; 510e6c6af11SDavid Greenman flags = uap->flags; 511e6c6af11SDavid Greenman 512dabee6feSPeter Wemm pageoff = (addr & PAGE_MASK); 513dabee6feSPeter Wemm addr -= pageoff; 514dabee6feSPeter Wemm size += pageoff; 515dabee6feSPeter Wemm size = (vm_size_t) round_page(size); 5169154ee6aSPeter Wemm if (addr + size < addr) 517dabee6feSPeter Wemm return (EINVAL); 518dabee6feSPeter Wemm 519dabee6feSPeter Wemm if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE)) 5201e62bc63SDavid Greenman return (EINVAL); 5211e62bc63SDavid Greenman 522b40ce416SJulian Elischer map = &td->td_proc->p_vmspace->vm_map; 5239154ee6aSPeter Wemm 524df8bae1dSRodney W. Grimes /* 525df8bae1dSRodney W. Grimes * Clean the pages and interpret the return value. 526df8bae1dSRodney W. Grimes */ 527950f8459SAlan Cox rv = vm_map_sync(map, addr, addr + size, (flags & MS_ASYNC) == 0, 528e6c6af11SDavid Greenman (flags & MS_INVALIDATE) != 0); 529df8bae1dSRodney W. Grimes switch (rv) { 530df8bae1dSRodney W. Grimes case KERN_SUCCESS: 531d2c60af8SMatthew Dillon return (0); 532df8bae1dSRodney W. Grimes case KERN_INVALID_ADDRESS: 533df8bae1dSRodney W. Grimes return (EINVAL); /* Sun returns ENOMEM? */ 534b7b7cd44SAlan Cox case KERN_INVALID_ARGUMENT: 535b7b7cd44SAlan Cox return (EBUSY); 536126d6082SKonstantin Belousov case KERN_FAILURE: 537126d6082SKonstantin Belousov return (EIO); 538df8bae1dSRodney W. Grimes default: 539df8bae1dSRodney W. Grimes return (EINVAL); 540df8bae1dSRodney W. 
Grimes } 541df8bae1dSRodney W. Grimes } 542df8bae1dSRodney W. Grimes 543d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 544df8bae1dSRodney W. Grimes struct munmap_args { 545651bb817SAlexander Langer void *addr; 5469154ee6aSPeter Wemm size_t len; 547df8bae1dSRodney W. Grimes }; 548d2d3e875SBruce Evans #endif 549d2c60af8SMatthew Dillon /* 550d2c60af8SMatthew Dillon * MPSAFE 551d2c60af8SMatthew Dillon */ 552df8bae1dSRodney W. Grimes int 5538451d0ddSKip Macy sys_munmap(td, uap) 554b40ce416SJulian Elischer struct thread *td; 55554d92145SMatthew Dillon struct munmap_args *uap; 556df8bae1dSRodney W. Grimes { 55749874f6eSJoseph Koshy #ifdef HWPMC_HOOKS 55849874f6eSJoseph Koshy struct pmckern_map_out pkm; 55949874f6eSJoseph Koshy vm_map_entry_t entry; 56049874f6eSJoseph Koshy #endif 561df8bae1dSRodney W. Grimes vm_offset_t addr; 562dabee6feSPeter Wemm vm_size_t size, pageoff; 563df8bae1dSRodney W. Grimes vm_map_t map; 564df8bae1dSRodney W. Grimes 565df8bae1dSRodney W. Grimes addr = (vm_offset_t) uap->addr; 5669154ee6aSPeter Wemm size = uap->len; 567d8834602SAlan Cox if (size == 0) 568d8834602SAlan Cox return (EINVAL); 569dabee6feSPeter Wemm 570dabee6feSPeter Wemm pageoff = (addr & PAGE_MASK); 571dabee6feSPeter Wemm addr -= pageoff; 572dabee6feSPeter Wemm size += pageoff; 573dabee6feSPeter Wemm size = (vm_size_t) round_page(size); 5749154ee6aSPeter Wemm if (addr + size < addr) 575df8bae1dSRodney W. Grimes return (EINVAL); 5769154ee6aSPeter Wemm 577df8bae1dSRodney W. Grimes /* 57805ba50f5SJake Burkholder * Check for illegal addresses. Watch out for address wrap... 579df8bae1dSRodney W. 
Grimes */ 580b40ce416SJulian Elischer map = &td->td_proc->p_vmspace->vm_map; 58105ba50f5SJake Burkholder if (addr < vm_map_min(map) || addr + size > vm_map_max(map)) 58205ba50f5SJake Burkholder return (EINVAL); 583d8834602SAlan Cox vm_map_lock(map); 58449874f6eSJoseph Koshy #ifdef HWPMC_HOOKS 58549874f6eSJoseph Koshy /* 58649874f6eSJoseph Koshy * Inform hwpmc if the address range being unmapped contains 58749874f6eSJoseph Koshy * an executable region. 58849874f6eSJoseph Koshy */ 5890d419640SRyan Stone pkm.pm_address = (uintptr_t) NULL; 59049874f6eSJoseph Koshy if (vm_map_lookup_entry(map, addr, &entry)) { 59149874f6eSJoseph Koshy for (; 59249874f6eSJoseph Koshy entry != &map->header && entry->start < addr + size; 59349874f6eSJoseph Koshy entry = entry->next) { 59449874f6eSJoseph Koshy if (vm_map_check_protection(map, entry->start, 59549874f6eSJoseph Koshy entry->end, VM_PROT_EXECUTE) == TRUE) { 59649874f6eSJoseph Koshy pkm.pm_address = (uintptr_t) addr; 59749874f6eSJoseph Koshy pkm.pm_size = (size_t) size; 59849874f6eSJoseph Koshy break; 59949874f6eSJoseph Koshy } 60049874f6eSJoseph Koshy } 60149874f6eSJoseph Koshy } 60249874f6eSJoseph Koshy #endif 603655c3490SKonstantin Belousov vm_map_delete(map, addr, addr + size); 6040d419640SRyan Stone 6050d419640SRyan Stone #ifdef HWPMC_HOOKS 6060d419640SRyan Stone /* downgrade the lock to prevent a LOR with the pmc-sx lock */ 6070d419640SRyan Stone vm_map_lock_downgrade(map); 608d473d3a1SRyan Stone if (pkm.pm_address != (uintptr_t) NULL) 6090d419640SRyan Stone PMC_CALL_HOOK(td, PMC_FN_MUNMAP, (void *) &pkm); 6100d419640SRyan Stone vm_map_unlock_read(map); 6110d419640SRyan Stone #else 612d8834602SAlan Cox vm_map_unlock(map); 6130d419640SRyan Stone #endif 6140d419640SRyan Stone /* vm_map_delete returns nothing but KERN_SUCCESS anyway */ 615df8bae1dSRodney W. Grimes return (0); 616df8bae1dSRodney W. Grimes } 617df8bae1dSRodney W. Grimes 618d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 619df8bae1dSRodney W. 
Grimes struct mprotect_args { 620651bb817SAlexander Langer const void *addr; 6219154ee6aSPeter Wemm size_t len; 622df8bae1dSRodney W. Grimes int prot; 623df8bae1dSRodney W. Grimes }; 624d2d3e875SBruce Evans #endif 625d2c60af8SMatthew Dillon /* 626d2c60af8SMatthew Dillon * MPSAFE 627d2c60af8SMatthew Dillon */ 628df8bae1dSRodney W. Grimes int 6298451d0ddSKip Macy sys_mprotect(td, uap) 630b40ce416SJulian Elischer struct thread *td; 631df8bae1dSRodney W. Grimes struct mprotect_args *uap; 632df8bae1dSRodney W. Grimes { 633df8bae1dSRodney W. Grimes vm_offset_t addr; 634dabee6feSPeter Wemm vm_size_t size, pageoff; 63554d92145SMatthew Dillon vm_prot_t prot; 636df8bae1dSRodney W. Grimes 637df8bae1dSRodney W. Grimes addr = (vm_offset_t) uap->addr; 6389154ee6aSPeter Wemm size = uap->len; 639df8bae1dSRodney W. Grimes prot = uap->prot & VM_PROT_ALL; 640df8bae1dSRodney W. Grimes 641dabee6feSPeter Wemm pageoff = (addr & PAGE_MASK); 642dabee6feSPeter Wemm addr -= pageoff; 643dabee6feSPeter Wemm size += pageoff; 644dabee6feSPeter Wemm size = (vm_size_t) round_page(size); 6459154ee6aSPeter Wemm if (addr + size < addr) 646dabee6feSPeter Wemm return (EINVAL); 647dabee6feSPeter Wemm 64843285049SAlan Cox switch (vm_map_protect(&td->td_proc->p_vmspace->vm_map, addr, 64943285049SAlan Cox addr + size, prot, FALSE)) { 650df8bae1dSRodney W. Grimes case KERN_SUCCESS: 651df8bae1dSRodney W. Grimes return (0); 652df8bae1dSRodney W. Grimes case KERN_PROTECTION_FAILURE: 653df8bae1dSRodney W. Grimes return (EACCES); 6543364c323SKonstantin Belousov case KERN_RESOURCE_SHORTAGE: 6553364c323SKonstantin Belousov return (ENOMEM); 656df8bae1dSRodney W. Grimes } 657df8bae1dSRodney W. Grimes return (EINVAL); 658df8bae1dSRodney W. Grimes } 659df8bae1dSRodney W. 
Grimes 660d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 661dabee6feSPeter Wemm struct minherit_args { 662651bb817SAlexander Langer void *addr; 6639154ee6aSPeter Wemm size_t len; 664dabee6feSPeter Wemm int inherit; 665dabee6feSPeter Wemm }; 666dabee6feSPeter Wemm #endif 667d2c60af8SMatthew Dillon /* 668d2c60af8SMatthew Dillon * MPSAFE 669d2c60af8SMatthew Dillon */ 670dabee6feSPeter Wemm int 6718451d0ddSKip Macy sys_minherit(td, uap) 672b40ce416SJulian Elischer struct thread *td; 673dabee6feSPeter Wemm struct minherit_args *uap; 674dabee6feSPeter Wemm { 675dabee6feSPeter Wemm vm_offset_t addr; 676dabee6feSPeter Wemm vm_size_t size, pageoff; 67754d92145SMatthew Dillon vm_inherit_t inherit; 678dabee6feSPeter Wemm 679dabee6feSPeter Wemm addr = (vm_offset_t)uap->addr; 6809154ee6aSPeter Wemm size = uap->len; 681dabee6feSPeter Wemm inherit = uap->inherit; 682dabee6feSPeter Wemm 683dabee6feSPeter Wemm pageoff = (addr & PAGE_MASK); 684dabee6feSPeter Wemm addr -= pageoff; 685dabee6feSPeter Wemm size += pageoff; 686dabee6feSPeter Wemm size = (vm_size_t) round_page(size); 6879154ee6aSPeter Wemm if (addr + size < addr) 688dabee6feSPeter Wemm return (EINVAL); 689dabee6feSPeter Wemm 690e0be79afSAlan Cox switch (vm_map_inherit(&td->td_proc->p_vmspace->vm_map, addr, 691e0be79afSAlan Cox addr + size, inherit)) { 692dabee6feSPeter Wemm case KERN_SUCCESS: 693dabee6feSPeter Wemm return (0); 694dabee6feSPeter Wemm case KERN_PROTECTION_FAILURE: 695dabee6feSPeter Wemm return (EACCES); 696dabee6feSPeter Wemm } 697dabee6feSPeter Wemm return (EINVAL); 698dabee6feSPeter Wemm } 699dabee6feSPeter Wemm 700dabee6feSPeter Wemm #ifndef _SYS_SYSPROTO_H_ 701df8bae1dSRodney W. Grimes struct madvise_args { 702651bb817SAlexander Langer void *addr; 7039154ee6aSPeter Wemm size_t len; 704df8bae1dSRodney W. Grimes int behav; 705df8bae1dSRodney W. 
Grimes }; 706d2d3e875SBruce Evans #endif 7070d94caffSDavid Greenman 708d2c60af8SMatthew Dillon /* 709d2c60af8SMatthew Dillon * MPSAFE 710d2c60af8SMatthew Dillon */ 711df8bae1dSRodney W. Grimes int 7128451d0ddSKip Macy sys_madvise(td, uap) 713b40ce416SJulian Elischer struct thread *td; 714df8bae1dSRodney W. Grimes struct madvise_args *uap; 715df8bae1dSRodney W. Grimes { 716f35329acSJohn Dyson vm_offset_t start, end; 71705ba50f5SJake Burkholder vm_map_t map; 718f4cf2141SWes Peters struct proc *p; 719f4cf2141SWes Peters int error; 720b4309055SMatthew Dillon 721b4309055SMatthew Dillon /* 722f4cf2141SWes Peters * Check for our special case, advising the swap pager we are 723f4cf2141SWes Peters * "immortal." 724f4cf2141SWes Peters */ 725f4cf2141SWes Peters if (uap->behav == MADV_PROTECT) { 726acd3428bSRobert Watson error = priv_check(td, PRIV_VM_MADV_PROTECT); 72769297bf8SJohn Baldwin if (error == 0) { 728f4cf2141SWes Peters p = td->td_proc; 729f4cf2141SWes Peters PROC_LOCK(p); 730f4cf2141SWes Peters p->p_flag |= P_PROTECTED; 731f4cf2141SWes Peters PROC_UNLOCK(p); 73269297bf8SJohn Baldwin } 733f4cf2141SWes Peters return (error); 734f4cf2141SWes Peters } 735f4cf2141SWes Peters /* 736b4309055SMatthew Dillon * Check for illegal behavior 737b4309055SMatthew Dillon */ 7389730a5daSPaul Saab if (uap->behav < 0 || uap->behav > MADV_CORE) 739b4309055SMatthew Dillon return (EINVAL); 740867a482dSJohn Dyson /* 741867a482dSJohn Dyson * Check for illegal addresses. Watch out for address wrap... Note 742867a482dSJohn Dyson * that VM_*_ADDRESS are not constants due to casts (argh). 
743867a482dSJohn Dyson */ 74405ba50f5SJake Burkholder map = &td->td_proc->p_vmspace->vm_map; 74505ba50f5SJake Burkholder if ((vm_offset_t)uap->addr < vm_map_min(map) || 74605ba50f5SJake Burkholder (vm_offset_t)uap->addr + uap->len > vm_map_max(map)) 747867a482dSJohn Dyson return (EINVAL); 748867a482dSJohn Dyson if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr) 749867a482dSJohn Dyson return (EINVAL); 750867a482dSJohn Dyson 751867a482dSJohn Dyson /* 752867a482dSJohn Dyson * Since this routine is only advisory, we default to conservative 753867a482dSJohn Dyson * behavior. 754867a482dSJohn Dyson */ 755cd6eea25SDavid Greenman start = trunc_page((vm_offset_t) uap->addr); 756cd6eea25SDavid Greenman end = round_page((vm_offset_t) uap->addr + uap->len); 757867a482dSJohn Dyson 75805ba50f5SJake Burkholder if (vm_map_madvise(map, start, end, uap->behav)) 759094f6d26SAlan Cox return (EINVAL); 760094f6d26SAlan Cox return (0); 761df8bae1dSRodney W. Grimes } 762df8bae1dSRodney W. Grimes 763d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 764df8bae1dSRodney W. Grimes struct mincore_args { 765651bb817SAlexander Langer const void *addr; 7669154ee6aSPeter Wemm size_t len; 767df8bae1dSRodney W. Grimes char *vec; 768df8bae1dSRodney W. Grimes }; 769d2d3e875SBruce Evans #endif 7700d94caffSDavid Greenman 771d2c60af8SMatthew Dillon /* 772d2c60af8SMatthew Dillon * MPSAFE 773d2c60af8SMatthew Dillon */ 774df8bae1dSRodney W. Grimes int 7758451d0ddSKip Macy sys_mincore(td, uap) 776b40ce416SJulian Elischer struct thread *td; 777df8bae1dSRodney W. Grimes struct mincore_args *uap; 778df8bae1dSRodney W. 
Grimes { 779867a482dSJohn Dyson vm_offset_t addr, first_addr; 780867a482dSJohn Dyson vm_offset_t end, cend; 781867a482dSJohn Dyson pmap_t pmap; 782867a482dSJohn Dyson vm_map_t map; 78302c04a2fSJohn Dyson char *vec; 784d2c60af8SMatthew Dillon int error = 0; 785867a482dSJohn Dyson int vecindex, lastvecindex; 78654d92145SMatthew Dillon vm_map_entry_t current; 787867a482dSJohn Dyson vm_map_entry_t entry; 788567e51e1SAlan Cox vm_object_t object; 789567e51e1SAlan Cox vm_paddr_t locked_pa; 790567e51e1SAlan Cox vm_page_t m; 791567e51e1SAlan Cox vm_pindex_t pindex; 792867a482dSJohn Dyson int mincoreinfo; 793dd2622a8SAlan Cox unsigned int timestamp; 794567e51e1SAlan Cox boolean_t locked; 795df8bae1dSRodney W. Grimes 796867a482dSJohn Dyson /* 797867a482dSJohn Dyson * Make sure that the addresses presented are valid for user 798867a482dSJohn Dyson * mode. 799867a482dSJohn Dyson */ 800867a482dSJohn Dyson first_addr = addr = trunc_page((vm_offset_t) uap->addr); 8019154ee6aSPeter Wemm end = addr + (vm_size_t)round_page(uap->len); 80205ba50f5SJake Burkholder map = &td->td_proc->p_vmspace->vm_map; 80305ba50f5SJake Burkholder if (end > vm_map_max(map) || end < addr) 804455dd7d4SKonstantin Belousov return (ENOMEM); 80502c04a2fSJohn Dyson 806867a482dSJohn Dyson /* 807867a482dSJohn Dyson * Address of byte vector 808867a482dSJohn Dyson */ 80902c04a2fSJohn Dyson vec = uap->vec; 810867a482dSJohn Dyson 811b40ce416SJulian Elischer pmap = vmspace_pmap(td->td_proc->p_vmspace); 812867a482dSJohn Dyson 813eff50fcdSAlan Cox vm_map_lock_read(map); 814dd2622a8SAlan Cox RestartScan: 815dd2622a8SAlan Cox timestamp = map->timestamp; 816867a482dSJohn Dyson 817455dd7d4SKonstantin Belousov if (!vm_map_lookup_entry(map, addr, &entry)) { 818455dd7d4SKonstantin Belousov vm_map_unlock_read(map); 819455dd7d4SKonstantin Belousov return (ENOMEM); 820455dd7d4SKonstantin Belousov } 821867a482dSJohn Dyson 822867a482dSJohn Dyson /* 823867a482dSJohn Dyson * Do this on a map entry basis so that if the pages are not 
824867a482dSJohn Dyson * in the current processes address space, we can easily look 825867a482dSJohn Dyson * up the pages elsewhere. 826867a482dSJohn Dyson */ 827867a482dSJohn Dyson lastvecindex = -1; 828867a482dSJohn Dyson for (current = entry; 829867a482dSJohn Dyson (current != &map->header) && (current->start < end); 830867a482dSJohn Dyson current = current->next) { 831867a482dSJohn Dyson 832867a482dSJohn Dyson /* 833455dd7d4SKonstantin Belousov * check for contiguity 834455dd7d4SKonstantin Belousov */ 835455dd7d4SKonstantin Belousov if (current->end < end && 836455dd7d4SKonstantin Belousov (entry->next == &map->header || 837455dd7d4SKonstantin Belousov current->next->start > current->end)) { 838455dd7d4SKonstantin Belousov vm_map_unlock_read(map); 839455dd7d4SKonstantin Belousov return (ENOMEM); 840455dd7d4SKonstantin Belousov } 841455dd7d4SKonstantin Belousov 842455dd7d4SKonstantin Belousov /* 843867a482dSJohn Dyson * ignore submaps (for now) or null objects 844867a482dSJohn Dyson */ 8459fdfe602SMatthew Dillon if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) || 846867a482dSJohn Dyson current->object.vm_object == NULL) 847867a482dSJohn Dyson continue; 848867a482dSJohn Dyson 849867a482dSJohn Dyson /* 850867a482dSJohn Dyson * limit this scan to the current map entry and the 851867a482dSJohn Dyson * limits for the mincore call 852867a482dSJohn Dyson */ 853867a482dSJohn Dyson if (addr < current->start) 854867a482dSJohn Dyson addr = current->start; 855867a482dSJohn Dyson cend = current->end; 856867a482dSJohn Dyson if (cend > end) 857867a482dSJohn Dyson cend = end; 858867a482dSJohn Dyson 859867a482dSJohn Dyson /* 860867a482dSJohn Dyson * scan this entry one page at a time 861867a482dSJohn Dyson */ 862867a482dSJohn Dyson while (addr < cend) { 863867a482dSJohn Dyson /* 864867a482dSJohn Dyson * Check pmap first, it is likely faster, also 865867a482dSJohn Dyson * it can provide info as to whether we are the 866867a482dSJohn Dyson * one referencing or modifying the page. 
867867a482dSJohn Dyson */ 868567e51e1SAlan Cox object = NULL; 869567e51e1SAlan Cox locked_pa = 0; 870567e51e1SAlan Cox retry: 871567e51e1SAlan Cox m = NULL; 872567e51e1SAlan Cox mincoreinfo = pmap_mincore(pmap, addr, &locked_pa); 873567e51e1SAlan Cox if (locked_pa != 0) { 874867a482dSJohn Dyson /* 875567e51e1SAlan Cox * The page is mapped by this process but not 876567e51e1SAlan Cox * both accessed and modified. It is also 877567e51e1SAlan Cox * managed. Acquire the object lock so that 878567e51e1SAlan Cox * other mappings might be examined. 879867a482dSJohn Dyson */ 880567e51e1SAlan Cox m = PHYS_TO_VM_PAGE(locked_pa); 881567e51e1SAlan Cox if (m->object != object) { 882567e51e1SAlan Cox if (object != NULL) 883567e51e1SAlan Cox VM_OBJECT_UNLOCK(object); 884567e51e1SAlan Cox object = m->object; 885567e51e1SAlan Cox locked = VM_OBJECT_TRYLOCK(object); 886567e51e1SAlan Cox vm_page_unlock(m); 887567e51e1SAlan Cox if (!locked) { 888567e51e1SAlan Cox VM_OBJECT_LOCK(object); 8892965a453SKip Macy vm_page_lock(m); 890567e51e1SAlan Cox goto retry; 891567e51e1SAlan Cox } 892567e51e1SAlan Cox } else 893567e51e1SAlan Cox vm_page_unlock(m); 894567e51e1SAlan Cox KASSERT(m->valid == VM_PAGE_BITS_ALL, 895567e51e1SAlan Cox ("mincore: page %p is mapped but invalid", 896567e51e1SAlan Cox m)); 897567e51e1SAlan Cox } else if (mincoreinfo == 0) { 898567e51e1SAlan Cox /* 899567e51e1SAlan Cox * The page is not mapped by this process. If 900567e51e1SAlan Cox * the object implements managed pages, then 901567e51e1SAlan Cox * determine if the page is resident so that 902567e51e1SAlan Cox * the mappings might be examined. 
903567e51e1SAlan Cox */ 904567e51e1SAlan Cox if (current->object.vm_object != object) { 905567e51e1SAlan Cox if (object != NULL) 906567e51e1SAlan Cox VM_OBJECT_UNLOCK(object); 907567e51e1SAlan Cox object = current->object.vm_object; 908567e51e1SAlan Cox VM_OBJECT_LOCK(object); 909567e51e1SAlan Cox } 910567e51e1SAlan Cox if (object->type == OBJT_DEFAULT || 911567e51e1SAlan Cox object->type == OBJT_SWAP || 912567e51e1SAlan Cox object->type == OBJT_VNODE) { 913567e51e1SAlan Cox pindex = OFF_TO_IDX(current->offset + 914567e51e1SAlan Cox (addr - current->start)); 915567e51e1SAlan Cox m = vm_page_lookup(object, pindex); 9161c8279e4SAlan Cox if (m == NULL && 9171c8279e4SAlan Cox vm_page_is_cached(object, pindex)) 9181c8279e4SAlan Cox mincoreinfo = MINCORE_INCORE; 919567e51e1SAlan Cox if (m != NULL && m->valid == 0) 920567e51e1SAlan Cox m = NULL; 921567e51e1SAlan Cox if (m != NULL) 922567e51e1SAlan Cox mincoreinfo = MINCORE_INCORE; 923567e51e1SAlan Cox } 924567e51e1SAlan Cox } 925567e51e1SAlan Cox if (m != NULL) { 926567e51e1SAlan Cox /* Examine other mappings to the page. */ 927567e51e1SAlan Cox if (m->dirty == 0 && pmap_is_modified(m)) 928567e51e1SAlan Cox vm_page_dirty(m); 929567e51e1SAlan Cox if (m->dirty != 0) 930867a482dSJohn Dyson mincoreinfo |= MINCORE_MODIFIED_OTHER; 931c46b90e9SAlan Cox /* 9323407fefeSKonstantin Belousov * The first test for PGA_REFERENCED is an 933c46b90e9SAlan Cox * optimization. The second test is 934c46b90e9SAlan Cox * required because a concurrent pmap 935c46b90e9SAlan Cox * operation could clear the last reference 9363407fefeSKonstantin Belousov * and set PGA_REFERENCED before the call to 937c46b90e9SAlan Cox * pmap_is_referenced(). 
938c46b90e9SAlan Cox */ 9393407fefeSKonstantin Belousov if ((m->aflags & PGA_REFERENCED) != 0 || 940c46b90e9SAlan Cox pmap_is_referenced(m) || 9413407fefeSKonstantin Belousov (m->aflags & PGA_REFERENCED) != 0) 942867a482dSJohn Dyson mincoreinfo |= MINCORE_REFERENCED_OTHER; 9439b5a5d81SJohn Dyson } 944567e51e1SAlan Cox if (object != NULL) 945567e51e1SAlan Cox VM_OBJECT_UNLOCK(object); 946867a482dSJohn Dyson 947867a482dSJohn Dyson /* 948dd2622a8SAlan Cox * subyte may page fault. In case it needs to modify 949dd2622a8SAlan Cox * the map, we release the lock. 950dd2622a8SAlan Cox */ 951dd2622a8SAlan Cox vm_map_unlock_read(map); 952dd2622a8SAlan Cox 953dd2622a8SAlan Cox /* 954867a482dSJohn Dyson * calculate index into user supplied byte vector 955867a482dSJohn Dyson */ 956867a482dSJohn Dyson vecindex = OFF_TO_IDX(addr - first_addr); 957867a482dSJohn Dyson 958867a482dSJohn Dyson /* 959867a482dSJohn Dyson * If we have skipped map entries, we need to make sure that 960867a482dSJohn Dyson * the byte vector is zeroed for those skipped entries. 961867a482dSJohn Dyson */ 962867a482dSJohn Dyson while ((lastvecindex + 1) < vecindex) { 963867a482dSJohn Dyson error = subyte(vec + lastvecindex, 0); 964867a482dSJohn Dyson if (error) { 965d2c60af8SMatthew Dillon error = EFAULT; 966d2c60af8SMatthew Dillon goto done2; 967867a482dSJohn Dyson } 968867a482dSJohn Dyson ++lastvecindex; 969867a482dSJohn Dyson } 970867a482dSJohn Dyson 971867a482dSJohn Dyson /* 972867a482dSJohn Dyson * Pass the page information to the user 973867a482dSJohn Dyson */ 974867a482dSJohn Dyson error = subyte(vec + vecindex, mincoreinfo); 975867a482dSJohn Dyson if (error) { 976d2c60af8SMatthew Dillon error = EFAULT; 977d2c60af8SMatthew Dillon goto done2; 978867a482dSJohn Dyson } 979dd2622a8SAlan Cox 980dd2622a8SAlan Cox /* 981dd2622a8SAlan Cox * If the map has changed, due to the subyte, the previous 982dd2622a8SAlan Cox * output may be invalid. 
983dd2622a8SAlan Cox */ 984dd2622a8SAlan Cox vm_map_lock_read(map); 985dd2622a8SAlan Cox if (timestamp != map->timestamp) 986dd2622a8SAlan Cox goto RestartScan; 987dd2622a8SAlan Cox 988867a482dSJohn Dyson lastvecindex = vecindex; 98902c04a2fSJohn Dyson addr += PAGE_SIZE; 99002c04a2fSJohn Dyson } 991867a482dSJohn Dyson } 992867a482dSJohn Dyson 993867a482dSJohn Dyson /* 994dd2622a8SAlan Cox * subyte may page fault. In case it needs to modify 995dd2622a8SAlan Cox * the map, we release the lock. 996dd2622a8SAlan Cox */ 997dd2622a8SAlan Cox vm_map_unlock_read(map); 998dd2622a8SAlan Cox 999dd2622a8SAlan Cox /* 1000867a482dSJohn Dyson * Zero the last entries in the byte vector. 1001867a482dSJohn Dyson */ 1002867a482dSJohn Dyson vecindex = OFF_TO_IDX(end - first_addr); 1003867a482dSJohn Dyson while ((lastvecindex + 1) < vecindex) { 1004867a482dSJohn Dyson error = subyte(vec + lastvecindex, 0); 1005867a482dSJohn Dyson if (error) { 1006d2c60af8SMatthew Dillon error = EFAULT; 1007d2c60af8SMatthew Dillon goto done2; 1008867a482dSJohn Dyson } 1009867a482dSJohn Dyson ++lastvecindex; 1010867a482dSJohn Dyson } 1011867a482dSJohn Dyson 1012dd2622a8SAlan Cox /* 1013dd2622a8SAlan Cox * If the map has changed, due to the subyte, the previous 1014dd2622a8SAlan Cox * output may be invalid. 1015dd2622a8SAlan Cox */ 1016dd2622a8SAlan Cox vm_map_lock_read(map); 1017dd2622a8SAlan Cox if (timestamp != map->timestamp) 1018dd2622a8SAlan Cox goto RestartScan; 1019eff50fcdSAlan Cox vm_map_unlock_read(map); 1020d2c60af8SMatthew Dillon done2: 1021d2c60af8SMatthew Dillon return (error); 1022df8bae1dSRodney W. Grimes } 1023df8bae1dSRodney W. Grimes 1024d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 1025df8bae1dSRodney W. Grimes struct mlock_args { 1026651bb817SAlexander Langer const void *addr; 1027df8bae1dSRodney W. Grimes size_t len; 1028df8bae1dSRodney W. 
Grimes }; 1029d2d3e875SBruce Evans #endif 1030d2c60af8SMatthew Dillon /* 1031d2c60af8SMatthew Dillon * MPSAFE 1032d2c60af8SMatthew Dillon */ 1033df8bae1dSRodney W. Grimes int 10348451d0ddSKip Macy sys_mlock(td, uap) 1035b40ce416SJulian Elischer struct thread *td; 1036df8bae1dSRodney W. Grimes struct mlock_args *uap; 1037df8bae1dSRodney W. Grimes { 1038f0ea4612SDon Lewis struct proc *proc; 1039bb734798SDon Lewis vm_offset_t addr, end, last, start; 1040bb734798SDon Lewis vm_size_t npages, size; 10411ba5ad42SEdward Tomasz Napierala unsigned long nsize; 1042bb734798SDon Lewis int error; 1043df8bae1dSRodney W. Grimes 1044acd3428bSRobert Watson error = priv_check(td, PRIV_VM_MLOCK); 104547934cefSDon Lewis if (error) 104647934cefSDon Lewis return (error); 104716929939SDon Lewis addr = (vm_offset_t)uap->addr; 104816929939SDon Lewis size = uap->len; 1049bb734798SDon Lewis last = addr + size; 105016929939SDon Lewis start = trunc_page(addr); 1051bb734798SDon Lewis end = round_page(last); 1052bb734798SDon Lewis if (last < addr || end < addr) 1053df8bae1dSRodney W. 
Grimes return (EINVAL); 105416929939SDon Lewis npages = atop(end - start); 105516929939SDon Lewis if (npages > vm_page_max_wired) 105616929939SDon Lewis return (ENOMEM); 1057f0ea4612SDon Lewis proc = td->td_proc; 105847934cefSDon Lewis PROC_LOCK(proc); 1059c4e357e8SAndrey Zonov nsize = ptoa(npages + vmspace_wired_count(proc->p_vmspace)); 10601ba5ad42SEdward Tomasz Napierala if (nsize > lim_cur(proc, RLIMIT_MEMLOCK)) { 106147934cefSDon Lewis PROC_UNLOCK(proc); 10624a40e3d4SJohn Dyson return (ENOMEM); 106391d5354aSJohn Baldwin } 106447934cefSDon Lewis PROC_UNLOCK(proc); 10652feb50bfSAttilio Rao if (npages + cnt.v_wire_count > vm_page_max_wired) 106616929939SDon Lewis return (EAGAIN); 1067afcc55f3SEdward Tomasz Napierala #ifdef RACCT 10681ba5ad42SEdward Tomasz Napierala PROC_LOCK(proc); 10691ba5ad42SEdward Tomasz Napierala error = racct_set(proc, RACCT_MEMLOCK, nsize); 10701ba5ad42SEdward Tomasz Napierala PROC_UNLOCK(proc); 10711ba5ad42SEdward Tomasz Napierala if (error != 0) 10721ba5ad42SEdward Tomasz Napierala return (ENOMEM); 1073afcc55f3SEdward Tomasz Napierala #endif 107416929939SDon Lewis error = vm_map_wire(&proc->p_vmspace->vm_map, start, end, 107516929939SDon Lewis VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); 1076afcc55f3SEdward Tomasz Napierala #ifdef RACCT 10771ba5ad42SEdward Tomasz Napierala if (error != KERN_SUCCESS) { 10781ba5ad42SEdward Tomasz Napierala PROC_LOCK(proc); 10791ba5ad42SEdward Tomasz Napierala racct_set(proc, RACCT_MEMLOCK, 1080c4e357e8SAndrey Zonov ptoa(vmspace_wired_count(proc->p_vmspace))); 10811ba5ad42SEdward Tomasz Napierala PROC_UNLOCK(proc); 10821ba5ad42SEdward Tomasz Napierala } 1083afcc55f3SEdward Tomasz Napierala #endif 1084df8bae1dSRodney W. Grimes return (error == KERN_SUCCESS ? 0 : ENOMEM); 1085df8bae1dSRodney W. Grimes } 1086df8bae1dSRodney W. 
Grimes 1087d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 10884a40e3d4SJohn Dyson struct mlockall_args { 10894a40e3d4SJohn Dyson int how; 10904a40e3d4SJohn Dyson }; 10914a40e3d4SJohn Dyson #endif 10924a40e3d4SJohn Dyson 1093d2c60af8SMatthew Dillon /* 1094d2c60af8SMatthew Dillon * MPSAFE 1095d2c60af8SMatthew Dillon */ 10964a40e3d4SJohn Dyson int 10978451d0ddSKip Macy sys_mlockall(td, uap) 1098b40ce416SJulian Elischer struct thread *td; 10994a40e3d4SJohn Dyson struct mlockall_args *uap; 11004a40e3d4SJohn Dyson { 1101abd498aaSBruce M Simpson vm_map_t map; 1102abd498aaSBruce M Simpson int error; 1103abd498aaSBruce M Simpson 1104abd498aaSBruce M Simpson map = &td->td_proc->p_vmspace->vm_map; 1105*7e19eda4SAndrey Zonov error = priv_check(td, PRIV_VM_MLOCK); 1106*7e19eda4SAndrey Zonov if (error) 1107*7e19eda4SAndrey Zonov return (error); 1108abd498aaSBruce M Simpson 1109abd498aaSBruce M Simpson if ((uap->how == 0) || ((uap->how & ~(MCL_CURRENT|MCL_FUTURE)) != 0)) 1110abd498aaSBruce M Simpson return (EINVAL); 1111abd498aaSBruce M Simpson 1112abd498aaSBruce M Simpson /* 1113abd498aaSBruce M Simpson * If wiring all pages in the process would cause it to exceed 1114abd498aaSBruce M Simpson * a hard resource limit, return ENOMEM. 
1115abd498aaSBruce M Simpson */ 1116*7e19eda4SAndrey Zonov if (!old_mlock && uap->how & MCL_CURRENT) { 111791d5354aSJohn Baldwin PROC_LOCK(td->td_proc); 1118fd6f4ffbSEdward Tomasz Napierala if (map->size > lim_cur(td->td_proc, RLIMIT_MEMLOCK)) { 111991d5354aSJohn Baldwin PROC_UNLOCK(td->td_proc); 1120abd498aaSBruce M Simpson return (ENOMEM); 112191d5354aSJohn Baldwin } 112291d5354aSJohn Baldwin PROC_UNLOCK(td->td_proc); 1123*7e19eda4SAndrey Zonov } 1124afcc55f3SEdward Tomasz Napierala #ifdef RACCT 11251ba5ad42SEdward Tomasz Napierala PROC_LOCK(td->td_proc); 11261ba5ad42SEdward Tomasz Napierala error = racct_set(td->td_proc, RACCT_MEMLOCK, map->size); 11271ba5ad42SEdward Tomasz Napierala PROC_UNLOCK(td->td_proc); 11281ba5ad42SEdward Tomasz Napierala if (error != 0) 11291ba5ad42SEdward Tomasz Napierala return (ENOMEM); 1130afcc55f3SEdward Tomasz Napierala #endif 1131abd498aaSBruce M Simpson 1132abd498aaSBruce M Simpson if (uap->how & MCL_FUTURE) { 1133abd498aaSBruce M Simpson vm_map_lock(map); 1134abd498aaSBruce M Simpson vm_map_modflags(map, MAP_WIREFUTURE, 0); 1135abd498aaSBruce M Simpson vm_map_unlock(map); 1136abd498aaSBruce M Simpson error = 0; 1137abd498aaSBruce M Simpson } 1138abd498aaSBruce M Simpson 1139abd498aaSBruce M Simpson if (uap->how & MCL_CURRENT) { 1140abd498aaSBruce M Simpson /* 1141abd498aaSBruce M Simpson * P1003.1-2001 mandates that all currently mapped pages 1142abd498aaSBruce M Simpson * will be memory resident and locked (wired) upon return 1143abd498aaSBruce M Simpson * from mlockall(). vm_map_wire() will wire pages, by 1144abd498aaSBruce M Simpson * calling vm_fault_wire() for each page in the region. 1145abd498aaSBruce M Simpson */ 1146abd498aaSBruce M Simpson error = vm_map_wire(map, vm_map_min(map), vm_map_max(map), 1147abd498aaSBruce M Simpson VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK); 1148abd498aaSBruce M Simpson error = (error == KERN_SUCCESS ? 
0 : EAGAIN); 1149abd498aaSBruce M Simpson } 1150afcc55f3SEdward Tomasz Napierala #ifdef RACCT 11511ba5ad42SEdward Tomasz Napierala if (error != KERN_SUCCESS) { 11521ba5ad42SEdward Tomasz Napierala PROC_LOCK(td->td_proc); 11531ba5ad42SEdward Tomasz Napierala racct_set(td->td_proc, RACCT_MEMLOCK, 1154c4e357e8SAndrey Zonov ptoa(vmspace_wired_count(td->td_proc->p_vmspace))); 11551ba5ad42SEdward Tomasz Napierala PROC_UNLOCK(td->td_proc); 11561ba5ad42SEdward Tomasz Napierala } 1157afcc55f3SEdward Tomasz Napierala #endif 1158abd498aaSBruce M Simpson 1159abd498aaSBruce M Simpson return (error); 11604a40e3d4SJohn Dyson } 11614a40e3d4SJohn Dyson 11624a40e3d4SJohn Dyson #ifndef _SYS_SYSPROTO_H_ 1163fa721254SAlfred Perlstein struct munlockall_args { 1164abd498aaSBruce M Simpson register_t dummy; 11654a40e3d4SJohn Dyson }; 11664a40e3d4SJohn Dyson #endif 11674a40e3d4SJohn Dyson 1168d2c60af8SMatthew Dillon /* 1169d2c60af8SMatthew Dillon * MPSAFE 1170d2c60af8SMatthew Dillon */ 11714a40e3d4SJohn Dyson int 11728451d0ddSKip Macy sys_munlockall(td, uap) 1173b40ce416SJulian Elischer struct thread *td; 11744a40e3d4SJohn Dyson struct munlockall_args *uap; 11754a40e3d4SJohn Dyson { 1176abd498aaSBruce M Simpson vm_map_t map; 1177abd498aaSBruce M Simpson int error; 1178abd498aaSBruce M Simpson 1179abd498aaSBruce M Simpson map = &td->td_proc->p_vmspace->vm_map; 1180acd3428bSRobert Watson error = priv_check(td, PRIV_VM_MUNLOCK); 1181abd498aaSBruce M Simpson if (error) 1182abd498aaSBruce M Simpson return (error); 1183abd498aaSBruce M Simpson 1184abd498aaSBruce M Simpson /* Clear the MAP_WIREFUTURE flag from this vm_map. */ 1185abd498aaSBruce M Simpson vm_map_lock(map); 1186abd498aaSBruce M Simpson vm_map_modflags(map, 0, MAP_WIREFUTURE); 1187abd498aaSBruce M Simpson vm_map_unlock(map); 1188abd498aaSBruce M Simpson 1189abd498aaSBruce M Simpson /* Forcibly unwire all pages. 
*/ 1190abd498aaSBruce M Simpson error = vm_map_unwire(map, vm_map_min(map), vm_map_max(map), 1191abd498aaSBruce M Simpson VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK); 1192afcc55f3SEdward Tomasz Napierala #ifdef RACCT 11931ba5ad42SEdward Tomasz Napierala if (error == KERN_SUCCESS) { 11941ba5ad42SEdward Tomasz Napierala PROC_LOCK(td->td_proc); 11951ba5ad42SEdward Tomasz Napierala racct_set(td->td_proc, RACCT_MEMLOCK, 0); 11961ba5ad42SEdward Tomasz Napierala PROC_UNLOCK(td->td_proc); 11971ba5ad42SEdward Tomasz Napierala } 1198afcc55f3SEdward Tomasz Napierala #endif 1199abd498aaSBruce M Simpson 1200abd498aaSBruce M Simpson return (error); 12014a40e3d4SJohn Dyson } 12024a40e3d4SJohn Dyson 12034a40e3d4SJohn Dyson #ifndef _SYS_SYSPROTO_H_ 1204df8bae1dSRodney W. Grimes struct munlock_args { 1205651bb817SAlexander Langer const void *addr; 1206df8bae1dSRodney W. Grimes size_t len; 1207df8bae1dSRodney W. Grimes }; 1208d2d3e875SBruce Evans #endif 1209d2c60af8SMatthew Dillon /* 1210d2c60af8SMatthew Dillon * MPSAFE 1211d2c60af8SMatthew Dillon */ 1212df8bae1dSRodney W. Grimes int 12138451d0ddSKip Macy sys_munlock(td, uap) 1214b40ce416SJulian Elischer struct thread *td; 1215df8bae1dSRodney W. Grimes struct munlock_args *uap; 1216df8bae1dSRodney W. Grimes { 1217bb734798SDon Lewis vm_offset_t addr, end, last, start; 121816929939SDon Lewis vm_size_t size; 1219df8bae1dSRodney W. Grimes int error; 1220df8bae1dSRodney W. Grimes 1221acd3428bSRobert Watson error = priv_check(td, PRIV_VM_MUNLOCK); 122247934cefSDon Lewis if (error) 122347934cefSDon Lewis return (error); 122416929939SDon Lewis addr = (vm_offset_t)uap->addr; 122516929939SDon Lewis size = uap->len; 1226bb734798SDon Lewis last = addr + size; 122716929939SDon Lewis start = trunc_page(addr); 1228bb734798SDon Lewis end = round_page(last); 1229bb734798SDon Lewis if (last < addr || end < addr) 1230df8bae1dSRodney W. 
Grimes return (EINVAL); 123116929939SDon Lewis error = vm_map_unwire(&td->td_proc->p_vmspace->vm_map, start, end, 123216929939SDon Lewis VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); 1233afcc55f3SEdward Tomasz Napierala #ifdef RACCT 12341ba5ad42SEdward Tomasz Napierala if (error == KERN_SUCCESS) { 12351ba5ad42SEdward Tomasz Napierala PROC_LOCK(td->td_proc); 12361ba5ad42SEdward Tomasz Napierala racct_sub(td->td_proc, RACCT_MEMLOCK, ptoa(end - start)); 12371ba5ad42SEdward Tomasz Napierala PROC_UNLOCK(td->td_proc); 12381ba5ad42SEdward Tomasz Napierala } 1239afcc55f3SEdward Tomasz Napierala #endif 1240df8bae1dSRodney W. Grimes return (error == KERN_SUCCESS ? 0 : ENOMEM); 1241df8bae1dSRodney W. Grimes } 1242df8bae1dSRodney W. Grimes 1243df8bae1dSRodney W. Grimes /* 1244c8daea13SAlexander Kabaev * vm_mmap_vnode() 1245c8daea13SAlexander Kabaev * 1246c8daea13SAlexander Kabaev * Helper function for vm_mmap. Perform sanity check specific for mmap 1247c8daea13SAlexander Kabaev * operations on vnodes. 124884110e7eSKonstantin Belousov * 124984110e7eSKonstantin Belousov * For VCHR vnodes, the vnode lock is held over the call to 125084110e7eSKonstantin Belousov * vm_mmap_cdev() to keep vp->v_rdev valid. 
1251c8daea13SAlexander Kabaev */ 1252c8daea13SAlexander Kabaev int 1253c8daea13SAlexander Kabaev vm_mmap_vnode(struct thread *td, vm_size_t objsize, 1254c8daea13SAlexander Kabaev vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp, 125584110e7eSKonstantin Belousov struct vnode *vp, vm_ooffset_t *foffp, vm_object_t *objp, 125684110e7eSKonstantin Belousov boolean_t *writecounted) 1257c8daea13SAlexander Kabaev { 1258c8daea13SAlexander Kabaev struct vattr va; 1259c8daea13SAlexander Kabaev vm_object_t obj; 126064345f0bSJohn Baldwin vm_offset_t foff; 1261ae51ff11SJeff Roberson struct mount *mp; 12620359a12eSAttilio Rao struct ucred *cred; 12635050aa86SKonstantin Belousov int error, flags, locktype; 1264c8daea13SAlexander Kabaev 1265ae51ff11SJeff Roberson mp = vp->v_mount; 12660359a12eSAttilio Rao cred = td->td_ucred; 126784110e7eSKonstantin Belousov if ((*maxprotp & VM_PROT_WRITE) && (*flagsp & MAP_SHARED)) 126884110e7eSKonstantin Belousov locktype = LK_EXCLUSIVE; 126984110e7eSKonstantin Belousov else 127084110e7eSKonstantin Belousov locktype = LK_SHARED; 12715050aa86SKonstantin Belousov if ((error = vget(vp, locktype, td)) != 0) 1272c8daea13SAlexander Kabaev return (error); 127364345f0bSJohn Baldwin foff = *foffp; 1274c8daea13SAlexander Kabaev flags = *flagsp; 12758516dd18SPoul-Henning Kamp obj = vp->v_object; 1276c8daea13SAlexander Kabaev if (vp->v_type == VREG) { 1277c8daea13SAlexander Kabaev /* 1278c8daea13SAlexander Kabaev * Get the proper underlying object 1279c8daea13SAlexander Kabaev */ 12808516dd18SPoul-Henning Kamp if (obj == NULL) { 1281c8daea13SAlexander Kabaev error = EINVAL; 1282c8daea13SAlexander Kabaev goto done; 1283c8daea13SAlexander Kabaev } 1284c8daea13SAlexander Kabaev if (obj->handle != vp) { 1285c8daea13SAlexander Kabaev vput(vp); 1286c8daea13SAlexander Kabaev vp = (struct vnode *)obj->handle; 128784110e7eSKonstantin Belousov /* 128884110e7eSKonstantin Belousov * Bypass filesystems obey the mpsafety of the 128984110e7eSKonstantin Belousov * underlying 
fs. 129084110e7eSKonstantin Belousov */ 129184110e7eSKonstantin Belousov error = vget(vp, locktype, td); 12925050aa86SKonstantin Belousov if (error != 0) 129384110e7eSKonstantin Belousov return (error); 129484110e7eSKonstantin Belousov } 129584110e7eSKonstantin Belousov if (locktype == LK_EXCLUSIVE) { 129684110e7eSKonstantin Belousov *writecounted = TRUE; 129784110e7eSKonstantin Belousov vnode_pager_update_writecount(obj, 0, objsize); 129884110e7eSKonstantin Belousov } 1299c8daea13SAlexander Kabaev } else if (vp->v_type == VCHR) { 130064345f0bSJohn Baldwin error = vm_mmap_cdev(td, objsize, prot, maxprotp, flagsp, 130164345f0bSJohn Baldwin vp->v_rdev, foffp, objp); 130264345f0bSJohn Baldwin if (error == 0) 130364345f0bSJohn Baldwin goto mark_atime; 130491a35e78SKonstantin Belousov goto done; 1305c8daea13SAlexander Kabaev } else { 1306c8daea13SAlexander Kabaev error = EINVAL; 1307c8daea13SAlexander Kabaev goto done; 1308c8daea13SAlexander Kabaev } 13090359a12eSAttilio Rao if ((error = VOP_GETATTR(vp, &va, cred))) 1310c8daea13SAlexander Kabaev goto done; 1311c92163dcSChristian S.J. Peron #ifdef MAC 13120359a12eSAttilio Rao error = mac_vnode_check_mmap(cred, vp, prot, flags); 1313c92163dcSChristian S.J. Peron if (error != 0) 1314c92163dcSChristian S.J. Peron goto done; 1315c92163dcSChristian S.J. Peron #endif 1316c8daea13SAlexander Kabaev if ((flags & MAP_SHARED) != 0) { 1317c8daea13SAlexander Kabaev if ((va.va_flags & (SF_SNAPSHOT|IMMUTABLE|APPEND)) != 0) { 1318c8daea13SAlexander Kabaev if (prot & PROT_WRITE) { 1319c8daea13SAlexander Kabaev error = EPERM; 1320c8daea13SAlexander Kabaev goto done; 1321c8daea13SAlexander Kabaev } 1322c8daea13SAlexander Kabaev *maxprotp &= ~VM_PROT_WRITE; 1323c8daea13SAlexander Kabaev } 1324c8daea13SAlexander Kabaev } 1325c8daea13SAlexander Kabaev /* 1326c8daea13SAlexander Kabaev * If it is a regular file without any references 1327c8daea13SAlexander Kabaev * we do not need to sync it. 
1328c8daea13SAlexander Kabaev * Adjust object size to be the size of actual file. 1329c8daea13SAlexander Kabaev */ 1330c8daea13SAlexander Kabaev objsize = round_page(va.va_size); 1331c8daea13SAlexander Kabaev if (va.va_nlink == 0) 1332c8daea13SAlexander Kabaev flags |= MAP_NOSYNC; 133384110e7eSKonstantin Belousov obj = vm_pager_allocate(OBJT_VNODE, vp, objsize, prot, foff, cred); 1334c8daea13SAlexander Kabaev if (obj == NULL) { 133564345f0bSJohn Baldwin error = ENOMEM; 1336c8daea13SAlexander Kabaev goto done; 1337c8daea13SAlexander Kabaev } 1338c8daea13SAlexander Kabaev *objp = obj; 1339c8daea13SAlexander Kabaev *flagsp = flags; 134064345f0bSJohn Baldwin 134164345f0bSJohn Baldwin mark_atime: 13420359a12eSAttilio Rao vfs_mark_atime(vp, cred); 13431e309003SDiomidis Spinellis 1344c8daea13SAlexander Kabaev done: 1345c8daea13SAlexander Kabaev vput(vp); 1346c8daea13SAlexander Kabaev return (error); 1347c8daea13SAlexander Kabaev } 1348c8daea13SAlexander Kabaev 1349c8daea13SAlexander Kabaev /* 135098df9218SJohn Baldwin * vm_mmap_cdev() 135198df9218SJohn Baldwin * 135298df9218SJohn Baldwin * MPSAFE 135398df9218SJohn Baldwin * 135498df9218SJohn Baldwin * Helper function for vm_mmap. Perform sanity check specific for mmap 135598df9218SJohn Baldwin * operations on cdevs. 
 */
int
vm_mmap_cdev(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct cdev *cdev, vm_ooffset_t *foff, vm_object_t *objp)
{
	vm_object_t obj;
	struct cdevsw *dsw;
	int error, flags, ref;

	flags = *flagsp;

	/*
	 * Hold a threaded reference on the cdevsw for the whole call;
	 * every return path below must pair this with dev_relthread().
	 */
	dsw = dev_refthread(cdev, &ref);
	if (dsw == NULL)
		return (ENXIO);
	if (dsw->d_flags & D_MMAP_ANON) {
		/*
		 * The driver asked to be mapped as anonymous memory:
		 * rewrite the request and let the caller take the
		 * MAP_ANON path instead of creating a device mapping.
		 */
		dev_relthread(cdev, ref);
		*maxprotp = VM_PROT_ALL;
		*flagsp |= MAP_ANON;
		return (0);
	}
	/*
	 * cdevs do not provide private mappings of any kind.
	 */
	if ((*maxprotp & VM_PROT_WRITE) == 0 &&
	    (prot & PROT_WRITE) != 0) {
		dev_relthread(cdev, ref);
		return (EACCES);
	}
	if (flags & (MAP_PRIVATE|MAP_COPY)) {
		dev_relthread(cdev, ref);
		return (EINVAL);
	}
	/*
	 * Force device mappings to be shared.
	 */
	flags |= MAP_SHARED;
#ifdef MAC_XXX
	error = mac_cdev_check_mmap(td->td_ucred, cdev, prot);
	if (error != 0) {
		dev_relthread(cdev, ref);
		return (error);
	}
#endif
	/*
	 * First, try d_mmap_single().  If that is not implemented
	 * (returns ENODEV), fall back to using the device pager.
	 * Note that d_mmap_single() must return a reference to the
	 * object (it needs to bump the reference count of the object
	 * it returns somehow).
	 *
	 * XXX assumes VM_PROT_* == PROT_*
	 */
	error = dsw->d_mmap_single(cdev, foff, objsize, objp, (int)prot);
	dev_relthread(cdev, ref);
	if (error != ENODEV)
		return (error);
	/* Fall back: let the device pager provide the backing object. */
	obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff,
	    td->td_ucred);
	if (obj == NULL)
		return (EINVAL);
	*objp = obj;
	*flagsp = flags;
	return (0);
}

/*
 * vm_mmap_shm()
 *
 * MPSAFE
 *
 * Helper function for vm_mmap.  Perform sanity check specific for mmap
 * operations on shm file descriptors
 */
int
vm_mmap_shm(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct shmfd *shmfd, vm_ooffset_t foff, vm_object_t *objp)
{
	int error;

	/*
	 * A shared, writable mapping needs write permission on the
	 * descriptor (reflected in *maxprotp by the caller).
	 */
	if ((*flagsp & MAP_SHARED) != 0 &&
	    (*maxprotp & VM_PROT_WRITE) == 0 &&
	    (prot & PROT_WRITE) != 0)
		return (EACCES);
#ifdef MAC
	error = mac_posixshm_check_mmap(td->td_ucred, shmfd, prot, *flagsp);
	if (error != 0)
		return (error);
#endif
	/* Delegate object lookup to the shm layer; it fills in *objp. */
	error = shm_mmap(shmfd, objsize, foff, objp);
	if (error)
		return (error);
	return (0);
}

/*
 * vm_mmap()
 *
 * MPSAFE
 *
 * Internal version of mmap.  Currently used by mmap, exec, and sys5
 * shared memory.  Handle is either a vnode pointer or NULL for MAP_ANON.
 */
int
vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags,
    objtype_t handle_type, void *handle,
    vm_ooffset_t foff)
{
	boolean_t fitit;
	vm_object_t object = NULL;
	struct thread *td = curthread;
	int docow, error, rv;
	boolean_t writecounted;

	if (size == 0)
		return (0);

	size = round_page(size);

	/*
	 * Resource-limit and racct accounting applies only when mapping
	 * into the calling process's own address space.
	 */
	if (map == &td->td_proc->p_vmspace->vm_map) {
		PROC_LOCK(td->td_proc);
		if (map->size + size > lim_cur(td->td_proc, RLIMIT_VMEM)) {
			PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		if (racct_set(td->td_proc, RACCT_VMEM, map->size + size)) {
			PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		/*
		 * With MAP_WIREFUTURE the new mapping will be wired
		 * immediately, so charge it against RLIMIT_MEMLOCK and
		 * RACCT_MEMLOCK up front.  On failure, roll back the
		 * RACCT_VMEM charge made above.  (old_mlock presumably
		 * is a sysctl relaxing this policy — not visible here;
		 * TODO confirm against the file's head.)
		 */
		if (!old_mlock && map->flags & MAP_WIREFUTURE) {
			if (ptoa(vmspace_wired_count(td->td_proc->p_vmspace)) +
			    size > lim_cur(td->td_proc, RLIMIT_MEMLOCK)) {
				racct_set_force(td->td_proc, RACCT_VMEM,
				    map->size);
				PROC_UNLOCK(td->td_proc);
				return (ENOMEM);
			}
			error = racct_set(td->td_proc, RACCT_MEMLOCK,
			    ptoa(vmspace_wired_count(td->td_proc->p_vmspace)) +
			    size);
			if (error != 0) {
				racct_set_force(td->td_proc, RACCT_VMEM,
				    map->size);
				PROC_UNLOCK(td->td_proc);
				return (error);
			}
		}
		PROC_UNLOCK(td->td_proc);
	}

	/*
	 * We currently can only deal with page aligned file offsets.
	 * The check is here rather than in the syscall because the
	 * kernel calls this function internally for other mmaping
	 * operations (such as in exec) and non-aligned offsets will
	 * cause pmap inconsistencies...so we want to be sure to
	 * disallow this in all cases.
	 */
	if (foff & PAGE_MASK)
		return (EINVAL);

	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		/* MAP_FIXED requires an exactly page-aligned address. */
		if (*addr != trunc_page(*addr))
			return (EINVAL);
		fitit = FALSE;
	}
	writecounted = FALSE;

	/*
	 * Lookup/allocate object.  Each helper may adjust maxprot,
	 * flags, and foff; the vnode helper also reports whether the
	 * mapping was charged to the vnode's writecount.
	 */
	switch (handle_type) {
	case OBJT_DEVICE:
		error = vm_mmap_cdev(td, size, prot, &maxprot, &flags,
		    handle, &foff, &object);
		break;
	case OBJT_VNODE:
		error = vm_mmap_vnode(td, size, prot, &maxprot, &flags,
		    handle, &foff, &object, &writecounted);
		break;
	case OBJT_SWAP:
		error = vm_mmap_shm(td, size, prot, &maxprot, &flags,
		    handle, foff, &object);
		break;
	case OBJT_DEFAULT:
		if (handle == NULL) {
			error = 0;
			break;
		}
		/* FALLTHROUGH */
	default:
		error = EINVAL;
		break;
	}
	if (error)
		return (error);
	if (flags & MAP_ANON) {
		object = NULL;
		docow = 0;
		/*
		 * Unnamed anonymous regions always start at 0.
		 */
		if (handle == 0)
			foff = 0;
	} else if (flags & MAP_PREFAULT_READ)
		docow = MAP_PREFAULT;
	else
		docow = MAP_PREFAULT_PARTIAL;

	/* Translate the remaining mmap flags into vm_map copy-on-write bits. */
	if ((flags & (MAP_ANON|MAP_SHARED)) == 0)
		docow |= MAP_COPY_ON_WRITE;
	if (flags & MAP_NOSYNC)
		docow |= MAP_DISABLE_SYNCER;
	if (flags & MAP_NOCORE)
		docow |= MAP_DISABLE_COREDUMP;
	/* Shared memory is also shared with children. */
	if (flags & MAP_SHARED)
		docow |= MAP_INHERIT_SHARE;
	if (writecounted)
		docow |= MAP_VN_WRITECOUNT;

	if (flags & MAP_STACK)
		rv = vm_map_stack(map, *addr, size, prot, maxprot,
		    docow | MAP_STACK_GROWS_DOWN);
	else if (fitit)
		/* Device objects get aligned placement hints. */
		rv = vm_map_find(map, object, foff, addr, size,
		    object != NULL && object->type == OBJT_DEVICE ?
		    VMFS_ALIGNED_SPACE : VMFS_ANY_SPACE, prot, maxprot, docow);
	else
		rv = vm_map_fixed(map, object, foff, *addr, size,
		    prot, maxprot, docow);

	if (rv == KERN_SUCCESS) {
		/*
		 * If the process has requested that all future mappings
		 * be wired, then heed this.
		 */
		if (map->flags & MAP_WIREFUTURE) {
			vm_map_wire(map, *addr, *addr + size,
			    VM_MAP_WIRE_USER | ((flags & MAP_STACK) ?
			    VM_MAP_WIRE_HOLESOK : VM_MAP_WIRE_NOHOLES));
		}
	} else {
		/*
		 * If this mapping was accounted for in the vnode's
		 * writecount, then undo that now.
		 */
		if (writecounted)
			vnode_pager_release_writecount(object, 0, size);
		/*
		 * Lose the object reference.  Will destroy the
		 * object if it's an unnamed anonymous mapping
		 * or named anonymous without other references.
		 */
		vm_object_deallocate(object);
	}
	return (vm_mmap_to_errno(rv));
}

/*
 * Translate a Mach VM return code to zero on success or the appropriate errno
 * on failure.
 */
int
vm_mmap_to_errno(int rv)
{

	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}