160727d8bSWarner Losh /*- 2df8bae1dSRodney W. Grimes * Copyright (c) 1988 University of Utah. 3df8bae1dSRodney W. Grimes * Copyright (c) 1991, 1993 4df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 5df8bae1dSRodney W. Grimes * 6df8bae1dSRodney W. Grimes * This code is derived from software contributed to Berkeley by 7df8bae1dSRodney W. Grimes * the Systems Programming Group of the University of Utah Computer 8df8bae1dSRodney W. Grimes * Science Department. 9df8bae1dSRodney W. Grimes * 10df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 11df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 12df8bae1dSRodney W. Grimes * are met: 13df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 14df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 15df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 16df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 17df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 18df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 19df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 20df8bae1dSRodney W. Grimes * without specific prior written permission. 21df8bae1dSRodney W. Grimes * 22df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32df8bae1dSRodney W. Grimes * SUCH DAMAGE. 33df8bae1dSRodney W. Grimes * 34df8bae1dSRodney W. Grimes * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$ 35df8bae1dSRodney W. Grimes * 36df8bae1dSRodney W. Grimes * @(#)vm_mmap.c 8.4 (Berkeley) 1/12/94 37df8bae1dSRodney W. Grimes */ 38df8bae1dSRodney W. Grimes 39df8bae1dSRodney W. Grimes /* 40df8bae1dSRodney W. Grimes * Mapped file (mmap) interface to VM 41df8bae1dSRodney W. Grimes */ 42df8bae1dSRodney W. Grimes 43874651b1SDavid E. O'Brien #include <sys/cdefs.h> 44874651b1SDavid E. O'Brien __FBSDID("$FreeBSD$"); 45874651b1SDavid E. O'Brien 465591b823SEivind Eklund #include "opt_compat.h" 4749874f6eSJoseph Koshy #include "opt_hwpmc_hooks.h" 48e9822d92SJoerg Wunsch 49df8bae1dSRodney W. Grimes #include <sys/param.h> 50df8bae1dSRodney W. Grimes #include <sys/systm.h> 51a9d2f8d8SRobert Watson #include <sys/capability.h> 52a9d2f8d8SRobert Watson #include <sys/kernel.h> 53fb919e4dSMark Murray #include <sys/lock.h> 5423955314SAlfred Perlstein #include <sys/mutex.h> 55d2d3e875SBruce Evans #include <sys/sysproto.h> 56df8bae1dSRodney W. Grimes #include <sys/filedesc.h> 57acd3428bSRobert Watson #include <sys/priv.h> 58df8bae1dSRodney W. Grimes #include <sys/proc.h> 591ba5ad42SEdward Tomasz Napierala #include <sys/racct.h> 60070f64feSMatthew Dillon #include <sys/resource.h> 61070f64feSMatthew Dillon #include <sys/resourcevar.h> 6289f6b863SAttilio Rao #include <sys/rwlock.h> 637e19eda4SAndrey Zonov #include <sys/sysctl.h> 64df8bae1dSRodney W. Grimes #include <sys/vnode.h> 653ac4d1efSBruce Evans #include <sys/fcntl.h> 66df8bae1dSRodney W. Grimes #include <sys/file.h> 67df8bae1dSRodney W. Grimes #include <sys/mman.h> 68b483c7f6SGuido van Rooij #include <sys/mount.h> 69df8bae1dSRodney W. Grimes #include <sys/conf.h> 704183b6b6SPeter Wemm #include <sys/stat.h> 71497a8238SKonstantin Belousov #include <sys/sysent.h> 72efeaf95aSDavid Greenman #include <sys/vmmeter.h> 73df8bae1dSRodney W. Grimes 74aed55708SRobert Watson #include <security/mac/mac_framework.h> 75aed55708SRobert Watson 76df8bae1dSRodney W. Grimes #include <vm/vm.h> 77efeaf95aSDavid Greenman #include <vm/vm_param.h> 78efeaf95aSDavid Greenman #include <vm/pmap.h> 79efeaf95aSDavid Greenman #include <vm/vm_map.h> 80efeaf95aSDavid Greenman #include <vm/vm_object.h> 811c7c3c6aSMatthew Dillon #include <vm/vm_page.h> 82df8bae1dSRodney W. Grimes #include <vm/vm_pager.h> 83b5e8ce9fSBruce Evans #include <vm/vm_pageout.h> 84efeaf95aSDavid Greenman #include <vm/vm_extern.h> 85867a482dSJohn Dyson #include <vm/vm_page.h> 8684110e7eSKonstantin Belousov #include <vm/vnode_pager.h> 87df8bae1dSRodney W. Grimes 8849874f6eSJoseph Koshy #ifdef HWPMC_HOOKS 8949874f6eSJoseph Koshy #include <sys/pmckern.h> 9049874f6eSJoseph Koshy #endif 9149874f6eSJoseph Koshy 927e19eda4SAndrey Zonov int old_mlock = 0; 937e19eda4SAndrey Zonov SYSCTL_INT(_vm, OID_AUTO, old_mlock, CTLFLAG_RW | CTLFLAG_TUN, &old_mlock, 0, 947e19eda4SAndrey Zonov "Do not apply RLIMIT_MEMLOCK on mlockall"); 957e19eda4SAndrey Zonov TUNABLE_INT("vm.old_mlock", &old_mlock); 967e19eda4SAndrey Zonov 97d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 98df8bae1dSRodney W. Grimes struct sbrk_args { 99df8bae1dSRodney W. Grimes int incr; 100df8bae1dSRodney W. Grimes }; 101d2d3e875SBruce Evans #endif 1020d94caffSDavid Greenman 103c8daea13SAlexander Kabaev static int vm_mmap_vnode(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, 10484110e7eSKonstantin Belousov int *, struct vnode *, vm_ooffset_t *, vm_object_t *, boolean_t *); 10598df9218SJohn Baldwin static int vm_mmap_cdev(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, 10664345f0bSJohn Baldwin int *, struct cdev *, vm_ooffset_t *, vm_object_t *); 1078e38aeffSJohn Baldwin static int vm_mmap_shm(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, 1088e38aeffSJohn Baldwin int *, struct shmfd *, vm_ooffset_t, vm_object_t *); 109c8daea13SAlexander Kabaev 110d2c60af8SMatthew Dillon /* 111d2c60af8SMatthew Dillon * MPSAFE 112d2c60af8SMatthew Dillon */ 113df8bae1dSRodney W. Grimes /* ARGSUSED */ 114df8bae1dSRodney W. Grimes int 1158451d0ddSKip Macy sys_sbrk(td, uap) 116b40ce416SJulian Elischer struct thread *td; 117df8bae1dSRodney W. Grimes struct sbrk_args *uap; 118df8bae1dSRodney W. Grimes { 119df8bae1dSRodney W. Grimes /* Not yet implemented */ 120df8bae1dSRodney W. Grimes return (EOPNOTSUPP); 121df8bae1dSRodney W. Grimes } 122df8bae1dSRodney W. Grimes 123d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 124df8bae1dSRodney W. Grimes struct sstk_args { 125df8bae1dSRodney W. Grimes int incr; 126df8bae1dSRodney W. Grimes }; 127d2d3e875SBruce Evans #endif 1280d94caffSDavid Greenman 129d2c60af8SMatthew Dillon /* 130d2c60af8SMatthew Dillon * MPSAFE 131d2c60af8SMatthew Dillon */ 132df8bae1dSRodney W. Grimes /* ARGSUSED */ 133df8bae1dSRodney W. Grimes int 1348451d0ddSKip Macy sys_sstk(td, uap) 135b40ce416SJulian Elischer struct thread *td; 136df8bae1dSRodney W. Grimes struct sstk_args *uap; 137df8bae1dSRodney W. Grimes { 138df8bae1dSRodney W. Grimes /* Not yet implemented */ 139df8bae1dSRodney W. Grimes return (EOPNOTSUPP); 140df8bae1dSRodney W. Grimes } 141df8bae1dSRodney W. Grimes 1421930e303SPoul-Henning Kamp #if defined(COMPAT_43) 143d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 144df8bae1dSRodney W. Grimes struct getpagesize_args { 145df8bae1dSRodney W. Grimes int dummy; 146df8bae1dSRodney W. Grimes }; 147d2d3e875SBruce Evans #endif 1480d94caffSDavid Greenman 149df8bae1dSRodney W. Grimes int 150b40ce416SJulian Elischer ogetpagesize(td, uap) 151b40ce416SJulian Elischer struct thread *td; 152df8bae1dSRodney W. Grimes struct getpagesize_args *uap; 153df8bae1dSRodney W. Grimes { 1540cddd8f0SMatthew Dillon /* MP SAFE */ 155b40ce416SJulian Elischer td->td_retval[0] = PAGE_SIZE; 156df8bae1dSRodney W. Grimes return (0); 157df8bae1dSRodney W. Grimes } 1581930e303SPoul-Henning Kamp #endif /* COMPAT_43 */ 159df8bae1dSRodney W. Grimes 16054f42e4bSPeter Wemm 16154f42e4bSPeter Wemm /* 16254f42e4bSPeter Wemm * Memory Map (mmap) system call. Note that the file offset 16354f42e4bSPeter Wemm * and address are allowed to be NOT page aligned, though if 16454f42e4bSPeter Wemm * the MAP_FIXED flag it set, both must have the same remainder 16554f42e4bSPeter Wemm * modulo the PAGE_SIZE (POSIX 1003.1b). If the address is not 16654f42e4bSPeter Wemm * page-aligned, the actual mapping starts at trunc_page(addr) 16754f42e4bSPeter Wemm * and the return value is adjusted up by the page offset. 168b4309055SMatthew Dillon * 169b4309055SMatthew Dillon * Generally speaking, only character devices which are themselves 170b4309055SMatthew Dillon * memory-based, such as a video framebuffer, can be mmap'd. Otherwise 171b4309055SMatthew Dillon * there would be no cache coherency between a descriptor and a VM mapping 172b4309055SMatthew Dillon * both to the same character device. 17354f42e4bSPeter Wemm */ 174d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 175df8bae1dSRodney W. Grimes struct mmap_args { 176651bb817SAlexander Langer void *addr; 177df8bae1dSRodney W. Grimes size_t len; 178df8bae1dSRodney W. Grimes int prot; 179df8bae1dSRodney W. Grimes int flags; 180df8bae1dSRodney W. Grimes int fd; 181df8bae1dSRodney W. Grimes long pad; 182df8bae1dSRodney W. Grimes off_t pos; 183df8bae1dSRodney W. Grimes }; 184d2d3e875SBruce Evans #endif 185df8bae1dSRodney W. Grimes 186d2c60af8SMatthew Dillon /* 187d2c60af8SMatthew Dillon * MPSAFE 188d2c60af8SMatthew Dillon */ 189df8bae1dSRodney W. Grimes int 1908451d0ddSKip Macy sys_mmap(td, uap) 191b40ce416SJulian Elischer struct thread *td; 19254d92145SMatthew Dillon struct mmap_args *uap; 193df8bae1dSRodney W. Grimes { 19449874f6eSJoseph Koshy #ifdef HWPMC_HOOKS 19549874f6eSJoseph Koshy struct pmckern_map_in pkm; 19649874f6eSJoseph Koshy #endif 197c8daea13SAlexander Kabaev struct file *fp; 198df8bae1dSRodney W. Grimes struct vnode *vp; 199df8bae1dSRodney W. Grimes vm_offset_t addr; 2009154ee6aSPeter Wemm vm_size_t size, pageoff; 201a9d2f8d8SRobert Watson vm_prot_t cap_maxprot, prot, maxprot; 202651bb817SAlexander Langer void *handle; 20398df9218SJohn Baldwin objtype_t handle_type; 204df8bae1dSRodney W. Grimes int flags, error; 20554f42e4bSPeter Wemm off_t pos; 206b40ce416SJulian Elischer struct vmspace *vms = td->td_proc->p_vmspace; 207a9d2f8d8SRobert Watson cap_rights_t rights; 208df8bae1dSRodney W. Grimes 20954f42e4bSPeter Wemm addr = (vm_offset_t) uap->addr; 21054f42e4bSPeter Wemm size = uap->len; 211df8bae1dSRodney W. Grimes prot = uap->prot & VM_PROT_ALL; 212df8bae1dSRodney W. Grimes flags = uap->flags; 21354f42e4bSPeter Wemm pos = uap->pos; 21454f42e4bSPeter Wemm 215426da3bcSAlfred Perlstein fp = NULL; 21627bfa958SSimon L. B. Nielsen 2177707ccabSKonstantin Belousov /* 2187707ccabSKonstantin Belousov * Enforce the constraints. 2197707ccabSKonstantin Belousov * Mapping of length 0 is only allowed for old binaries. 2207707ccabSKonstantin Belousov * Anonymous mapping shall specify -1 as filedescriptor and 2217707ccabSKonstantin Belousov * zero position for new code. Be nice to ancient a.out 2227707ccabSKonstantin Belousov * binaries and correct pos for anonymous mapping, since old 2237707ccabSKonstantin Belousov * ld.so sometimes issues anonymous map requests with non-zero 2247707ccabSKonstantin Belousov * pos. 2257707ccabSKonstantin Belousov */ 2267707ccabSKonstantin Belousov if (!SV_CURPROC_FLAG(SV_AOUT)) { 2277707ccabSKonstantin Belousov if ((uap->len == 0 && curproc->p_osrel >= P_OSREL_MAP_ANON) || 2287707ccabSKonstantin Belousov ((flags & MAP_ANON) != 0 && (uap->fd != -1 || pos != 0))) 229df8bae1dSRodney W. Grimes return (EINVAL); 2307707ccabSKonstantin Belousov } else { 2317707ccabSKonstantin Belousov if ((flags & MAP_ANON) != 0) 2327707ccabSKonstantin Belousov pos = 0; 2337707ccabSKonstantin Belousov } 2349154ee6aSPeter Wemm 2352267af78SJulian Elischer if (flags & MAP_STACK) { 2362267af78SJulian Elischer if ((uap->fd != -1) || 2372267af78SJulian Elischer ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE))) 2382267af78SJulian Elischer return (EINVAL); 2392267af78SJulian Elischer flags |= MAP_ANON; 2402267af78SJulian Elischer pos = 0; 2412907af2aSJulian Elischer } 2422907af2aSJulian Elischer 2439154ee6aSPeter Wemm /* 24454f42e4bSPeter Wemm * Align the file position to a page boundary, 24554f42e4bSPeter Wemm * and save its page offset component. 2469154ee6aSPeter Wemm */ 24754f42e4bSPeter Wemm pageoff = (pos & PAGE_MASK); 24854f42e4bSPeter Wemm pos -= pageoff; 24954f42e4bSPeter Wemm 25054f42e4bSPeter Wemm /* Adjust size for rounding (on both ends). */ 25154f42e4bSPeter Wemm size += pageoff; /* low end... */ 25254f42e4bSPeter Wemm size = (vm_size_t) round_page(size); /* hi end */ 2539154ee6aSPeter Wemm 254df8bae1dSRodney W. Grimes /* 2550d94caffSDavid Greenman * Check for illegal addresses. Watch out for address wrap... Note 2560d94caffSDavid Greenman * that VM_*_ADDRESS are not constants due to casts (argh). 257df8bae1dSRodney W. Grimes */ 258df8bae1dSRodney W. Grimes if (flags & MAP_FIXED) { 25954f42e4bSPeter Wemm /* 26054f42e4bSPeter Wemm * The specified address must have the same remainder 26154f42e4bSPeter Wemm * as the file offset taken modulo PAGE_SIZE, so it 26254f42e4bSPeter Wemm * should be aligned after adjustment by pageoff. 26354f42e4bSPeter Wemm */ 26454f42e4bSPeter Wemm addr -= pageoff; 26554f42e4bSPeter Wemm if (addr & PAGE_MASK) 26654f42e4bSPeter Wemm return (EINVAL); 26727bfa958SSimon L. B. Nielsen 26854f42e4bSPeter Wemm /* Address range must be all in user VM space. */ 26905ba50f5SJake Burkholder if (addr < vm_map_min(&vms->vm_map) || 27005ba50f5SJake Burkholder addr + size > vm_map_max(&vms->vm_map)) 271df8bae1dSRodney W. Grimes return (EINVAL); 272bbc0ec52SDavid Greenman if (addr + size < addr) 273df8bae1dSRodney W. Grimes return (EINVAL); 27491d5354aSJohn Baldwin } else { 275df8bae1dSRodney W. Grimes /* 27654f42e4bSPeter Wemm * XXX for non-fixed mappings where no hint is provided or 27754f42e4bSPeter Wemm * the hint would fall in the potential heap space, 27854f42e4bSPeter Wemm * place it after the end of the largest possible heap. 279df8bae1dSRodney W. Grimes * 28054f42e4bSPeter Wemm * There should really be a pmap call to determine a reasonable 28154f42e4bSPeter Wemm * location. 282df8bae1dSRodney W. Grimes */ 28391d5354aSJohn Baldwin PROC_LOCK(td->td_proc); 28491d5354aSJohn Baldwin if (addr == 0 || 2851f6889a1SMatthew Dillon (addr >= round_page((vm_offset_t)vms->vm_taddr) && 286c460ac3aSPeter Wemm addr < round_page((vm_offset_t)vms->vm_daddr + 28791d5354aSJohn Baldwin lim_max(td->td_proc, RLIMIT_DATA)))) 288c460ac3aSPeter Wemm addr = round_page((vm_offset_t)vms->vm_daddr + 28991d5354aSJohn Baldwin lim_max(td->td_proc, RLIMIT_DATA)); 29091d5354aSJohn Baldwin PROC_UNLOCK(td->td_proc); 29191d5354aSJohn Baldwin } 292df8bae1dSRodney W. Grimes if (flags & MAP_ANON) { 293df8bae1dSRodney W. Grimes /* 294df8bae1dSRodney W. Grimes * Mapping blank space is trivial. 295df8bae1dSRodney W. Grimes */ 296df8bae1dSRodney W. Grimes handle = NULL; 29798df9218SJohn Baldwin handle_type = OBJT_DEFAULT; 298df8bae1dSRodney W. Grimes maxprot = VM_PROT_ALL; 299a9d2f8d8SRobert Watson cap_maxprot = VM_PROT_ALL; 30030d4dd7eSAlexander Kabaev } else { 301df8bae1dSRodney W. Grimes /* 302a9d2f8d8SRobert Watson * Mapping file, get fp for validation and don't let the 303a9d2f8d8SRobert Watson * descriptor disappear on us if we block. Check capability 304a9d2f8d8SRobert Watson * rights, but also return the maximum rights to be combined 305a9d2f8d8SRobert Watson * with maxprot later. 306df8bae1dSRodney W. Grimes */ 307a9d2f8d8SRobert Watson rights = CAP_MMAP; 308a9d2f8d8SRobert Watson if (prot & PROT_READ) 3092609222aSPawel Jakub Dawidek rights |= CAP_MMAP_R; 310a9d2f8d8SRobert Watson if ((flags & MAP_SHARED) != 0) { 311a9d2f8d8SRobert Watson if (prot & PROT_WRITE) 3122609222aSPawel Jakub Dawidek rights |= CAP_MMAP_W; 313a9d2f8d8SRobert Watson } 314a9d2f8d8SRobert Watson if (prot & PROT_EXEC) 3152609222aSPawel Jakub Dawidek rights |= CAP_MMAP_X; 316a9d2f8d8SRobert Watson if ((error = fget_mmap(td, uap->fd, rights, &cap_maxprot, 317a9d2f8d8SRobert Watson &fp)) != 0) 318426da3bcSAlfred Perlstein goto done; 3198e38aeffSJohn Baldwin if (fp->f_type == DTYPE_SHM) { 3208e38aeffSJohn Baldwin handle = fp->f_data; 3218e38aeffSJohn Baldwin handle_type = OBJT_SWAP; 3228e38aeffSJohn Baldwin maxprot = VM_PROT_NONE; 3238e38aeffSJohn Baldwin 3248e38aeffSJohn Baldwin /* FREAD should always be set. */ 3258e38aeffSJohn Baldwin if (fp->f_flag & FREAD) 3268e38aeffSJohn Baldwin maxprot |= VM_PROT_EXECUTE | VM_PROT_READ; 3278e38aeffSJohn Baldwin if (fp->f_flag & FWRITE) 3288e38aeffSJohn Baldwin maxprot |= VM_PROT_WRITE; 3298e38aeffSJohn Baldwin goto map; 3308e38aeffSJohn Baldwin } 331e4ca250dSJohn Baldwin if (fp->f_type != DTYPE_VNODE) { 33289eae00bSTom Rhodes error = ENODEV; 333426da3bcSAlfred Perlstein goto done; 334e4ca250dSJohn Baldwin } 3358e38aeffSJohn Baldwin #if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \ 3368e38aeffSJohn Baldwin defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) 337279d7226SMatthew Dillon /* 338aa543039SGarrett Wollman * POSIX shared-memory objects are defined to have 339aa543039SGarrett Wollman * kernel persistence, and are not defined to support 340aa543039SGarrett Wollman * read(2)/write(2) -- or even open(2). Thus, we can 341aa543039SGarrett Wollman * use MAP_ASYNC to trade on-disk coherence for speed. 342aa543039SGarrett Wollman * The shm_open(3) library routine turns on the FPOSIXSHM 343aa543039SGarrett Wollman * flag to request this behavior. 344aa543039SGarrett Wollman */ 345aa543039SGarrett Wollman if (fp->f_flag & FPOSIXSHM) 346aa543039SGarrett Wollman flags |= MAP_NOSYNC; 3478e38aeffSJohn Baldwin #endif 3483b6d9652SPoul-Henning Kamp vp = fp->f_vnode; 349c8bdd56bSGuido van Rooij /* 350df8bae1dSRodney W. Grimes * Ensure that file and memory protections are 351df8bae1dSRodney W. Grimes * compatible. Note that we only worry about 352df8bae1dSRodney W. Grimes * writability if mapping is shared; in this case, 353df8bae1dSRodney W. Grimes * current and max prot are dictated by the open file. 354df8bae1dSRodney W. Grimes * XXX use the vnode instead? Problem is: what 3550d94caffSDavid Greenman * credentials do we use for determination? What if 3560d94caffSDavid Greenman * proc does a setuid? 357df8bae1dSRodney W. Grimes */ 3588eec77b0STim J. Robbins if (vp->v_mount != NULL && vp->v_mount->mnt_flag & MNT_NOEXEC) 359b483c7f6SGuido van Rooij maxprot = VM_PROT_NONE; 360b483c7f6SGuido van Rooij else 361b483c7f6SGuido van Rooij maxprot = VM_PROT_EXECUTE; 362279d7226SMatthew Dillon if (fp->f_flag & FREAD) { 363df8bae1dSRodney W. Grimes maxprot |= VM_PROT_READ; 364279d7226SMatthew Dillon } else if (prot & PROT_READ) { 365279d7226SMatthew Dillon error = EACCES; 366279d7226SMatthew Dillon goto done; 367279d7226SMatthew Dillon } 368c8bdd56bSGuido van Rooij /* 369c8bdd56bSGuido van Rooij * If we are sharing potential changes (either via 370c8bdd56bSGuido van Rooij * MAP_SHARED or via the implicit sharing of character 371c8bdd56bSGuido van Rooij * device mappings), and we are trying to get write 372c8bdd56bSGuido van Rooij * permission although we opened it without asking 373c8daea13SAlexander Kabaev * for it, bail out. 374c8bdd56bSGuido van Rooij */ 375ce7a036dSAlexander Kabaev if ((flags & MAP_SHARED) != 0) { 37605feb99fSGuido van Rooij if ((fp->f_flag & FWRITE) != 0) { 377df8bae1dSRodney W. Grimes maxprot |= VM_PROT_WRITE; 378279d7226SMatthew Dillon } else if ((prot & PROT_WRITE) != 0) { 379279d7226SMatthew Dillon error = EACCES; 380279d7226SMatthew Dillon goto done; 381279d7226SMatthew Dillon } 382ce7a036dSAlexander Kabaev } else if (vp->v_type != VCHR || (fp->f_flag & FWRITE) != 0) { 38305feb99fSGuido van Rooij maxprot |= VM_PROT_WRITE; 384a9d2f8d8SRobert Watson cap_maxprot |= VM_PROT_WRITE; 385279d7226SMatthew Dillon } 386651bb817SAlexander Langer handle = (void *)vp; 38798df9218SJohn Baldwin handle_type = OBJT_VNODE; 38830d4dd7eSAlexander Kabaev } 3898e38aeffSJohn Baldwin map: 39036b90789SKonstantin Belousov td->td_fpop = fp; 391a9d2f8d8SRobert Watson maxprot &= cap_maxprot; 3921f6889a1SMatthew Dillon error = vm_mmap(&vms->vm_map, &addr, size, prot, maxprot, 39398df9218SJohn Baldwin flags, handle_type, handle, pos); 39436b90789SKonstantin Belousov td->td_fpop = NULL; 39549874f6eSJoseph Koshy #ifdef HWPMC_HOOKS 39649874f6eSJoseph Koshy /* inform hwpmc(4) if an executable is being mapped */ 39749874f6eSJoseph Koshy if (error == 0 && handle_type == OBJT_VNODE && 39849874f6eSJoseph Koshy (prot & PROT_EXEC)) { 39949874f6eSJoseph Koshy pkm.pm_file = handle; 40049874f6eSJoseph Koshy pkm.pm_address = (uintptr_t) addr; 40149874f6eSJoseph Koshy PMC_CALL_HOOK(td, PMC_FN_MMAP, (void *) &pkm); 40249874f6eSJoseph Koshy } 40349874f6eSJoseph Koshy #endif 404df8bae1dSRodney W. Grimes if (error == 0) 405b40ce416SJulian Elischer td->td_retval[0] = (register_t) (addr + pageoff); 406279d7226SMatthew Dillon done: 407279d7226SMatthew Dillon if (fp) 408b40ce416SJulian Elischer fdrop(fp, td); 409f6b5b182SJeff Roberson 410df8bae1dSRodney W. Grimes return (error); 411df8bae1dSRodney W. Grimes } 412df8bae1dSRodney W. Grimes 413c2815ad5SPeter Wemm int 414c2815ad5SPeter Wemm freebsd6_mmap(struct thread *td, struct freebsd6_mmap_args *uap) 415c2815ad5SPeter Wemm { 416c2815ad5SPeter Wemm struct mmap_args oargs; 417c2815ad5SPeter Wemm 418c2815ad5SPeter Wemm oargs.addr = uap->addr; 419c2815ad5SPeter Wemm oargs.len = uap->len; 420c2815ad5SPeter Wemm oargs.prot = uap->prot; 421c2815ad5SPeter Wemm oargs.flags = uap->flags; 422c2815ad5SPeter Wemm oargs.fd = uap->fd; 423c2815ad5SPeter Wemm oargs.pos = uap->pos; 4248451d0ddSKip Macy return (sys_mmap(td, &oargs)); 425c2815ad5SPeter Wemm } 426c2815ad5SPeter Wemm 42705f0fdd2SPoul-Henning Kamp #ifdef COMPAT_43 428d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 42905f0fdd2SPoul-Henning Kamp struct ommap_args { 43005f0fdd2SPoul-Henning Kamp caddr_t addr; 43105f0fdd2SPoul-Henning Kamp int len; 43205f0fdd2SPoul-Henning Kamp int prot; 43305f0fdd2SPoul-Henning Kamp int flags; 43405f0fdd2SPoul-Henning Kamp int fd; 43505f0fdd2SPoul-Henning Kamp long pos; 43605f0fdd2SPoul-Henning Kamp }; 437d2d3e875SBruce Evans #endif 43805f0fdd2SPoul-Henning Kamp int 439b40ce416SJulian Elischer ommap(td, uap) 440b40ce416SJulian Elischer struct thread *td; 44154d92145SMatthew Dillon struct ommap_args *uap; 44205f0fdd2SPoul-Henning Kamp { 44305f0fdd2SPoul-Henning Kamp struct mmap_args nargs; 44405f0fdd2SPoul-Henning Kamp static const char cvtbsdprot[8] = { 44505f0fdd2SPoul-Henning Kamp 0, 44605f0fdd2SPoul-Henning Kamp PROT_EXEC, 44705f0fdd2SPoul-Henning Kamp PROT_WRITE, 44805f0fdd2SPoul-Henning Kamp PROT_EXEC | PROT_WRITE, 44905f0fdd2SPoul-Henning Kamp PROT_READ, 45005f0fdd2SPoul-Henning Kamp PROT_EXEC | PROT_READ, 45105f0fdd2SPoul-Henning Kamp PROT_WRITE | PROT_READ, 45205f0fdd2SPoul-Henning Kamp PROT_EXEC | PROT_WRITE | PROT_READ, 45305f0fdd2SPoul-Henning Kamp }; 4540d94caffSDavid Greenman 45505f0fdd2SPoul-Henning Kamp #define OMAP_ANON 0x0002 45605f0fdd2SPoul-Henning Kamp #define OMAP_COPY 0x0020 45705f0fdd2SPoul-Henning Kamp #define OMAP_SHARED 0x0010 45805f0fdd2SPoul-Henning Kamp #define OMAP_FIXED 0x0100 45905f0fdd2SPoul-Henning Kamp 46005f0fdd2SPoul-Henning Kamp nargs.addr = uap->addr; 46105f0fdd2SPoul-Henning Kamp nargs.len = uap->len; 46205f0fdd2SPoul-Henning Kamp nargs.prot = cvtbsdprot[uap->prot & 0x7]; 463ee4116b8SKonstantin Belousov #ifdef COMPAT_FREEBSD32 464ee4116b8SKonstantin Belousov #if defined(__amd64__) || defined(__ia64__) 465ee4116b8SKonstantin Belousov if (i386_read_exec && SV_PROC_FLAG(td->td_proc, SV_ILP32) && 466ee4116b8SKonstantin Belousov nargs.prot != 0) 467ee4116b8SKonstantin Belousov nargs.prot |= PROT_EXEC; 468ee4116b8SKonstantin Belousov #endif 469ee4116b8SKonstantin Belousov #endif 47005f0fdd2SPoul-Henning Kamp nargs.flags = 0; 47105f0fdd2SPoul-Henning Kamp if (uap->flags & OMAP_ANON) 47205f0fdd2SPoul-Henning Kamp nargs.flags |= MAP_ANON; 47305f0fdd2SPoul-Henning Kamp if (uap->flags & OMAP_COPY) 47405f0fdd2SPoul-Henning Kamp nargs.flags |= MAP_COPY; 47505f0fdd2SPoul-Henning Kamp if (uap->flags & OMAP_SHARED) 47605f0fdd2SPoul-Henning Kamp nargs.flags |= MAP_SHARED; 47705f0fdd2SPoul-Henning Kamp else 47805f0fdd2SPoul-Henning Kamp nargs.flags |= MAP_PRIVATE; 47905f0fdd2SPoul-Henning Kamp if (uap->flags & OMAP_FIXED) 48005f0fdd2SPoul-Henning Kamp nargs.flags |= MAP_FIXED; 48105f0fdd2SPoul-Henning Kamp nargs.fd = uap->fd; 48205f0fdd2SPoul-Henning Kamp nargs.pos = uap->pos; 4838451d0ddSKip Macy return (sys_mmap(td, &nargs)); 48405f0fdd2SPoul-Henning Kamp } 48505f0fdd2SPoul-Henning Kamp #endif /* COMPAT_43 */ 48605f0fdd2SPoul-Henning Kamp 48705f0fdd2SPoul-Henning Kamp 488d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 489df8bae1dSRodney W. Grimes struct msync_args { 490651bb817SAlexander Langer void *addr; 491c899450bSPeter Wemm size_t len; 492e6c6af11SDavid Greenman int flags; 493df8bae1dSRodney W. Grimes }; 494d2d3e875SBruce Evans #endif 495d2c60af8SMatthew Dillon /* 496d2c60af8SMatthew Dillon * MPSAFE 497d2c60af8SMatthew Dillon */ 498df8bae1dSRodney W. Grimes int 4998451d0ddSKip Macy sys_msync(td, uap) 500b40ce416SJulian Elischer struct thread *td; 501df8bae1dSRodney W. Grimes struct msync_args *uap; 502df8bae1dSRodney W. Grimes { 503df8bae1dSRodney W. Grimes vm_offset_t addr; 504dabee6feSPeter Wemm vm_size_t size, pageoff; 505e6c6af11SDavid Greenman int flags; 506df8bae1dSRodney W. Grimes vm_map_t map; 507df8bae1dSRodney W. Grimes int rv; 508df8bae1dSRodney W. Grimes 509df8bae1dSRodney W. Grimes addr = (vm_offset_t) uap->addr; 5109154ee6aSPeter Wemm size = uap->len; 511e6c6af11SDavid Greenman flags = uap->flags; 512e6c6af11SDavid Greenman 513dabee6feSPeter Wemm pageoff = (addr & PAGE_MASK); 514dabee6feSPeter Wemm addr -= pageoff; 515dabee6feSPeter Wemm size += pageoff; 516dabee6feSPeter Wemm size = (vm_size_t) round_page(size); 5179154ee6aSPeter Wemm if (addr + size < addr) 518dabee6feSPeter Wemm return (EINVAL); 519dabee6feSPeter Wemm 520dabee6feSPeter Wemm if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE)) 5211e62bc63SDavid Greenman return (EINVAL); 5221e62bc63SDavid Greenman 523b40ce416SJulian Elischer map = &td->td_proc->p_vmspace->vm_map; 5249154ee6aSPeter Wemm 525df8bae1dSRodney W. Grimes /* 526df8bae1dSRodney W. Grimes * Clean the pages and interpret the return value. 527df8bae1dSRodney W. Grimes */ 528950f8459SAlan Cox rv = vm_map_sync(map, addr, addr + size, (flags & MS_ASYNC) == 0, 529e6c6af11SDavid Greenman (flags & MS_INVALIDATE) != 0); 530df8bae1dSRodney W. Grimes switch (rv) { 531df8bae1dSRodney W. Grimes case KERN_SUCCESS: 532d2c60af8SMatthew Dillon return (0); 533df8bae1dSRodney W. Grimes case KERN_INVALID_ADDRESS: 534df8bae1dSRodney W. Grimes return (EINVAL); /* Sun returns ENOMEM? */ 535b7b7cd44SAlan Cox case KERN_INVALID_ARGUMENT: 536b7b7cd44SAlan Cox return (EBUSY); 537126d6082SKonstantin Belousov case KERN_FAILURE: 538126d6082SKonstantin Belousov return (EIO); 539df8bae1dSRodney W. Grimes default: 540df8bae1dSRodney W. Grimes return (EINVAL); 541df8bae1dSRodney W. Grimes } 542df8bae1dSRodney W. Grimes } 543df8bae1dSRodney W. Grimes 544d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 545df8bae1dSRodney W. Grimes struct munmap_args { 546651bb817SAlexander Langer void *addr; 5479154ee6aSPeter Wemm size_t len; 548df8bae1dSRodney W. Grimes }; 549d2d3e875SBruce Evans #endif 550d2c60af8SMatthew Dillon /* 551d2c60af8SMatthew Dillon * MPSAFE 552d2c60af8SMatthew Dillon */ 553df8bae1dSRodney W. Grimes int 5548451d0ddSKip Macy sys_munmap(td, uap) 555b40ce416SJulian Elischer struct thread *td; 55654d92145SMatthew Dillon struct munmap_args *uap; 557df8bae1dSRodney W. Grimes { 55849874f6eSJoseph Koshy #ifdef HWPMC_HOOKS 55949874f6eSJoseph Koshy struct pmckern_map_out pkm; 56049874f6eSJoseph Koshy vm_map_entry_t entry; 56149874f6eSJoseph Koshy #endif 562df8bae1dSRodney W. Grimes vm_offset_t addr; 563dabee6feSPeter Wemm vm_size_t size, pageoff; 564df8bae1dSRodney W. Grimes vm_map_t map; 565df8bae1dSRodney W. Grimes 566df8bae1dSRodney W. Grimes addr = (vm_offset_t) uap->addr; 5679154ee6aSPeter Wemm size = uap->len; 568d8834602SAlan Cox if (size == 0) 569d8834602SAlan Cox return (EINVAL); 570dabee6feSPeter Wemm 571dabee6feSPeter Wemm pageoff = (addr & PAGE_MASK); 572dabee6feSPeter Wemm addr -= pageoff; 573dabee6feSPeter Wemm size += pageoff; 574dabee6feSPeter Wemm size = (vm_size_t) round_page(size); 5759154ee6aSPeter Wemm if (addr + size < addr) 576df8bae1dSRodney W. Grimes return (EINVAL); 5779154ee6aSPeter Wemm 578df8bae1dSRodney W. Grimes /* 57905ba50f5SJake Burkholder * Check for illegal addresses. Watch out for address wrap... 580df8bae1dSRodney W. Grimes */ 581b40ce416SJulian Elischer map = &td->td_proc->p_vmspace->vm_map; 58205ba50f5SJake Burkholder if (addr < vm_map_min(map) || addr + size > vm_map_max(map)) 58305ba50f5SJake Burkholder return (EINVAL); 584d8834602SAlan Cox vm_map_lock(map); 58549874f6eSJoseph Koshy #ifdef HWPMC_HOOKS 58649874f6eSJoseph Koshy /* 58749874f6eSJoseph Koshy * Inform hwpmc if the address range being unmapped contains 58849874f6eSJoseph Koshy * an executable region. 58949874f6eSJoseph Koshy */ 5900d419640SRyan Stone pkm.pm_address = (uintptr_t) NULL; 59149874f6eSJoseph Koshy if (vm_map_lookup_entry(map, addr, &entry)) { 59249874f6eSJoseph Koshy for (; 59349874f6eSJoseph Koshy entry != &map->header && entry->start < addr + size; 59449874f6eSJoseph Koshy entry = entry->next) { 59549874f6eSJoseph Koshy if (vm_map_check_protection(map, entry->start, 59649874f6eSJoseph Koshy entry->end, VM_PROT_EXECUTE) == TRUE) { 59749874f6eSJoseph Koshy pkm.pm_address = (uintptr_t) addr; 59849874f6eSJoseph Koshy pkm.pm_size = (size_t) size; 59949874f6eSJoseph Koshy break; 60049874f6eSJoseph Koshy } 60149874f6eSJoseph Koshy } 60249874f6eSJoseph Koshy } 60349874f6eSJoseph Koshy #endif 604655c3490SKonstantin Belousov vm_map_delete(map, addr, addr + size); 6050d419640SRyan Stone 6060d419640SRyan Stone #ifdef HWPMC_HOOKS 6070d419640SRyan Stone /* downgrade the lock to prevent a LOR with the pmc-sx lock */ 6080d419640SRyan Stone vm_map_lock_downgrade(map); 609d473d3a1SRyan Stone if (pkm.pm_address != (uintptr_t) NULL) 6100d419640SRyan Stone PMC_CALL_HOOK(td, PMC_FN_MUNMAP, (void *) &pkm); 6110d419640SRyan Stone vm_map_unlock_read(map); 6120d419640SRyan Stone #else 613d8834602SAlan Cox vm_map_unlock(map); 6140d419640SRyan Stone #endif 6150d419640SRyan Stone /* vm_map_delete returns nothing but KERN_SUCCESS anyway */ 616df8bae1dSRodney W. Grimes return (0); 617df8bae1dSRodney W. Grimes } 618df8bae1dSRodney W. Grimes 619d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 620df8bae1dSRodney W. Grimes struct mprotect_args { 621651bb817SAlexander Langer const void *addr; 6229154ee6aSPeter Wemm size_t len; 623df8bae1dSRodney W. Grimes int prot; 624df8bae1dSRodney W. Grimes }; 625d2d3e875SBruce Evans #endif 626d2c60af8SMatthew Dillon /* 627d2c60af8SMatthew Dillon * MPSAFE 628d2c60af8SMatthew Dillon */ 629df8bae1dSRodney W. Grimes int 6308451d0ddSKip Macy sys_mprotect(td, uap) 631b40ce416SJulian Elischer struct thread *td; 632df8bae1dSRodney W. Grimes struct mprotect_args *uap; 633df8bae1dSRodney W. Grimes { 634df8bae1dSRodney W. Grimes vm_offset_t addr; 635dabee6feSPeter Wemm vm_size_t size, pageoff; 63654d92145SMatthew Dillon vm_prot_t prot; 637df8bae1dSRodney W. Grimes 638df8bae1dSRodney W. Grimes addr = (vm_offset_t) uap->addr; 6399154ee6aSPeter Wemm size = uap->len; 640df8bae1dSRodney W. Grimes prot = uap->prot & VM_PROT_ALL; 641df8bae1dSRodney W. Grimes 642dabee6feSPeter Wemm pageoff = (addr & PAGE_MASK); 643dabee6feSPeter Wemm addr -= pageoff; 644dabee6feSPeter Wemm size += pageoff; 645dabee6feSPeter Wemm size = (vm_size_t) round_page(size); 6469154ee6aSPeter Wemm if (addr + size < addr) 647dabee6feSPeter Wemm return (EINVAL); 648dabee6feSPeter Wemm 64943285049SAlan Cox switch (vm_map_protect(&td->td_proc->p_vmspace->vm_map, addr, 65043285049SAlan Cox addr + size, prot, FALSE)) { 651df8bae1dSRodney W. Grimes case KERN_SUCCESS: 652df8bae1dSRodney W. Grimes return (0); 653df8bae1dSRodney W. Grimes case KERN_PROTECTION_FAILURE: 654df8bae1dSRodney W. Grimes return (EACCES); 6553364c323SKonstantin Belousov case KERN_RESOURCE_SHORTAGE: 6563364c323SKonstantin Belousov return (ENOMEM); 657df8bae1dSRodney W. Grimes } 658df8bae1dSRodney W. Grimes return (EINVAL); 659df8bae1dSRodney W. Grimes } 660df8bae1dSRodney W. Grimes 661d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 662dabee6feSPeter Wemm struct minherit_args { 663651bb817SAlexander Langer void *addr; 6649154ee6aSPeter Wemm size_t len; 665dabee6feSPeter Wemm int inherit; 666dabee6feSPeter Wemm }; 667dabee6feSPeter Wemm #endif 668d2c60af8SMatthew Dillon /* 669d2c60af8SMatthew Dillon * MPSAFE 670d2c60af8SMatthew Dillon */ 671dabee6feSPeter Wemm int 6728451d0ddSKip Macy sys_minherit(td, uap) 673b40ce416SJulian Elischer struct thread *td; 674dabee6feSPeter Wemm struct minherit_args *uap; 675dabee6feSPeter Wemm { 676dabee6feSPeter Wemm vm_offset_t addr; 677dabee6feSPeter Wemm vm_size_t size, pageoff; 67854d92145SMatthew Dillon vm_inherit_t inherit; 679dabee6feSPeter Wemm 680dabee6feSPeter Wemm addr = (vm_offset_t)uap->addr; 6819154ee6aSPeter Wemm size = uap->len; 682dabee6feSPeter Wemm inherit = uap->inherit; 683dabee6feSPeter Wemm 684dabee6feSPeter Wemm pageoff = (addr & PAGE_MASK); 685dabee6feSPeter Wemm addr -= pageoff; 686dabee6feSPeter Wemm size += pageoff; 687dabee6feSPeter Wemm size = (vm_size_t) round_page(size); 6889154ee6aSPeter Wemm if (addr + size < addr) 689dabee6feSPeter Wemm return (EINVAL); 690dabee6feSPeter Wemm 691e0be79afSAlan Cox switch (vm_map_inherit(&td->td_proc->p_vmspace->vm_map, addr, 692e0be79afSAlan Cox addr + size, inherit)) { 693dabee6feSPeter Wemm case KERN_SUCCESS: 694dabee6feSPeter Wemm return (0); 695dabee6feSPeter Wemm case KERN_PROTECTION_FAILURE: 696dabee6feSPeter Wemm return (EACCES); 697dabee6feSPeter Wemm } 698dabee6feSPeter Wemm return (EINVAL); 699dabee6feSPeter Wemm } 700dabee6feSPeter Wemm 701dabee6feSPeter Wemm #ifndef _SYS_SYSPROTO_H_ 702df8bae1dSRodney W. Grimes struct madvise_args { 703651bb817SAlexander Langer void *addr; 7049154ee6aSPeter Wemm size_t len; 705df8bae1dSRodney W. Grimes int behav; 706df8bae1dSRodney W. Grimes }; 707d2d3e875SBruce Evans #endif 7080d94caffSDavid Greenman 709d2c60af8SMatthew Dillon /* 710d2c60af8SMatthew Dillon * MPSAFE 711d2c60af8SMatthew Dillon */ 712df8bae1dSRodney W. Grimes int 7138451d0ddSKip Macy sys_madvise(td, uap) 714b40ce416SJulian Elischer struct thread *td; 715df8bae1dSRodney W. Grimes struct madvise_args *uap; 716df8bae1dSRodney W. Grimes { 717f35329acSJohn Dyson vm_offset_t start, end; 71805ba50f5SJake Burkholder vm_map_t map; 719f4cf2141SWes Peters struct proc *p; 720f4cf2141SWes Peters int error; 721b4309055SMatthew Dillon 722b4309055SMatthew Dillon /* 723f4cf2141SWes Peters * Check for our special case, advising the swap pager we are 724f4cf2141SWes Peters * "immortal." 725f4cf2141SWes Peters */ 726f4cf2141SWes Peters if (uap->behav == MADV_PROTECT) { 727acd3428bSRobert Watson error = priv_check(td, PRIV_VM_MADV_PROTECT); 72869297bf8SJohn Baldwin if (error == 0) { 729f4cf2141SWes Peters p = td->td_proc; 730f4cf2141SWes Peters PROC_LOCK(p); 731f4cf2141SWes Peters p->p_flag |= P_PROTECTED; 732f4cf2141SWes Peters PROC_UNLOCK(p); 73369297bf8SJohn Baldwin } 734f4cf2141SWes Peters return (error); 735f4cf2141SWes Peters } 736f4cf2141SWes Peters /* 737b4309055SMatthew Dillon * Check for illegal behavior 738b4309055SMatthew Dillon */ 7399730a5daSPaul Saab if (uap->behav < 0 || uap->behav > MADV_CORE) 740b4309055SMatthew Dillon return (EINVAL); 741867a482dSJohn Dyson /* 742867a482dSJohn Dyson * Check for illegal addresses. Watch out for address wrap... Note 743867a482dSJohn Dyson * that VM_*_ADDRESS are not constants due to casts (argh). 744867a482dSJohn Dyson */ 74505ba50f5SJake Burkholder map = &td->td_proc->p_vmspace->vm_map; 74605ba50f5SJake Burkholder if ((vm_offset_t)uap->addr < vm_map_min(map) || 74705ba50f5SJake Burkholder (vm_offset_t)uap->addr + uap->len > vm_map_max(map)) 748867a482dSJohn Dyson return (EINVAL); 749867a482dSJohn Dyson if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr) 750867a482dSJohn Dyson return (EINVAL); 751867a482dSJohn Dyson 752867a482dSJohn Dyson /* 753867a482dSJohn Dyson * Since this routine is only advisory, we default to conservative 754867a482dSJohn Dyson * behavior. 755867a482dSJohn Dyson */ 756cd6eea25SDavid Greenman start = trunc_page((vm_offset_t) uap->addr); 757cd6eea25SDavid Greenman end = round_page((vm_offset_t) uap->addr + uap->len); 758867a482dSJohn Dyson 75905ba50f5SJake Burkholder if (vm_map_madvise(map, start, end, uap->behav)) 760094f6d26SAlan Cox return (EINVAL); 761094f6d26SAlan Cox return (0); 762df8bae1dSRodney W. Grimes } 763df8bae1dSRodney W. Grimes 764d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 765df8bae1dSRodney W. Grimes struct mincore_args { 766651bb817SAlexander Langer const void *addr; 7679154ee6aSPeter Wemm size_t len; 768df8bae1dSRodney W. Grimes char *vec; 769df8bae1dSRodney W. Grimes }; 770d2d3e875SBruce Evans #endif 7710d94caffSDavid Greenman 772d2c60af8SMatthew Dillon /* 773d2c60af8SMatthew Dillon * MPSAFE 774d2c60af8SMatthew Dillon */ 775df8bae1dSRodney W. Grimes int 7768451d0ddSKip Macy sys_mincore(td, uap) 777b40ce416SJulian Elischer struct thread *td; 778df8bae1dSRodney W. Grimes struct mincore_args *uap; 779df8bae1dSRodney W. Grimes { 780867a482dSJohn Dyson vm_offset_t addr, first_addr; 781867a482dSJohn Dyson vm_offset_t end, cend; 782867a482dSJohn Dyson pmap_t pmap; 783867a482dSJohn Dyson vm_map_t map; 78402c04a2fSJohn Dyson char *vec; 785d2c60af8SMatthew Dillon int error = 0; 786867a482dSJohn Dyson int vecindex, lastvecindex; 78754d92145SMatthew Dillon vm_map_entry_t current; 788867a482dSJohn Dyson vm_map_entry_t entry; 789567e51e1SAlan Cox vm_object_t object; 790567e51e1SAlan Cox vm_paddr_t locked_pa; 791567e51e1SAlan Cox vm_page_t m; 792567e51e1SAlan Cox vm_pindex_t pindex; 793867a482dSJohn Dyson int mincoreinfo; 794dd2622a8SAlan Cox unsigned int timestamp; 795567e51e1SAlan Cox boolean_t locked; 796df8bae1dSRodney W. Grimes 797867a482dSJohn Dyson /* 798867a482dSJohn Dyson * Make sure that the addresses presented are valid for user 799867a482dSJohn Dyson * mode. 800867a482dSJohn Dyson */ 801867a482dSJohn Dyson first_addr = addr = trunc_page((vm_offset_t) uap->addr); 8029154ee6aSPeter Wemm end = addr + (vm_size_t)round_page(uap->len); 80305ba50f5SJake Burkholder map = &td->td_proc->p_vmspace->vm_map; 80405ba50f5SJake Burkholder if (end > vm_map_max(map) || end < addr) 805455dd7d4SKonstantin Belousov return (ENOMEM); 80602c04a2fSJohn Dyson 807867a482dSJohn Dyson /* 808867a482dSJohn Dyson * Address of byte vector 809867a482dSJohn Dyson */ 81002c04a2fSJohn Dyson vec = uap->vec; 811867a482dSJohn Dyson 812b40ce416SJulian Elischer pmap = vmspace_pmap(td->td_proc->p_vmspace); 813867a482dSJohn Dyson 814eff50fcdSAlan Cox vm_map_lock_read(map); 815dd2622a8SAlan Cox RestartScan: 816dd2622a8SAlan Cox timestamp = map->timestamp; 817867a482dSJohn Dyson 818455dd7d4SKonstantin Belousov if (!vm_map_lookup_entry(map, addr, &entry)) { 819455dd7d4SKonstantin Belousov vm_map_unlock_read(map); 820455dd7d4SKonstantin Belousov return (ENOMEM); 821455dd7d4SKonstantin Belousov } 822867a482dSJohn Dyson 823867a482dSJohn Dyson /* 824867a482dSJohn Dyson * Do this on a map entry basis so that if the pages are not 825867a482dSJohn Dyson * in the current processes address space, we can easily look 826867a482dSJohn Dyson * up the pages elsewhere. 827867a482dSJohn Dyson */ 828867a482dSJohn Dyson lastvecindex = -1; 829867a482dSJohn Dyson for (current = entry; 830867a482dSJohn Dyson (current != &map->header) && (current->start < end); 831867a482dSJohn Dyson current = current->next) { 832867a482dSJohn Dyson 833867a482dSJohn Dyson /* 834455dd7d4SKonstantin Belousov * check for contiguity 835455dd7d4SKonstantin Belousov */ 836455dd7d4SKonstantin Belousov if (current->end < end && 837455dd7d4SKonstantin Belousov (entry->next == &map->header || 838455dd7d4SKonstantin Belousov current->next->start > current->end)) { 839455dd7d4SKonstantin Belousov vm_map_unlock_read(map); 840455dd7d4SKonstantin Belousov return (ENOMEM); 841455dd7d4SKonstantin Belousov } 842455dd7d4SKonstantin Belousov 843455dd7d4SKonstantin Belousov /* 844867a482dSJohn Dyson * ignore submaps (for now) or null objects 845867a482dSJohn Dyson */ 8469fdfe602SMatthew Dillon if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) || 847867a482dSJohn Dyson current->object.vm_object == NULL) 848867a482dSJohn Dyson continue; 849867a482dSJohn Dyson 850867a482dSJohn Dyson /* 851867a482dSJohn Dyson * limit this scan to the current map entry and the 852867a482dSJohn Dyson * limits for the mincore call 853867a482dSJohn Dyson */ 854867a482dSJohn Dyson if (addr < current->start) 855867a482dSJohn Dyson addr = current->start; 856867a482dSJohn Dyson cend = current->end; 857867a482dSJohn Dyson if (cend > end) 858867a482dSJohn Dyson cend = end; 859867a482dSJohn Dyson 860867a482dSJohn Dyson /* 861867a482dSJohn Dyson * scan this entry one page at a time 862867a482dSJohn Dyson */ 863867a482dSJohn Dyson while (addr < cend) { 864867a482dSJohn Dyson /* 865867a482dSJohn Dyson * Check pmap first, it is likely faster, also 866867a482dSJohn Dyson * it can provide info as to whether we are the 867867a482dSJohn Dyson * one referencing or modifying the page. 868867a482dSJohn Dyson */ 869567e51e1SAlan Cox object = NULL; 870567e51e1SAlan Cox locked_pa = 0; 871567e51e1SAlan Cox retry: 872567e51e1SAlan Cox m = NULL; 873567e51e1SAlan Cox mincoreinfo = pmap_mincore(pmap, addr, &locked_pa); 874567e51e1SAlan Cox if (locked_pa != 0) { 875867a482dSJohn Dyson /* 876567e51e1SAlan Cox * The page is mapped by this process but not 877567e51e1SAlan Cox * both accessed and modified. It is also 878567e51e1SAlan Cox * managed. Acquire the object lock so that 879567e51e1SAlan Cox * other mappings might be examined. 880867a482dSJohn Dyson */ 881567e51e1SAlan Cox m = PHYS_TO_VM_PAGE(locked_pa); 882567e51e1SAlan Cox if (m->object != object) { 883567e51e1SAlan Cox if (object != NULL) 88489f6b863SAttilio Rao VM_OBJECT_WUNLOCK(object); 885567e51e1SAlan Cox object = m->object; 88689f6b863SAttilio Rao locked = VM_OBJECT_TRYWLOCK(object); 887567e51e1SAlan Cox vm_page_unlock(m); 888567e51e1SAlan Cox if (!locked) { 88989f6b863SAttilio Rao VM_OBJECT_WLOCK(object); 8902965a453SKip Macy vm_page_lock(m); 891567e51e1SAlan Cox goto retry; 892567e51e1SAlan Cox } 893567e51e1SAlan Cox } else 894567e51e1SAlan Cox vm_page_unlock(m); 895567e51e1SAlan Cox KASSERT(m->valid == VM_PAGE_BITS_ALL, 896567e51e1SAlan Cox ("mincore: page %p is mapped but invalid", 897567e51e1SAlan Cox m)); 898567e51e1SAlan Cox } else if (mincoreinfo == 0) { 899567e51e1SAlan Cox /* 900567e51e1SAlan Cox * The page is not mapped by this process. If 901567e51e1SAlan Cox * the object implements managed pages, then 902567e51e1SAlan Cox * determine if the page is resident so that 903567e51e1SAlan Cox * the mappings might be examined. 904567e51e1SAlan Cox */ 905567e51e1SAlan Cox if (current->object.vm_object != object) { 906567e51e1SAlan Cox if (object != NULL) 90789f6b863SAttilio Rao VM_OBJECT_WUNLOCK(object); 908567e51e1SAlan Cox object = current->object.vm_object; 90989f6b863SAttilio Rao VM_OBJECT_WLOCK(object); 910567e51e1SAlan Cox } 911567e51e1SAlan Cox if (object->type == OBJT_DEFAULT || 912567e51e1SAlan Cox object->type == OBJT_SWAP || 913567e51e1SAlan Cox object->type == OBJT_VNODE) { 914567e51e1SAlan Cox pindex = OFF_TO_IDX(current->offset + 915567e51e1SAlan Cox (addr - current->start)); 916567e51e1SAlan Cox m = vm_page_lookup(object, pindex); 9171c8279e4SAlan Cox if (m == NULL && 9181c8279e4SAlan Cox vm_page_is_cached(object, pindex)) 9191c8279e4SAlan Cox mincoreinfo = MINCORE_INCORE; 920567e51e1SAlan Cox if (m != NULL && m->valid == 0) 921567e51e1SAlan Cox m = NULL; 922567e51e1SAlan Cox if (m != NULL) 923567e51e1SAlan Cox mincoreinfo = MINCORE_INCORE; 924567e51e1SAlan Cox } 925567e51e1SAlan Cox } 926567e51e1SAlan Cox if (m != NULL) { 927567e51e1SAlan Cox /* Examine other mappings to the page. */ 928567e51e1SAlan Cox if (m->dirty == 0 && pmap_is_modified(m)) 929567e51e1SAlan Cox vm_page_dirty(m); 930567e51e1SAlan Cox if (m->dirty != 0) 931867a482dSJohn Dyson mincoreinfo |= MINCORE_MODIFIED_OTHER; 932c46b90e9SAlan Cox /* 9333407fefeSKonstantin Belousov * The first test for PGA_REFERENCED is an 934c46b90e9SAlan Cox * optimization. The second test is 935c46b90e9SAlan Cox * required because a concurrent pmap 936c46b90e9SAlan Cox * operation could clear the last reference 9373407fefeSKonstantin Belousov * and set PGA_REFERENCED before the call to 938c46b90e9SAlan Cox * pmap_is_referenced(). 939c46b90e9SAlan Cox */ 9403407fefeSKonstantin Belousov if ((m->aflags & PGA_REFERENCED) != 0 || 941c46b90e9SAlan Cox pmap_is_referenced(m) || 9423407fefeSKonstantin Belousov (m->aflags & PGA_REFERENCED) != 0) 943867a482dSJohn Dyson mincoreinfo |= MINCORE_REFERENCED_OTHER; 9449b5a5d81SJohn Dyson } 945567e51e1SAlan Cox if (object != NULL) 94689f6b863SAttilio Rao VM_OBJECT_WUNLOCK(object); 947867a482dSJohn Dyson 948867a482dSJohn Dyson /* 949dd2622a8SAlan Cox * subyte may page fault. In case it needs to modify 950dd2622a8SAlan Cox * the map, we release the lock. 951dd2622a8SAlan Cox */ 952dd2622a8SAlan Cox vm_map_unlock_read(map); 953dd2622a8SAlan Cox 954dd2622a8SAlan Cox /* 955867a482dSJohn Dyson * calculate index into user supplied byte vector 956867a482dSJohn Dyson */ 957867a482dSJohn Dyson vecindex = OFF_TO_IDX(addr - first_addr); 958867a482dSJohn Dyson 959867a482dSJohn Dyson /* 960867a482dSJohn Dyson * If we have skipped map entries, we need to make sure that 961867a482dSJohn Dyson * the byte vector is zeroed for those skipped entries. 962867a482dSJohn Dyson */ 963867a482dSJohn Dyson while ((lastvecindex + 1) < vecindex) { 964867a482dSJohn Dyson error = subyte(vec + lastvecindex, 0); 965867a482dSJohn Dyson if (error) { 966d2c60af8SMatthew Dillon error = EFAULT; 967d2c60af8SMatthew Dillon goto done2; 968867a482dSJohn Dyson } 969867a482dSJohn Dyson ++lastvecindex; 970867a482dSJohn Dyson } 971867a482dSJohn Dyson 972867a482dSJohn Dyson /* 973867a482dSJohn Dyson * Pass the page information to the user 974867a482dSJohn Dyson */ 975867a482dSJohn Dyson error = subyte(vec + vecindex, mincoreinfo); 976867a482dSJohn Dyson if (error) { 977d2c60af8SMatthew Dillon error = EFAULT; 978d2c60af8SMatthew Dillon goto done2; 979867a482dSJohn Dyson } 980dd2622a8SAlan Cox 981dd2622a8SAlan Cox /* 982dd2622a8SAlan Cox * If the map has changed, due to the subyte, the previous 983dd2622a8SAlan Cox * output may be invalid. 984dd2622a8SAlan Cox */ 985dd2622a8SAlan Cox vm_map_lock_read(map); 986dd2622a8SAlan Cox if (timestamp != map->timestamp) 987dd2622a8SAlan Cox goto RestartScan; 988dd2622a8SAlan Cox 989867a482dSJohn Dyson lastvecindex = vecindex; 99002c04a2fSJohn Dyson addr += PAGE_SIZE; 99102c04a2fSJohn Dyson } 992867a482dSJohn Dyson } 993867a482dSJohn Dyson 994867a482dSJohn Dyson /* 995dd2622a8SAlan Cox * subyte may page fault. In case it needs to modify 996dd2622a8SAlan Cox * the map, we release the lock. 997dd2622a8SAlan Cox */ 998dd2622a8SAlan Cox vm_map_unlock_read(map); 999dd2622a8SAlan Cox 1000dd2622a8SAlan Cox /* 1001867a482dSJohn Dyson * Zero the last entries in the byte vector. 1002867a482dSJohn Dyson */ 1003867a482dSJohn Dyson vecindex = OFF_TO_IDX(end - first_addr); 1004867a482dSJohn Dyson while ((lastvecindex + 1) < vecindex) { 1005867a482dSJohn Dyson error = subyte(vec + lastvecindex, 0); 1006867a482dSJohn Dyson if (error) { 1007d2c60af8SMatthew Dillon error = EFAULT; 1008d2c60af8SMatthew Dillon goto done2; 1009867a482dSJohn Dyson } 1010867a482dSJohn Dyson ++lastvecindex; 1011867a482dSJohn Dyson } 1012867a482dSJohn Dyson 1013dd2622a8SAlan Cox /* 1014dd2622a8SAlan Cox * If the map has changed, due to the subyte, the previous 1015dd2622a8SAlan Cox * output may be invalid. 1016dd2622a8SAlan Cox */ 1017dd2622a8SAlan Cox vm_map_lock_read(map); 1018dd2622a8SAlan Cox if (timestamp != map->timestamp) 1019dd2622a8SAlan Cox goto RestartScan; 1020eff50fcdSAlan Cox vm_map_unlock_read(map); 1021d2c60af8SMatthew Dillon done2: 1022d2c60af8SMatthew Dillon return (error); 1023df8bae1dSRodney W. Grimes } 1024df8bae1dSRodney W. Grimes 1025d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 1026df8bae1dSRodney W. Grimes struct mlock_args { 1027651bb817SAlexander Langer const void *addr; 1028df8bae1dSRodney W. Grimes size_t len; 1029df8bae1dSRodney W. Grimes }; 1030d2d3e875SBruce Evans #endif 1031d2c60af8SMatthew Dillon /* 1032d2c60af8SMatthew Dillon * MPSAFE 1033d2c60af8SMatthew Dillon */ 1034df8bae1dSRodney W. Grimes int 10358451d0ddSKip Macy sys_mlock(td, uap) 1036b40ce416SJulian Elischer struct thread *td; 1037df8bae1dSRodney W. Grimes struct mlock_args *uap; 1038df8bae1dSRodney W. Grimes { 1039f0ea4612SDon Lewis struct proc *proc; 1040bb734798SDon Lewis vm_offset_t addr, end, last, start; 1041bb734798SDon Lewis vm_size_t npages, size; 10423ac7d297SAndrey Zonov vm_map_t map; 10431ba5ad42SEdward Tomasz Napierala unsigned long nsize; 1044bb734798SDon Lewis int error; 1045df8bae1dSRodney W. Grimes 1046acd3428bSRobert Watson error = priv_check(td, PRIV_VM_MLOCK); 104747934cefSDon Lewis if (error) 104847934cefSDon Lewis return (error); 104916929939SDon Lewis addr = (vm_offset_t)uap->addr; 105016929939SDon Lewis size = uap->len; 1051bb734798SDon Lewis last = addr + size; 105216929939SDon Lewis start = trunc_page(addr); 1053bb734798SDon Lewis end = round_page(last); 1054bb734798SDon Lewis if (last < addr || end < addr) 1055df8bae1dSRodney W. Grimes return (EINVAL); 105616929939SDon Lewis npages = atop(end - start); 105716929939SDon Lewis if (npages > vm_page_max_wired) 105816929939SDon Lewis return (ENOMEM); 1059f0ea4612SDon Lewis proc = td->td_proc; 10603ac7d297SAndrey Zonov map = &proc->p_vmspace->vm_map; 106147934cefSDon Lewis PROC_LOCK(proc); 10623ac7d297SAndrey Zonov nsize = ptoa(npages + pmap_wired_count(map->pmap)); 10631ba5ad42SEdward Tomasz Napierala if (nsize > lim_cur(proc, RLIMIT_MEMLOCK)) { 106447934cefSDon Lewis PROC_UNLOCK(proc); 10654a40e3d4SJohn Dyson return (ENOMEM); 106691d5354aSJohn Baldwin } 106747934cefSDon Lewis PROC_UNLOCK(proc); 10682feb50bfSAttilio Rao if (npages + cnt.v_wire_count > vm_page_max_wired) 106916929939SDon Lewis return (EAGAIN); 1070afcc55f3SEdward Tomasz Napierala #ifdef RACCT 10711ba5ad42SEdward Tomasz Napierala PROC_LOCK(proc); 10721ba5ad42SEdward Tomasz Napierala error = racct_set(proc, RACCT_MEMLOCK, nsize); 10731ba5ad42SEdward Tomasz Napierala PROC_UNLOCK(proc); 10741ba5ad42SEdward Tomasz Napierala if (error != 0) 10751ba5ad42SEdward Tomasz Napierala return (ENOMEM); 1076afcc55f3SEdward Tomasz Napierala #endif 10773ac7d297SAndrey Zonov error = vm_map_wire(map, start, end, 107816929939SDon Lewis VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); 1079afcc55f3SEdward Tomasz Napierala #ifdef RACCT 10801ba5ad42SEdward Tomasz Napierala if (error != KERN_SUCCESS) { 10811ba5ad42SEdward Tomasz Napierala PROC_LOCK(proc); 10821ba5ad42SEdward Tomasz Napierala racct_set(proc, RACCT_MEMLOCK, 10833ac7d297SAndrey Zonov ptoa(pmap_wired_count(map->pmap))); 10841ba5ad42SEdward Tomasz Napierala PROC_UNLOCK(proc); 10851ba5ad42SEdward Tomasz Napierala } 1086afcc55f3SEdward Tomasz Napierala #endif 1087df8bae1dSRodney W. Grimes return (error == KERN_SUCCESS ? 0 : ENOMEM); 1088df8bae1dSRodney W. Grimes } 1089df8bae1dSRodney W. Grimes 1090d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 10914a40e3d4SJohn Dyson struct mlockall_args { 10924a40e3d4SJohn Dyson int how; 10934a40e3d4SJohn Dyson }; 10944a40e3d4SJohn Dyson #endif 10954a40e3d4SJohn Dyson 1096d2c60af8SMatthew Dillon /* 1097d2c60af8SMatthew Dillon * MPSAFE 1098d2c60af8SMatthew Dillon */ 10994a40e3d4SJohn Dyson int 11008451d0ddSKip Macy sys_mlockall(td, uap) 1101b40ce416SJulian Elischer struct thread *td; 11024a40e3d4SJohn Dyson struct mlockall_args *uap; 11034a40e3d4SJohn Dyson { 1104abd498aaSBruce M Simpson vm_map_t map; 1105abd498aaSBruce M Simpson int error; 1106abd498aaSBruce M Simpson 1107abd498aaSBruce M Simpson map = &td->td_proc->p_vmspace->vm_map; 11087e19eda4SAndrey Zonov error = priv_check(td, PRIV_VM_MLOCK); 11097e19eda4SAndrey Zonov if (error) 11107e19eda4SAndrey Zonov return (error); 1111abd498aaSBruce M Simpson 1112abd498aaSBruce M Simpson if ((uap->how == 0) || ((uap->how & ~(MCL_CURRENT|MCL_FUTURE)) != 0)) 1113abd498aaSBruce M Simpson return (EINVAL); 1114abd498aaSBruce M Simpson 1115abd498aaSBruce M Simpson /* 1116abd498aaSBruce M Simpson * If wiring all pages in the process would cause it to exceed 1117abd498aaSBruce M Simpson * a hard resource limit, return ENOMEM. 1118abd498aaSBruce M Simpson */ 11197e19eda4SAndrey Zonov if (!old_mlock && uap->how & MCL_CURRENT) { 112091d5354aSJohn Baldwin PROC_LOCK(td->td_proc); 1121fd6f4ffbSEdward Tomasz Napierala if (map->size > lim_cur(td->td_proc, RLIMIT_MEMLOCK)) { 112291d5354aSJohn Baldwin PROC_UNLOCK(td->td_proc); 1123abd498aaSBruce M Simpson return (ENOMEM); 112491d5354aSJohn Baldwin } 112591d5354aSJohn Baldwin PROC_UNLOCK(td->td_proc); 11267e19eda4SAndrey Zonov } 1127afcc55f3SEdward Tomasz Napierala #ifdef RACCT 11281ba5ad42SEdward Tomasz Napierala PROC_LOCK(td->td_proc); 11291ba5ad42SEdward Tomasz Napierala error = racct_set(td->td_proc, RACCT_MEMLOCK, map->size); 11301ba5ad42SEdward Tomasz Napierala PROC_UNLOCK(td->td_proc); 11311ba5ad42SEdward Tomasz Napierala if (error != 0) 11321ba5ad42SEdward Tomasz Napierala return (ENOMEM); 1133afcc55f3SEdward Tomasz Napierala #endif 1134abd498aaSBruce M Simpson 1135abd498aaSBruce M Simpson if (uap->how & MCL_FUTURE) { 1136abd498aaSBruce M Simpson vm_map_lock(map); 1137abd498aaSBruce M Simpson vm_map_modflags(map, MAP_WIREFUTURE, 0); 1138abd498aaSBruce M Simpson vm_map_unlock(map); 1139abd498aaSBruce M Simpson error = 0; 1140abd498aaSBruce M Simpson } 1141abd498aaSBruce M Simpson 1142abd498aaSBruce M Simpson if (uap->how & MCL_CURRENT) { 1143abd498aaSBruce M Simpson /* 1144abd498aaSBruce M Simpson * P1003.1-2001 mandates that all currently mapped pages 1145abd498aaSBruce M Simpson * will be memory resident and locked (wired) upon return 1146abd498aaSBruce M Simpson * from mlockall(). vm_map_wire() will wire pages, by 1147abd498aaSBruce M Simpson * calling vm_fault_wire() for each page in the region. 1148abd498aaSBruce M Simpson */ 1149abd498aaSBruce M Simpson error = vm_map_wire(map, vm_map_min(map), vm_map_max(map), 1150abd498aaSBruce M Simpson VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK); 1151abd498aaSBruce M Simpson error = (error == KERN_SUCCESS ? 0 : EAGAIN); 1152abd498aaSBruce M Simpson } 1153afcc55f3SEdward Tomasz Napierala #ifdef RACCT 11541ba5ad42SEdward Tomasz Napierala if (error != KERN_SUCCESS) { 11551ba5ad42SEdward Tomasz Napierala PROC_LOCK(td->td_proc); 11561ba5ad42SEdward Tomasz Napierala racct_set(td->td_proc, RACCT_MEMLOCK, 11573ac7d297SAndrey Zonov ptoa(pmap_wired_count(map->pmap))); 11581ba5ad42SEdward Tomasz Napierala PROC_UNLOCK(td->td_proc); 11591ba5ad42SEdward Tomasz Napierala } 1160afcc55f3SEdward Tomasz Napierala #endif 1161abd498aaSBruce M Simpson 1162abd498aaSBruce M Simpson return (error); 11634a40e3d4SJohn Dyson } 11644a40e3d4SJohn Dyson 11654a40e3d4SJohn Dyson #ifndef _SYS_SYSPROTO_H_ 1166fa721254SAlfred Perlstein struct munlockall_args { 1167abd498aaSBruce M Simpson register_t dummy; 11684a40e3d4SJohn Dyson }; 11694a40e3d4SJohn Dyson #endif 11704a40e3d4SJohn Dyson 1171d2c60af8SMatthew Dillon /* 1172d2c60af8SMatthew Dillon * MPSAFE 1173d2c60af8SMatthew Dillon */ 11744a40e3d4SJohn Dyson int 11758451d0ddSKip Macy sys_munlockall(td, uap) 1176b40ce416SJulian Elischer struct thread *td; 11774a40e3d4SJohn Dyson struct munlockall_args *uap; 11784a40e3d4SJohn Dyson { 1179abd498aaSBruce M Simpson vm_map_t map; 1180abd498aaSBruce M Simpson int error; 1181abd498aaSBruce M Simpson 1182abd498aaSBruce M Simpson map = &td->td_proc->p_vmspace->vm_map; 1183acd3428bSRobert Watson error = priv_check(td, PRIV_VM_MUNLOCK); 1184abd498aaSBruce M Simpson if (error) 1185abd498aaSBruce M Simpson return (error); 1186abd498aaSBruce M Simpson 1187abd498aaSBruce M Simpson /* Clear the MAP_WIREFUTURE flag from this vm_map. */ 1188abd498aaSBruce M Simpson vm_map_lock(map); 1189abd498aaSBruce M Simpson vm_map_modflags(map, 0, MAP_WIREFUTURE); 1190abd498aaSBruce M Simpson vm_map_unlock(map); 1191abd498aaSBruce M Simpson 1192abd498aaSBruce M Simpson /* Forcibly unwire all pages. */ 1193abd498aaSBruce M Simpson error = vm_map_unwire(map, vm_map_min(map), vm_map_max(map), 1194abd498aaSBruce M Simpson VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK); 1195afcc55f3SEdward Tomasz Napierala #ifdef RACCT 11961ba5ad42SEdward Tomasz Napierala if (error == KERN_SUCCESS) { 11971ba5ad42SEdward Tomasz Napierala PROC_LOCK(td->td_proc); 11981ba5ad42SEdward Tomasz Napierala racct_set(td->td_proc, RACCT_MEMLOCK, 0); 11991ba5ad42SEdward Tomasz Napierala PROC_UNLOCK(td->td_proc); 12001ba5ad42SEdward Tomasz Napierala } 1201afcc55f3SEdward Tomasz Napierala #endif 1202abd498aaSBruce M Simpson 1203abd498aaSBruce M Simpson return (error); 12044a40e3d4SJohn Dyson } 12054a40e3d4SJohn Dyson 12064a40e3d4SJohn Dyson #ifndef _SYS_SYSPROTO_H_ 1207df8bae1dSRodney W. Grimes struct munlock_args { 1208651bb817SAlexander Langer const void *addr; 1209df8bae1dSRodney W. Grimes size_t len; 1210df8bae1dSRodney W. Grimes }; 1211d2d3e875SBruce Evans #endif 1212d2c60af8SMatthew Dillon /* 1213d2c60af8SMatthew Dillon * MPSAFE 1214d2c60af8SMatthew Dillon */ 1215df8bae1dSRodney W. Grimes int 12168451d0ddSKip Macy sys_munlock(td, uap) 1217b40ce416SJulian Elischer struct thread *td; 1218df8bae1dSRodney W. Grimes struct munlock_args *uap; 1219df8bae1dSRodney W. Grimes { 1220bb734798SDon Lewis vm_offset_t addr, end, last, start; 122116929939SDon Lewis vm_size_t size; 1222df8bae1dSRodney W. Grimes int error; 1223df8bae1dSRodney W. Grimes 1224acd3428bSRobert Watson error = priv_check(td, PRIV_VM_MUNLOCK); 122547934cefSDon Lewis if (error) 122647934cefSDon Lewis return (error); 122716929939SDon Lewis addr = (vm_offset_t)uap->addr; 122816929939SDon Lewis size = uap->len; 1229bb734798SDon Lewis last = addr + size; 123016929939SDon Lewis start = trunc_page(addr); 1231bb734798SDon Lewis end = round_page(last); 1232bb734798SDon Lewis if (last < addr || end < addr) 1233df8bae1dSRodney W. Grimes return (EINVAL); 123416929939SDon Lewis error = vm_map_unwire(&td->td_proc->p_vmspace->vm_map, start, end, 123516929939SDon Lewis VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); 1236afcc55f3SEdward Tomasz Napierala #ifdef RACCT 12371ba5ad42SEdward Tomasz Napierala if (error == KERN_SUCCESS) { 12381ba5ad42SEdward Tomasz Napierala PROC_LOCK(td->td_proc); 12391ba5ad42SEdward Tomasz Napierala racct_sub(td->td_proc, RACCT_MEMLOCK, ptoa(end - start)); 12401ba5ad42SEdward Tomasz Napierala PROC_UNLOCK(td->td_proc); 12411ba5ad42SEdward Tomasz Napierala } 1242afcc55f3SEdward Tomasz Napierala #endif 1243df8bae1dSRodney W. Grimes return (error == KERN_SUCCESS ? 0 : ENOMEM); 1244df8bae1dSRodney W. Grimes } 1245df8bae1dSRodney W. Grimes 1246df8bae1dSRodney W. Grimes /* 1247c8daea13SAlexander Kabaev * vm_mmap_vnode() 1248c8daea13SAlexander Kabaev * 1249c8daea13SAlexander Kabaev * Helper function for vm_mmap. Perform sanity check specific for mmap 1250c8daea13SAlexander Kabaev * operations on vnodes. 125184110e7eSKonstantin Belousov * 125284110e7eSKonstantin Belousov * For VCHR vnodes, the vnode lock is held over the call to 125384110e7eSKonstantin Belousov * vm_mmap_cdev() to keep vp->v_rdev valid. 1254c8daea13SAlexander Kabaev */ 1255c8daea13SAlexander Kabaev int 1256c8daea13SAlexander Kabaev vm_mmap_vnode(struct thread *td, vm_size_t objsize, 1257c8daea13SAlexander Kabaev vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp, 125884110e7eSKonstantin Belousov struct vnode *vp, vm_ooffset_t *foffp, vm_object_t *objp, 125984110e7eSKonstantin Belousov boolean_t *writecounted) 1260c8daea13SAlexander Kabaev { 1261c8daea13SAlexander Kabaev struct vattr va; 1262c8daea13SAlexander Kabaev vm_object_t obj; 126364345f0bSJohn Baldwin vm_offset_t foff; 1264ae51ff11SJeff Roberson struct mount *mp; 12650359a12eSAttilio Rao struct ucred *cred; 12665050aa86SKonstantin Belousov int error, flags, locktype; 1267c8daea13SAlexander Kabaev 1268ae51ff11SJeff Roberson mp = vp->v_mount; 12690359a12eSAttilio Rao cred = td->td_ucred; 127084110e7eSKonstantin Belousov if ((*maxprotp & VM_PROT_WRITE) && (*flagsp & MAP_SHARED)) 127184110e7eSKonstantin Belousov locktype = LK_EXCLUSIVE; 127284110e7eSKonstantin Belousov else 127384110e7eSKonstantin Belousov locktype = LK_SHARED; 12745050aa86SKonstantin Belousov if ((error = vget(vp, locktype, td)) != 0) 1275c8daea13SAlexander Kabaev return (error); 127664345f0bSJohn Baldwin foff = *foffp; 1277c8daea13SAlexander Kabaev flags = *flagsp; 12788516dd18SPoul-Henning Kamp obj = vp->v_object; 1279c8daea13SAlexander Kabaev if (vp->v_type == VREG) { 1280c8daea13SAlexander Kabaev /* 1281c8daea13SAlexander Kabaev * Get the proper underlying object 1282c8daea13SAlexander Kabaev */ 12838516dd18SPoul-Henning Kamp if (obj == NULL) { 1284c8daea13SAlexander Kabaev error = EINVAL; 1285c8daea13SAlexander Kabaev goto done; 1286c8daea13SAlexander Kabaev } 1287*e5f299ffSKonstantin Belousov if (obj->type == OBJT_VNODE && obj->handle != vp) { 1288c8daea13SAlexander Kabaev vput(vp); 1289c8daea13SAlexander Kabaev vp = (struct vnode *)obj->handle; 129084110e7eSKonstantin Belousov /* 129184110e7eSKonstantin Belousov * Bypass filesystems obey the mpsafety of the 129284110e7eSKonstantin Belousov * underlying fs. 129384110e7eSKonstantin Belousov */ 129484110e7eSKonstantin Belousov error = vget(vp, locktype, td); 12955050aa86SKonstantin Belousov if (error != 0) 129684110e7eSKonstantin Belousov return (error); 129784110e7eSKonstantin Belousov } 129884110e7eSKonstantin Belousov if (locktype == LK_EXCLUSIVE) { 129984110e7eSKonstantin Belousov *writecounted = TRUE; 130084110e7eSKonstantin Belousov vnode_pager_update_writecount(obj, 0, objsize); 130184110e7eSKonstantin Belousov } 1302c8daea13SAlexander Kabaev } else if (vp->v_type == VCHR) { 130364345f0bSJohn Baldwin error = vm_mmap_cdev(td, objsize, prot, maxprotp, flagsp, 130464345f0bSJohn Baldwin vp->v_rdev, foffp, objp); 130564345f0bSJohn Baldwin if (error == 0) 130664345f0bSJohn Baldwin goto mark_atime; 130791a35e78SKonstantin Belousov goto done; 1308c8daea13SAlexander Kabaev } else { 1309c8daea13SAlexander Kabaev error = EINVAL; 1310c8daea13SAlexander Kabaev goto done; 1311c8daea13SAlexander Kabaev } 13120359a12eSAttilio Rao if ((error = VOP_GETATTR(vp, &va, cred))) 1313c8daea13SAlexander Kabaev goto done; 1314c92163dcSChristian S.J. Peron #ifdef MAC 13150359a12eSAttilio Rao error = mac_vnode_check_mmap(cred, vp, prot, flags); 1316c92163dcSChristian S.J. Peron if (error != 0) 1317c92163dcSChristian S.J. Peron goto done; 1318c92163dcSChristian S.J. Peron #endif 1319c8daea13SAlexander Kabaev if ((flags & MAP_SHARED) != 0) { 1320c8daea13SAlexander Kabaev if ((va.va_flags & (SF_SNAPSHOT|IMMUTABLE|APPEND)) != 0) { 1321c8daea13SAlexander Kabaev if (prot & PROT_WRITE) { 1322c8daea13SAlexander Kabaev error = EPERM; 1323c8daea13SAlexander Kabaev goto done; 1324c8daea13SAlexander Kabaev } 1325c8daea13SAlexander Kabaev *maxprotp &= ~VM_PROT_WRITE; 1326c8daea13SAlexander Kabaev } 1327c8daea13SAlexander Kabaev } 1328c8daea13SAlexander Kabaev /* 1329c8daea13SAlexander Kabaev * If it is a regular file without any references 1330c8daea13SAlexander Kabaev * we do not need to sync it. 1331c8daea13SAlexander Kabaev * Adjust object size to be the size of actual file. 1332c8daea13SAlexander Kabaev */ 1333c8daea13SAlexander Kabaev objsize = round_page(va.va_size); 1334c8daea13SAlexander Kabaev if (va.va_nlink == 0) 1335c8daea13SAlexander Kabaev flags |= MAP_NOSYNC; 1336*e5f299ffSKonstantin Belousov if (obj->type == OBJT_VNODE) 1337*e5f299ffSKonstantin Belousov obj = vm_pager_allocate(OBJT_VNODE, vp, objsize, prot, foff, 1338*e5f299ffSKonstantin Belousov cred); 1339*e5f299ffSKonstantin Belousov else { 1340*e5f299ffSKonstantin Belousov KASSERT(obj->type == OBJT_DEFAULT || obj->type == OBJT_SWAP, 1341*e5f299ffSKonstantin Belousov ("wrong object type")); 1342*e5f299ffSKonstantin Belousov vm_object_reference(obj); 1343*e5f299ffSKonstantin Belousov } 1344c8daea13SAlexander Kabaev if (obj == NULL) { 134564345f0bSJohn Baldwin error = ENOMEM; 1346c8daea13SAlexander Kabaev goto done; 1347c8daea13SAlexander Kabaev } 1348c8daea13SAlexander Kabaev *objp = obj; 1349c8daea13SAlexander Kabaev *flagsp = flags; 135064345f0bSJohn Baldwin 135164345f0bSJohn Baldwin mark_atime: 13520359a12eSAttilio Rao vfs_mark_atime(vp, cred); 13531e309003SDiomidis Spinellis 1354c8daea13SAlexander Kabaev done: 1355bafa6cfcSKonstantin Belousov if (error != 0 && *writecounted) { 1356bafa6cfcSKonstantin Belousov *writecounted = FALSE; 1357bafa6cfcSKonstantin Belousov vnode_pager_update_writecount(obj, objsize, 0); 1358bafa6cfcSKonstantin Belousov } 1359c8daea13SAlexander Kabaev vput(vp); 1360c8daea13SAlexander Kabaev return (error); 1361c8daea13SAlexander Kabaev } 1362c8daea13SAlexander Kabaev 1363c8daea13SAlexander Kabaev /* 136498df9218SJohn Baldwin * vm_mmap_cdev() 136598df9218SJohn Baldwin * 136698df9218SJohn Baldwin * MPSAFE 136798df9218SJohn Baldwin * 136898df9218SJohn Baldwin * Helper function for vm_mmap. Perform sanity check specific for mmap 136998df9218SJohn Baldwin * operations on cdevs. 137098df9218SJohn Baldwin */ 137198df9218SJohn Baldwin int 137298df9218SJohn Baldwin vm_mmap_cdev(struct thread *td, vm_size_t objsize, 137398df9218SJohn Baldwin vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp, 137464345f0bSJohn Baldwin struct cdev *cdev, vm_ooffset_t *foff, vm_object_t *objp) 137598df9218SJohn Baldwin { 137698df9218SJohn Baldwin vm_object_t obj; 137791a35e78SKonstantin Belousov struct cdevsw *dsw; 13783979450bSKonstantin Belousov int error, flags, ref; 137998df9218SJohn Baldwin 138098df9218SJohn Baldwin flags = *flagsp; 138198df9218SJohn Baldwin 13823979450bSKonstantin Belousov dsw = dev_refthread(cdev, &ref); 138391a35e78SKonstantin Belousov if (dsw == NULL) 138491a35e78SKonstantin Belousov return (ENXIO); 138591a35e78SKonstantin Belousov if (dsw->d_flags & D_MMAP_ANON) { 13863979450bSKonstantin Belousov dev_relthread(cdev, ref); 138798df9218SJohn Baldwin *maxprotp = VM_PROT_ALL; 138898df9218SJohn Baldwin *flagsp |= MAP_ANON; 138998df9218SJohn Baldwin return (0); 139098df9218SJohn Baldwin } 139198df9218SJohn Baldwin /* 139264345f0bSJohn Baldwin * cdevs do not provide private mappings of any kind. 139398df9218SJohn Baldwin */ 139498df9218SJohn Baldwin if ((*maxprotp & VM_PROT_WRITE) == 0 && 139564345f0bSJohn Baldwin (prot & PROT_WRITE) != 0) { 13963979450bSKonstantin Belousov dev_relthread(cdev, ref); 139798df9218SJohn Baldwin return (EACCES); 139864345f0bSJohn Baldwin } 139964345f0bSJohn Baldwin if (flags & (MAP_PRIVATE|MAP_COPY)) { 14003979450bSKonstantin Belousov dev_relthread(cdev, ref); 140198df9218SJohn Baldwin return (EINVAL); 140264345f0bSJohn Baldwin } 140398df9218SJohn Baldwin /* 140498df9218SJohn Baldwin * Force device mappings to be shared. 140598df9218SJohn Baldwin */ 140698df9218SJohn Baldwin flags |= MAP_SHARED; 140798df9218SJohn Baldwin #ifdef MAC_XXX 140864345f0bSJohn Baldwin error = mac_cdev_check_mmap(td->td_ucred, cdev, prot); 140964345f0bSJohn Baldwin if (error != 0) { 14103979450bSKonstantin Belousov dev_relthread(cdev, ref); 141198df9218SJohn Baldwin return (error); 141264345f0bSJohn Baldwin } 141398df9218SJohn Baldwin #endif 141464345f0bSJohn Baldwin /* 141564345f0bSJohn Baldwin * First, try d_mmap_single(). If that is not implemented 141664345f0bSJohn Baldwin * (returns ENODEV), fall back to using the device pager. 141764345f0bSJohn Baldwin * Note that d_mmap_single() must return a reference to the 141864345f0bSJohn Baldwin * object (it needs to bump the reference count of the object 141964345f0bSJohn Baldwin * it returns somehow). 142064345f0bSJohn Baldwin * 142164345f0bSJohn Baldwin * XXX assumes VM_PROT_* == PROT_* 142264345f0bSJohn Baldwin */ 142364345f0bSJohn Baldwin error = dsw->d_mmap_single(cdev, foff, objsize, objp, (int)prot); 14243979450bSKonstantin Belousov dev_relthread(cdev, ref); 142564345f0bSJohn Baldwin if (error != ENODEV) 142664345f0bSJohn Baldwin return (error); 14273364c323SKonstantin Belousov obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff, 14283364c323SKonstantin Belousov td->td_ucred); 142998df9218SJohn Baldwin if (obj == NULL) 143098df9218SJohn Baldwin return (EINVAL); 143198df9218SJohn Baldwin *objp = obj; 143298df9218SJohn Baldwin *flagsp = flags; 143398df9218SJohn Baldwin return (0); 143498df9218SJohn Baldwin } 143598df9218SJohn Baldwin 143698df9218SJohn Baldwin /* 14378e38aeffSJohn Baldwin * vm_mmap_shm() 14388e38aeffSJohn Baldwin * 14398e38aeffSJohn Baldwin * MPSAFE 14408e38aeffSJohn Baldwin * 14418e38aeffSJohn Baldwin * Helper function for vm_mmap. Perform sanity check specific for mmap 14428e38aeffSJohn Baldwin * operations on shm file descriptors. 14438e38aeffSJohn Baldwin */ 14448e38aeffSJohn Baldwin int 14458e38aeffSJohn Baldwin vm_mmap_shm(struct thread *td, vm_size_t objsize, 14468e38aeffSJohn Baldwin vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp, 14478e38aeffSJohn Baldwin struct shmfd *shmfd, vm_ooffset_t foff, vm_object_t *objp) 14488e38aeffSJohn Baldwin { 14498e38aeffSJohn Baldwin int error; 14508e38aeffSJohn Baldwin 1451da048309SAlan Cox if ((*flagsp & MAP_SHARED) != 0 && 1452da048309SAlan Cox (*maxprotp & VM_PROT_WRITE) == 0 && 14538e38aeffSJohn Baldwin (prot & PROT_WRITE) != 0) 14548e38aeffSJohn Baldwin return (EACCES); 14558e38aeffSJohn Baldwin #ifdef MAC 14568e38aeffSJohn Baldwin error = mac_posixshm_check_mmap(td->td_ucred, shmfd, prot, *flagsp); 14578e38aeffSJohn Baldwin if (error != 0) 14588e38aeffSJohn Baldwin return (error); 14598e38aeffSJohn Baldwin #endif 14608e38aeffSJohn Baldwin error = shm_mmap(shmfd, objsize, foff, objp); 14618e38aeffSJohn Baldwin if (error) 14628e38aeffSJohn Baldwin return (error); 14638e38aeffSJohn Baldwin return (0); 14648e38aeffSJohn Baldwin } 14658e38aeffSJohn Baldwin 14668e38aeffSJohn Baldwin /* 1467d2c60af8SMatthew Dillon * vm_mmap() 1468d2c60af8SMatthew Dillon * 1469d2c60af8SMatthew Dillon * MPSAFE 1470d2c60af8SMatthew Dillon * 1471d2c60af8SMatthew Dillon * Internal version of mmap. Currently used by mmap, exec, and sys5 1472d2c60af8SMatthew Dillon * shared memory. Handle is either a vnode pointer or NULL for MAP_ANON. 1473df8bae1dSRodney W. Grimes */ 1474df8bae1dSRodney W. Grimes int 1475b9dcd593SBruce Evans vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, 1476b9dcd593SBruce Evans vm_prot_t maxprot, int flags, 147798df9218SJohn Baldwin objtype_t handle_type, void *handle, 1478b9dcd593SBruce Evans vm_ooffset_t foff) 1479df8bae1dSRodney W. Grimes { 1480df8bae1dSRodney W. Grimes boolean_t fitit; 14816bda842dSMatt Jacob vm_object_t object = NULL; 1482b40ce416SJulian Elischer struct thread *td = curthread; 1483f9230ad6SAlan Cox int docow, error, rv; 148484110e7eSKonstantin Belousov boolean_t writecounted; 1485df8bae1dSRodney W. Grimes 1486df8bae1dSRodney W. Grimes if (size == 0) 1487df8bae1dSRodney W. Grimes return (0); 1488df8bae1dSRodney W. Grimes 1489749474f2SPeter Wemm size = round_page(size); 1490df8bae1dSRodney W. Grimes 1491a6492969SAlan Cox if (map == &td->td_proc->p_vmspace->vm_map) { 149291d5354aSJohn Baldwin PROC_LOCK(td->td_proc); 1493a6492969SAlan Cox if (map->size + size > lim_cur(td->td_proc, RLIMIT_VMEM)) { 149491d5354aSJohn Baldwin PROC_UNLOCK(td->td_proc); 1495070f64feSMatthew Dillon return (ENOMEM); 1496070f64feSMatthew Dillon } 1497a6492969SAlan Cox if (racct_set(td->td_proc, RACCT_VMEM, map->size + size)) { 14981ba5ad42SEdward Tomasz Napierala PROC_UNLOCK(td->td_proc); 14991ba5ad42SEdward Tomasz Napierala return (ENOMEM); 15001ba5ad42SEdward Tomasz Napierala } 15017e19eda4SAndrey Zonov if (!old_mlock && map->flags & MAP_WIREFUTURE) { 15023ac7d297SAndrey Zonov if (ptoa(pmap_wired_count(map->pmap)) + size > 15033ac7d297SAndrey Zonov lim_cur(td->td_proc, RLIMIT_MEMLOCK)) { 15047e19eda4SAndrey Zonov racct_set_force(td->td_proc, RACCT_VMEM, 15057e19eda4SAndrey Zonov map->size); 15067e19eda4SAndrey Zonov PROC_UNLOCK(td->td_proc); 15077e19eda4SAndrey Zonov return (ENOMEM); 15087e19eda4SAndrey Zonov } 15097e19eda4SAndrey Zonov error = racct_set(td->td_proc, RACCT_MEMLOCK, 15103ac7d297SAndrey Zonov ptoa(pmap_wired_count(map->pmap)) + size); 15117e19eda4SAndrey Zonov if (error != 0) { 15127e19eda4SAndrey Zonov racct_set_force(td->td_proc, RACCT_VMEM, 15137e19eda4SAndrey Zonov map->size); 15147e19eda4SAndrey Zonov PROC_UNLOCK(td->td_proc); 15157e19eda4SAndrey Zonov return (error); 15167e19eda4SAndrey Zonov } 15177e19eda4SAndrey Zonov } 151891d5354aSJohn Baldwin PROC_UNLOCK(td->td_proc); 1519a6492969SAlan Cox } 1520070f64feSMatthew Dillon 1521df8bae1dSRodney W. Grimes /* 1522bc9ad247SDavid Greenman * We currently can only deal with page aligned file offsets. 1523bc9ad247SDavid Greenman * The check is here rather than in the syscall because the 1524bc9ad247SDavid Greenman * kernel calls this function internally for other mmaping 1525bc9ad247SDavid Greenman * operations (such as in exec) and non-aligned offsets will 1526bc9ad247SDavid Greenman * cause pmap inconsistencies...so we want to be sure to 1527bc9ad247SDavid Greenman * disallow this in all cases. 1528bc9ad247SDavid Greenman */ 1529bc9ad247SDavid Greenman if (foff & PAGE_MASK) 1530bc9ad247SDavid Greenman return (EINVAL); 1531bc9ad247SDavid Greenman 153206cb7259SDavid Greenman if ((flags & MAP_FIXED) == 0) { 153306cb7259SDavid Greenman fitit = TRUE; 153406cb7259SDavid Greenman *addr = round_page(*addr); 153506cb7259SDavid Greenman } else { 153606cb7259SDavid Greenman if (*addr != trunc_page(*addr)) 153706cb7259SDavid Greenman return (EINVAL); 153806cb7259SDavid Greenman fitit = FALSE; 153906cb7259SDavid Greenman } 154084110e7eSKonstantin Belousov writecounted = FALSE; 154184110e7eSKonstantin Belousov 1542bc9ad247SDavid Greenman /* 154324a1cce3SDavid Greenman * Lookup/allocate object. 1544df8bae1dSRodney W. Grimes */ 154598df9218SJohn Baldwin switch (handle_type) { 154698df9218SJohn Baldwin case OBJT_DEVICE: 154798df9218SJohn Baldwin error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, 154864345f0bSJohn Baldwin handle, &foff, &object); 154998df9218SJohn Baldwin break; 155098df9218SJohn Baldwin case OBJT_VNODE: 1551c8daea13SAlexander Kabaev error = vm_mmap_vnode(td, size, prot, &maxprot, &flags, 155284110e7eSKonstantin Belousov handle, &foff, &object, &writecounted); 155398df9218SJohn Baldwin break; 15548e38aeffSJohn Baldwin case OBJT_SWAP: 15558e38aeffSJohn Baldwin error = vm_mmap_shm(td, size, prot, &maxprot, &flags, 15568e38aeffSJohn Baldwin handle, foff, &object); 15578e38aeffSJohn Baldwin break; 155898df9218SJohn Baldwin case OBJT_DEFAULT: 155998df9218SJohn Baldwin if (handle == NULL) { 156098df9218SJohn Baldwin error = 0; 156198df9218SJohn Baldwin break; 156298df9218SJohn Baldwin } 156398df9218SJohn Baldwin /* FALLTHROUGH */ 156498df9218SJohn Baldwin default: 156598df9218SJohn Baldwin error = EINVAL; 15666bda842dSMatt Jacob break; 156798df9218SJohn Baldwin } 156898df9218SJohn Baldwin if (error) 1569c8daea13SAlexander Kabaev return (error); 15705f55e841SDavid Greenman if (flags & MAP_ANON) { 1571c8daea13SAlexander Kabaev object = NULL; 1572c8daea13SAlexander Kabaev docow = 0; 15735f55e841SDavid Greenman /* 15745f55e841SDavid Greenman * Unnamed anonymous regions always start at 0. 15755f55e841SDavid Greenman */ 157667bf6868SJohn Dyson if (handle == 0) 15775f55e841SDavid Greenman foff = 0; 157874ffb9afSAlan Cox } else if (flags & MAP_PREFAULT_READ) 157974ffb9afSAlan Cox docow = MAP_PREFAULT; 158074ffb9afSAlan Cox else 15814738fa09SAlan Cox docow = MAP_PREFAULT_PARTIAL; 1582df8bae1dSRodney W. Grimes 15834f79d873SMatthew Dillon if ((flags & (MAP_ANON|MAP_SHARED)) == 0) 15844738fa09SAlan Cox docow |= MAP_COPY_ON_WRITE; 15854f79d873SMatthew Dillon if (flags & MAP_NOSYNC) 15864f79d873SMatthew Dillon docow |= MAP_DISABLE_SYNCER; 15879730a5daSPaul Saab if (flags & MAP_NOCORE) 15889730a5daSPaul Saab docow |= MAP_DISABLE_COREDUMP; 15898211bd45SKonstantin Belousov /* Shared memory is also shared with children. */ 15908211bd45SKonstantin Belousov if (flags & MAP_SHARED) 15918211bd45SKonstantin Belousov docow |= MAP_INHERIT_SHARE; 159284110e7eSKonstantin Belousov if (writecounted) 159384110e7eSKonstantin Belousov docow |= MAP_VN_WRITECOUNT; 15945850152dSJohn Dyson 15952267af78SJulian Elischer if (flags & MAP_STACK) 1596fd75d710SMarcel Moolenaar rv = vm_map_stack(map, *addr, size, prot, maxprot, 1597fd75d710SMarcel Moolenaar docow | MAP_STACK_GROWS_DOWN); 1598d239bd3cSKonstantin Belousov else if (fitit) 1599d0a83a83SAlan Cox rv = vm_map_find(map, object, foff, addr, size, 1600d0a83a83SAlan Cox object != NULL && object->type == OBJT_DEVICE ? 1601d0a83a83SAlan Cox VMFS_ALIGNED_SPACE : VMFS_ANY_SPACE, prot, maxprot, docow); 16022267af78SJulian Elischer else 1603b8ca4ef2SAlan Cox rv = vm_map_fixed(map, object, foff, *addr, size, 1604bd7e5f99SJohn Dyson prot, maxprot, docow); 1605bd7e5f99SJohn Dyson 1606f9230ad6SAlan Cox if (rv == KERN_SUCCESS) { 16077fb0c17eSDavid Greenman /* 1608f9230ad6SAlan Cox * If the process has requested that all future mappings 1609f9230ad6SAlan Cox * be wired, then heed this. 1610f9230ad6SAlan Cox */ 16111472f4f4SKonstantin Belousov if (map->flags & MAP_WIREFUTURE) { 1612f9230ad6SAlan Cox vm_map_wire(map, *addr, *addr + size, 16131472f4f4SKonstantin Belousov VM_MAP_WIRE_USER | ((flags & MAP_STACK) ? 16141472f4f4SKonstantin Belousov VM_MAP_WIRE_HOLESOK : VM_MAP_WIRE_NOHOLES)); 16151472f4f4SKonstantin Belousov } 1616f9230ad6SAlan Cox } else { 1617f9230ad6SAlan Cox /* 161884110e7eSKonstantin Belousov * If this mapping was accounted for in the vnode's 161984110e7eSKonstantin Belousov * writecount, then undo that now. 16207fb0c17eSDavid Greenman */ 162184110e7eSKonstantin Belousov if (writecounted) 162284110e7eSKonstantin Belousov vnode_pager_release_writecount(object, 0, size); 1623f9230ad6SAlan Cox /* 1624f9230ad6SAlan Cox * Lose the object reference. Will destroy the 1625f9230ad6SAlan Cox * object if it's an unnamed anonymous mapping 1626f9230ad6SAlan Cox * or named anonymous without other references. 1627f9230ad6SAlan Cox */ 1628df8bae1dSRodney W. Grimes vm_object_deallocate(object); 1629df8bae1dSRodney W. Grimes } 16302e32165cSKonstantin Belousov return (vm_mmap_to_errno(rv)); 16312e32165cSKonstantin Belousov } 16322e32165cSKonstantin Belousov 1633f9230ad6SAlan Cox /* 1634f9230ad6SAlan Cox * Translate a Mach VM return code to zero on success or the appropriate errno 1635f9230ad6SAlan Cox * on failure. 1636f9230ad6SAlan Cox */ 16372e32165cSKonstantin Belousov int 16382e32165cSKonstantin Belousov vm_mmap_to_errno(int rv) 16392e32165cSKonstantin Belousov { 16402e32165cSKonstantin Belousov 1641df8bae1dSRodney W. Grimes switch (rv) { 1642df8bae1dSRodney W. Grimes case KERN_SUCCESS: 1643df8bae1dSRodney W. Grimes return (0); 1644df8bae1dSRodney W. Grimes case KERN_INVALID_ADDRESS: 1645df8bae1dSRodney W. Grimes case KERN_NO_SPACE: 1646df8bae1dSRodney W. Grimes return (ENOMEM); 1647df8bae1dSRodney W. Grimes case KERN_PROTECTION_FAILURE: 1648df8bae1dSRodney W. Grimes return (EACCES); 1649df8bae1dSRodney W. Grimes default: 1650df8bae1dSRodney W. Grimes return (EINVAL); 1651df8bae1dSRodney W. Grimes } 1652df8bae1dSRodney W. Grimes } 1653