160727d8bSWarner Losh /*- 2df8bae1dSRodney W. Grimes * Copyright (c) 1988 University of Utah. 3df8bae1dSRodney W. Grimes * Copyright (c) 1991, 1993 4df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 5df8bae1dSRodney W. Grimes * 6df8bae1dSRodney W. Grimes * This code is derived from software contributed to Berkeley by 7df8bae1dSRodney W. Grimes * the Systems Programming Group of the University of Utah Computer 8df8bae1dSRodney W. Grimes * Science Department. 9df8bae1dSRodney W. Grimes * 10df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 11df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 12df8bae1dSRodney W. Grimes * are met: 13df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 14df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 15df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 16df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 17df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 18df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 19df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 20df8bae1dSRodney W. Grimes * without specific prior written permission. 21df8bae1dSRodney W. Grimes * 22df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32df8bae1dSRodney W. Grimes * SUCH DAMAGE. 33df8bae1dSRodney W. Grimes * 34df8bae1dSRodney W. Grimes * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$ 35df8bae1dSRodney W. Grimes * 36df8bae1dSRodney W. Grimes * @(#)vm_mmap.c 8.4 (Berkeley) 1/12/94 37df8bae1dSRodney W. Grimes */ 38df8bae1dSRodney W. Grimes 39df8bae1dSRodney W. Grimes /* 40df8bae1dSRodney W. Grimes * Mapped file (mmap) interface to VM 41df8bae1dSRodney W. Grimes */ 42df8bae1dSRodney W. Grimes 43874651b1SDavid E. O'Brien #include <sys/cdefs.h> 44874651b1SDavid E. O'Brien __FBSDID("$FreeBSD$"); 45874651b1SDavid E. O'Brien 465591b823SEivind Eklund #include "opt_compat.h" 4749874f6eSJoseph Koshy #include "opt_hwpmc_hooks.h" 483e732e7dSRobert Watson #include "opt_mac.h" 49e9822d92SJoerg Wunsch 50df8bae1dSRodney W. Grimes #include <sys/param.h> 51df8bae1dSRodney W. Grimes #include <sys/systm.h> 52fb919e4dSMark Murray #include <sys/kernel.h> 53fb919e4dSMark Murray #include <sys/lock.h> 5423955314SAlfred Perlstein #include <sys/mutex.h> 55d2d3e875SBruce Evans #include <sys/sysproto.h> 56df8bae1dSRodney W. Grimes #include <sys/filedesc.h> 57acd3428bSRobert Watson #include <sys/priv.h> 58df8bae1dSRodney W. Grimes #include <sys/proc.h> 59070f64feSMatthew Dillon #include <sys/resource.h> 60070f64feSMatthew Dillon #include <sys/resourcevar.h> 61df8bae1dSRodney W. Grimes #include <sys/vnode.h> 623ac4d1efSBruce Evans #include <sys/fcntl.h> 63df8bae1dSRodney W. Grimes #include <sys/file.h> 64df8bae1dSRodney W. Grimes #include <sys/mman.h> 65b483c7f6SGuido van Rooij #include <sys/mount.h> 66df8bae1dSRodney W. Grimes #include <sys/conf.h> 674183b6b6SPeter Wemm #include <sys/stat.h> 68efeaf95aSDavid Greenman #include <sys/vmmeter.h> 691f6889a1SMatthew Dillon #include <sys/sysctl.h> 70df8bae1dSRodney W. Grimes 71aed55708SRobert Watson #include <security/mac/mac_framework.h> 72aed55708SRobert Watson 73df8bae1dSRodney W. Grimes #include <vm/vm.h> 74efeaf95aSDavid Greenman #include <vm/vm_param.h> 75efeaf95aSDavid Greenman #include <vm/pmap.h> 76efeaf95aSDavid Greenman #include <vm/vm_map.h> 77efeaf95aSDavid Greenman #include <vm/vm_object.h> 781c7c3c6aSMatthew Dillon #include <vm/vm_page.h> 79df8bae1dSRodney W. Grimes #include <vm/vm_pager.h> 80b5e8ce9fSBruce Evans #include <vm/vm_pageout.h> 81efeaf95aSDavid Greenman #include <vm/vm_extern.h> 82867a482dSJohn Dyson #include <vm/vm_page.h> 831f6889a1SMatthew Dillon #include <vm/vm_kern.h> 84df8bae1dSRodney W. Grimes 8549874f6eSJoseph Koshy #ifdef HWPMC_HOOKS 8649874f6eSJoseph Koshy #include <sys/pmckern.h> 8749874f6eSJoseph Koshy #endif 8849874f6eSJoseph Koshy 89d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 90df8bae1dSRodney W. Grimes struct sbrk_args { 91df8bae1dSRodney W. Grimes int incr; 92df8bae1dSRodney W. Grimes }; 93d2d3e875SBruce Evans #endif 940d94caffSDavid Greenman 951f6889a1SMatthew Dillon static int max_proc_mmap; 966bd9cb1cSTom Rhodes SYSCTL_INT(_vm, OID_AUTO, max_proc_mmap, CTLFLAG_RW, &max_proc_mmap, 0, 976bd9cb1cSTom Rhodes "Maximum number of memory-mapped files per process"); 981f6889a1SMatthew Dillon 991f6889a1SMatthew Dillon /* 1001f6889a1SMatthew Dillon * Set the maximum number of vm_map_entry structures per process. Roughly 1011f6889a1SMatthew Dillon * speaking vm_map_entry structures are tiny, so allowing them to eat 1/100 1021f6889a1SMatthew Dillon * of our KVM malloc space still results in generous limits. We want a 1031f6889a1SMatthew Dillon * default that is good enough to prevent the kernel running out of resources 1041f6889a1SMatthew Dillon * if attacked from compromised user account but generous enough such that 1051f6889a1SMatthew Dillon * multi-threaded processes are not unduly inconvenienced. 1061f6889a1SMatthew Dillon */ 10711caded3SAlfred Perlstein static void vmmapentry_rsrc_init(void *); 108237fdd78SRobert Watson SYSINIT(vmmersrc, SI_SUB_KVM_RSRC, SI_ORDER_FIRST, vmmapentry_rsrc_init, 109237fdd78SRobert Watson NULL); 1101f6889a1SMatthew Dillon 1111f6889a1SMatthew Dillon static void 1121f6889a1SMatthew Dillon vmmapentry_rsrc_init(dummy) 1131f6889a1SMatthew Dillon void *dummy; 1141f6889a1SMatthew Dillon { 1151f6889a1SMatthew Dillon max_proc_mmap = vm_kmem_size / sizeof(struct vm_map_entry); 1161f6889a1SMatthew Dillon max_proc_mmap /= 100; 1171f6889a1SMatthew Dillon } 1181f6889a1SMatthew Dillon 119c8daea13SAlexander Kabaev static int vm_mmap_vnode(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, 120c8daea13SAlexander Kabaev int *, struct vnode *, vm_ooffset_t, vm_object_t *); 12198df9218SJohn Baldwin static int vm_mmap_cdev(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, 12298df9218SJohn Baldwin int *, struct cdev *, vm_ooffset_t, vm_object_t *); 1238e38aeffSJohn Baldwin static int vm_mmap_shm(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, 1248e38aeffSJohn Baldwin int *, struct shmfd *, vm_ooffset_t, vm_object_t *); 125c8daea13SAlexander Kabaev 126d2c60af8SMatthew Dillon /* 127d2c60af8SMatthew Dillon * MPSAFE 128d2c60af8SMatthew Dillon */ 129df8bae1dSRodney W. Grimes /* ARGSUSED */ 130df8bae1dSRodney W. Grimes int 131b40ce416SJulian Elischer sbrk(td, uap) 132b40ce416SJulian Elischer struct thread *td; 133df8bae1dSRodney W. Grimes struct sbrk_args *uap; 134df8bae1dSRodney W. Grimes { 135df8bae1dSRodney W. Grimes /* Not yet implemented */ 136df8bae1dSRodney W. Grimes return (EOPNOTSUPP); 137df8bae1dSRodney W. Grimes } 138df8bae1dSRodney W. Grimes 139d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 140df8bae1dSRodney W. Grimes struct sstk_args { 141df8bae1dSRodney W. Grimes int incr; 142df8bae1dSRodney W. Grimes }; 143d2d3e875SBruce Evans #endif 1440d94caffSDavid Greenman 145d2c60af8SMatthew Dillon /* 146d2c60af8SMatthew Dillon * MPSAFE 147d2c60af8SMatthew Dillon */ 148df8bae1dSRodney W. Grimes /* ARGSUSED */ 149df8bae1dSRodney W. Grimes int 150b40ce416SJulian Elischer sstk(td, uap) 151b40ce416SJulian Elischer struct thread *td; 152df8bae1dSRodney W. Grimes struct sstk_args *uap; 153df8bae1dSRodney W. Grimes { 154df8bae1dSRodney W. Grimes /* Not yet implemented */ 155df8bae1dSRodney W. Grimes return (EOPNOTSUPP); 156df8bae1dSRodney W. Grimes } 157df8bae1dSRodney W. Grimes 1581930e303SPoul-Henning Kamp #if defined(COMPAT_43) 159d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 160df8bae1dSRodney W. Grimes struct getpagesize_args { 161df8bae1dSRodney W. Grimes int dummy; 162df8bae1dSRodney W. Grimes }; 163d2d3e875SBruce Evans #endif 1640d94caffSDavid Greenman 165df8bae1dSRodney W. Grimes /* ARGSUSED */ 166df8bae1dSRodney W. Grimes int 167b40ce416SJulian Elischer ogetpagesize(td, uap) 168b40ce416SJulian Elischer struct thread *td; 169df8bae1dSRodney W. Grimes struct getpagesize_args *uap; 170df8bae1dSRodney W. Grimes { 1710cddd8f0SMatthew Dillon /* MP SAFE */ 172b40ce416SJulian Elischer td->td_retval[0] = PAGE_SIZE; 173df8bae1dSRodney W. Grimes return (0); 174df8bae1dSRodney W. Grimes } 1751930e303SPoul-Henning Kamp #endif /* COMPAT_43 */ 176df8bae1dSRodney W. Grimes 17754f42e4bSPeter Wemm 17854f42e4bSPeter Wemm /* 17954f42e4bSPeter Wemm * Memory Map (mmap) system call. Note that the file offset 18054f42e4bSPeter Wemm * and address are allowed to be NOT page aligned, though if 18154f42e4bSPeter Wemm * the MAP_FIXED flag it set, both must have the same remainder 18254f42e4bSPeter Wemm * modulo the PAGE_SIZE (POSIX 1003.1b). If the address is not 18354f42e4bSPeter Wemm * page-aligned, the actual mapping starts at trunc_page(addr) 18454f42e4bSPeter Wemm * and the return value is adjusted up by the page offset. 185b4309055SMatthew Dillon * 186b4309055SMatthew Dillon * Generally speaking, only character devices which are themselves 187b4309055SMatthew Dillon * memory-based, such as a video framebuffer, can be mmap'd. Otherwise 188b4309055SMatthew Dillon * there would be no cache coherency between a descriptor and a VM mapping 189b4309055SMatthew Dillon * both to the same character device. 190b4309055SMatthew Dillon * 191b4309055SMatthew Dillon * Block devices can be mmap'd no matter what they represent. Cache coherency 192b4309055SMatthew Dillon * is maintained as long as you do not write directly to the underlying 193b4309055SMatthew Dillon * character device. 19454f42e4bSPeter Wemm */ 195d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 196df8bae1dSRodney W. Grimes struct mmap_args { 197651bb817SAlexander Langer void *addr; 198df8bae1dSRodney W. Grimes size_t len; 199df8bae1dSRodney W. Grimes int prot; 200df8bae1dSRodney W. Grimes int flags; 201df8bae1dSRodney W. Grimes int fd; 202df8bae1dSRodney W. Grimes long pad; 203df8bae1dSRodney W. Grimes off_t pos; 204df8bae1dSRodney W. Grimes }; 205d2d3e875SBruce Evans #endif 206df8bae1dSRodney W. Grimes 207d2c60af8SMatthew Dillon /* 208d2c60af8SMatthew Dillon * MPSAFE 209d2c60af8SMatthew Dillon */ 210df8bae1dSRodney W. Grimes int 211b40ce416SJulian Elischer mmap(td, uap) 212b40ce416SJulian Elischer struct thread *td; 21354d92145SMatthew Dillon struct mmap_args *uap; 214df8bae1dSRodney W. Grimes { 21549874f6eSJoseph Koshy #ifdef HWPMC_HOOKS 21649874f6eSJoseph Koshy struct pmckern_map_in pkm; 21749874f6eSJoseph Koshy #endif 218c8daea13SAlexander Kabaev struct file *fp; 219df8bae1dSRodney W. Grimes struct vnode *vp; 220df8bae1dSRodney W. Grimes vm_offset_t addr; 2219154ee6aSPeter Wemm vm_size_t size, pageoff; 222df8bae1dSRodney W. Grimes vm_prot_t prot, maxprot; 223651bb817SAlexander Langer void *handle; 22498df9218SJohn Baldwin objtype_t handle_type; 225df8bae1dSRodney W. Grimes int flags, error; 22654f42e4bSPeter Wemm off_t pos; 227b40ce416SJulian Elischer struct vmspace *vms = td->td_proc->p_vmspace; 228df8bae1dSRodney W. Grimes 22954f42e4bSPeter Wemm addr = (vm_offset_t) uap->addr; 23054f42e4bSPeter Wemm size = uap->len; 231df8bae1dSRodney W. Grimes prot = uap->prot & VM_PROT_ALL; 232df8bae1dSRodney W. Grimes flags = uap->flags; 23354f42e4bSPeter Wemm pos = uap->pos; 23454f42e4bSPeter Wemm 235426da3bcSAlfred Perlstein fp = NULL; 23654f42e4bSPeter Wemm /* make sure mapping fits into numeric range etc */ 237fc565456SDmitrij Tejblum if ((ssize_t) uap->len < 0 || 23854f42e4bSPeter Wemm ((flags & MAP_ANON) && uap->fd != -1)) 239df8bae1dSRodney W. Grimes return (EINVAL); 2409154ee6aSPeter Wemm 2412267af78SJulian Elischer if (flags & MAP_STACK) { 2422267af78SJulian Elischer if ((uap->fd != -1) || 2432267af78SJulian Elischer ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE))) 2442267af78SJulian Elischer return (EINVAL); 2452267af78SJulian Elischer flags |= MAP_ANON; 2462267af78SJulian Elischer pos = 0; 2472907af2aSJulian Elischer } 2482907af2aSJulian Elischer 2499154ee6aSPeter Wemm /* 25054f42e4bSPeter Wemm * Align the file position to a page boundary, 25154f42e4bSPeter Wemm * and save its page offset component. 2529154ee6aSPeter Wemm */ 25354f42e4bSPeter Wemm pageoff = (pos & PAGE_MASK); 25454f42e4bSPeter Wemm pos -= pageoff; 25554f42e4bSPeter Wemm 25654f42e4bSPeter Wemm /* Adjust size for rounding (on both ends). */ 25754f42e4bSPeter Wemm size += pageoff; /* low end... */ 25854f42e4bSPeter Wemm size = (vm_size_t) round_page(size); /* hi end */ 2599154ee6aSPeter Wemm 260df8bae1dSRodney W. Grimes /* 2610d94caffSDavid Greenman * Check for illegal addresses. Watch out for address wrap... Note 2620d94caffSDavid Greenman * that VM_*_ADDRESS are not constants due to casts (argh). 263df8bae1dSRodney W. Grimes */ 264df8bae1dSRodney W. Grimes if (flags & MAP_FIXED) { 26554f42e4bSPeter Wemm /* 26654f42e4bSPeter Wemm * The specified address must have the same remainder 26754f42e4bSPeter Wemm * as the file offset taken modulo PAGE_SIZE, so it 26854f42e4bSPeter Wemm * should be aligned after adjustment by pageoff. 26954f42e4bSPeter Wemm */ 27054f42e4bSPeter Wemm addr -= pageoff; 27154f42e4bSPeter Wemm if (addr & PAGE_MASK) 27254f42e4bSPeter Wemm return (EINVAL); 27354f42e4bSPeter Wemm /* Address range must be all in user VM space. */ 27405ba50f5SJake Burkholder if (addr < vm_map_min(&vms->vm_map) || 27505ba50f5SJake Burkholder addr + size > vm_map_max(&vms->vm_map)) 276df8bae1dSRodney W. Grimes return (EINVAL); 277bbc0ec52SDavid Greenman if (addr + size < addr) 278df8bae1dSRodney W. Grimes return (EINVAL); 27991d5354aSJohn Baldwin } else { 280df8bae1dSRodney W. Grimes /* 28154f42e4bSPeter Wemm * XXX for non-fixed mappings where no hint is provided or 28254f42e4bSPeter Wemm * the hint would fall in the potential heap space, 28354f42e4bSPeter Wemm * place it after the end of the largest possible heap. 284df8bae1dSRodney W. Grimes * 28554f42e4bSPeter Wemm * There should really be a pmap call to determine a reasonable 28654f42e4bSPeter Wemm * location. 287df8bae1dSRodney W. Grimes */ 28891d5354aSJohn Baldwin PROC_LOCK(td->td_proc); 28991d5354aSJohn Baldwin if (addr == 0 || 2901f6889a1SMatthew Dillon (addr >= round_page((vm_offset_t)vms->vm_taddr) && 291c460ac3aSPeter Wemm addr < round_page((vm_offset_t)vms->vm_daddr + 29291d5354aSJohn Baldwin lim_max(td->td_proc, RLIMIT_DATA)))) 293c460ac3aSPeter Wemm addr = round_page((vm_offset_t)vms->vm_daddr + 29491d5354aSJohn Baldwin lim_max(td->td_proc, RLIMIT_DATA)); 29591d5354aSJohn Baldwin PROC_UNLOCK(td->td_proc); 29691d5354aSJohn Baldwin } 297df8bae1dSRodney W. Grimes if (flags & MAP_ANON) { 298df8bae1dSRodney W. Grimes /* 299df8bae1dSRodney W. Grimes * Mapping blank space is trivial. 300df8bae1dSRodney W. Grimes */ 301df8bae1dSRodney W. Grimes handle = NULL; 30298df9218SJohn Baldwin handle_type = OBJT_DEFAULT; 303df8bae1dSRodney W. Grimes maxprot = VM_PROT_ALL; 30454f42e4bSPeter Wemm pos = 0; 30530d4dd7eSAlexander Kabaev } else { 306df8bae1dSRodney W. Grimes /* 3078e38aeffSJohn Baldwin * Mapping file, get fp for validation and 3088e38aeffSJohn Baldwin * don't let the descriptor disappear on us if we block. 309df8bae1dSRodney W. Grimes */ 310a4db4953SAlfred Perlstein if ((error = fget(td, uap->fd, &fp)) != 0) 311426da3bcSAlfred Perlstein goto done; 3128e38aeffSJohn Baldwin if (fp->f_type == DTYPE_SHM) { 3138e38aeffSJohn Baldwin handle = fp->f_data; 3148e38aeffSJohn Baldwin handle_type = OBJT_SWAP; 3158e38aeffSJohn Baldwin maxprot = VM_PROT_NONE; 3168e38aeffSJohn Baldwin 3178e38aeffSJohn Baldwin /* FREAD should always be set. */ 3188e38aeffSJohn Baldwin if (fp->f_flag & FREAD) 3198e38aeffSJohn Baldwin maxprot |= VM_PROT_EXECUTE | VM_PROT_READ; 3208e38aeffSJohn Baldwin if (fp->f_flag & FWRITE) 3218e38aeffSJohn Baldwin maxprot |= VM_PROT_WRITE; 3228e38aeffSJohn Baldwin goto map; 3238e38aeffSJohn Baldwin } 324e4ca250dSJohn Baldwin if (fp->f_type != DTYPE_VNODE) { 32589eae00bSTom Rhodes error = ENODEV; 326426da3bcSAlfred Perlstein goto done; 327e4ca250dSJohn Baldwin } 3288e38aeffSJohn Baldwin #if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \ 3298e38aeffSJohn Baldwin defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) 330279d7226SMatthew Dillon /* 331aa543039SGarrett Wollman * POSIX shared-memory objects are defined to have 332aa543039SGarrett Wollman * kernel persistence, and are not defined to support 333aa543039SGarrett Wollman * read(2)/write(2) -- or even open(2). Thus, we can 334aa543039SGarrett Wollman * use MAP_ASYNC to trade on-disk coherence for speed. 335aa543039SGarrett Wollman * The shm_open(3) library routine turns on the FPOSIXSHM 336aa543039SGarrett Wollman * flag to request this behavior. 337aa543039SGarrett Wollman */ 338aa543039SGarrett Wollman if (fp->f_flag & FPOSIXSHM) 339aa543039SGarrett Wollman flags |= MAP_NOSYNC; 3408e38aeffSJohn Baldwin #endif 3413b6d9652SPoul-Henning Kamp vp = fp->f_vnode; 342c8bdd56bSGuido van Rooij /* 343df8bae1dSRodney W. Grimes * Ensure that file and memory protections are 344df8bae1dSRodney W. Grimes * compatible. Note that we only worry about 345df8bae1dSRodney W. Grimes * writability if mapping is shared; in this case, 346df8bae1dSRodney W. Grimes * current and max prot are dictated by the open file. 347df8bae1dSRodney W. Grimes * XXX use the vnode instead? Problem is: what 3480d94caffSDavid Greenman * credentials do we use for determination? What if 3490d94caffSDavid Greenman * proc does a setuid? 350df8bae1dSRodney W. Grimes */ 3518eec77b0STim J. Robbins if (vp->v_mount != NULL && vp->v_mount->mnt_flag & MNT_NOEXEC) 352b483c7f6SGuido van Rooij maxprot = VM_PROT_NONE; 353b483c7f6SGuido van Rooij else 354b483c7f6SGuido van Rooij maxprot = VM_PROT_EXECUTE; 355279d7226SMatthew Dillon if (fp->f_flag & FREAD) { 356df8bae1dSRodney W. Grimes maxprot |= VM_PROT_READ; 357279d7226SMatthew Dillon } else if (prot & PROT_READ) { 358279d7226SMatthew Dillon error = EACCES; 359279d7226SMatthew Dillon goto done; 360279d7226SMatthew Dillon } 361c8bdd56bSGuido van Rooij /* 362c8bdd56bSGuido van Rooij * If we are sharing potential changes (either via 363c8bdd56bSGuido van Rooij * MAP_SHARED or via the implicit sharing of character 364c8bdd56bSGuido van Rooij * device mappings), and we are trying to get write 365c8bdd56bSGuido van Rooij * permission although we opened it without asking 366c8daea13SAlexander Kabaev * for it, bail out. 367c8bdd56bSGuido van Rooij */ 368ce7a036dSAlexander Kabaev if ((flags & MAP_SHARED) != 0) { 36905feb99fSGuido van Rooij if ((fp->f_flag & FWRITE) != 0) { 370df8bae1dSRodney W. Grimes maxprot |= VM_PROT_WRITE; 371279d7226SMatthew Dillon } else if ((prot & PROT_WRITE) != 0) { 372279d7226SMatthew Dillon error = EACCES; 373279d7226SMatthew Dillon goto done; 374279d7226SMatthew Dillon } 375ce7a036dSAlexander Kabaev } else if (vp->v_type != VCHR || (fp->f_flag & FWRITE) != 0) { 37605feb99fSGuido van Rooij maxprot |= VM_PROT_WRITE; 377279d7226SMatthew Dillon } 378651bb817SAlexander Langer handle = (void *)vp; 37998df9218SJohn Baldwin handle_type = OBJT_VNODE; 38030d4dd7eSAlexander Kabaev } 3818e38aeffSJohn Baldwin map: 3821f6889a1SMatthew Dillon 3831f6889a1SMatthew Dillon /* 3841f6889a1SMatthew Dillon * Do not allow more then a certain number of vm_map_entry structures 3851f6889a1SMatthew Dillon * per process. Scale with the number of rforks sharing the map 3861f6889a1SMatthew Dillon * to make the limit reasonable for threads. 3871f6889a1SMatthew Dillon */ 3881f6889a1SMatthew Dillon if (max_proc_mmap && 3891f6889a1SMatthew Dillon vms->vm_map.nentries >= max_proc_mmap * vms->vm_refcnt) { 390279d7226SMatthew Dillon error = ENOMEM; 391279d7226SMatthew Dillon goto done; 3921f6889a1SMatthew Dillon } 3931f6889a1SMatthew Dillon 39436b90789SKonstantin Belousov td->td_fpop = fp; 3951f6889a1SMatthew Dillon error = vm_mmap(&vms->vm_map, &addr, size, prot, maxprot, 39698df9218SJohn Baldwin flags, handle_type, handle, pos); 39736b90789SKonstantin Belousov td->td_fpop = NULL; 39849874f6eSJoseph Koshy #ifdef HWPMC_HOOKS 39949874f6eSJoseph Koshy /* inform hwpmc(4) if an executable is being mapped */ 40049874f6eSJoseph Koshy if (error == 0 && handle_type == OBJT_VNODE && 40149874f6eSJoseph Koshy (prot & PROT_EXEC)) { 40249874f6eSJoseph Koshy pkm.pm_file = handle; 40349874f6eSJoseph Koshy pkm.pm_address = (uintptr_t) addr; 40449874f6eSJoseph Koshy PMC_CALL_HOOK(td, PMC_FN_MMAP, (void *) &pkm); 40549874f6eSJoseph Koshy } 40649874f6eSJoseph Koshy #endif 407df8bae1dSRodney W. Grimes if (error == 0) 408b40ce416SJulian Elischer td->td_retval[0] = (register_t) (addr + pageoff); 409279d7226SMatthew Dillon done: 410279d7226SMatthew Dillon if (fp) 411b40ce416SJulian Elischer fdrop(fp, td); 412f6b5b182SJeff Roberson 413df8bae1dSRodney W. Grimes return (error); 414df8bae1dSRodney W. Grimes } 415df8bae1dSRodney W. Grimes 416c2815ad5SPeter Wemm int 417c2815ad5SPeter Wemm freebsd6_mmap(struct thread *td, struct freebsd6_mmap_args *uap) 418c2815ad5SPeter Wemm { 419c2815ad5SPeter Wemm struct mmap_args oargs; 420c2815ad5SPeter Wemm 421c2815ad5SPeter Wemm oargs.addr = uap->addr; 422c2815ad5SPeter Wemm oargs.len = uap->len; 423c2815ad5SPeter Wemm oargs.prot = uap->prot; 424c2815ad5SPeter Wemm oargs.flags = uap->flags; 425c2815ad5SPeter Wemm oargs.fd = uap->fd; 426c2815ad5SPeter Wemm oargs.pos = uap->pos; 427c2815ad5SPeter Wemm return (mmap(td, &oargs)); 428c2815ad5SPeter Wemm } 429c2815ad5SPeter Wemm 43005f0fdd2SPoul-Henning Kamp #ifdef COMPAT_43 431d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 43205f0fdd2SPoul-Henning Kamp struct ommap_args { 43305f0fdd2SPoul-Henning Kamp caddr_t addr; 43405f0fdd2SPoul-Henning Kamp int len; 43505f0fdd2SPoul-Henning Kamp int prot; 43605f0fdd2SPoul-Henning Kamp int flags; 43705f0fdd2SPoul-Henning Kamp int fd; 43805f0fdd2SPoul-Henning Kamp long pos; 43905f0fdd2SPoul-Henning Kamp }; 440d2d3e875SBruce Evans #endif 44105f0fdd2SPoul-Henning Kamp int 442b40ce416SJulian Elischer ommap(td, uap) 443b40ce416SJulian Elischer struct thread *td; 44454d92145SMatthew Dillon struct ommap_args *uap; 44505f0fdd2SPoul-Henning Kamp { 44605f0fdd2SPoul-Henning Kamp struct mmap_args nargs; 44705f0fdd2SPoul-Henning Kamp static const char cvtbsdprot[8] = { 44805f0fdd2SPoul-Henning Kamp 0, 44905f0fdd2SPoul-Henning Kamp PROT_EXEC, 45005f0fdd2SPoul-Henning Kamp PROT_WRITE, 45105f0fdd2SPoul-Henning Kamp PROT_EXEC | PROT_WRITE, 45205f0fdd2SPoul-Henning Kamp PROT_READ, 45305f0fdd2SPoul-Henning Kamp PROT_EXEC | PROT_READ, 45405f0fdd2SPoul-Henning Kamp PROT_WRITE | PROT_READ, 45505f0fdd2SPoul-Henning Kamp PROT_EXEC | PROT_WRITE | PROT_READ, 45605f0fdd2SPoul-Henning Kamp }; 4570d94caffSDavid Greenman 45805f0fdd2SPoul-Henning Kamp #define OMAP_ANON 0x0002 45905f0fdd2SPoul-Henning Kamp #define OMAP_COPY 0x0020 46005f0fdd2SPoul-Henning Kamp #define OMAP_SHARED 0x0010 46105f0fdd2SPoul-Henning Kamp #define OMAP_FIXED 0x0100 46205f0fdd2SPoul-Henning Kamp 46305f0fdd2SPoul-Henning Kamp nargs.addr = uap->addr; 46405f0fdd2SPoul-Henning Kamp nargs.len = uap->len; 46505f0fdd2SPoul-Henning Kamp nargs.prot = cvtbsdprot[uap->prot & 0x7]; 46605f0fdd2SPoul-Henning Kamp nargs.flags = 0; 46705f0fdd2SPoul-Henning Kamp if (uap->flags & OMAP_ANON) 46805f0fdd2SPoul-Henning Kamp nargs.flags |= MAP_ANON; 46905f0fdd2SPoul-Henning Kamp if (uap->flags & OMAP_COPY) 47005f0fdd2SPoul-Henning Kamp nargs.flags |= MAP_COPY; 47105f0fdd2SPoul-Henning Kamp if (uap->flags & OMAP_SHARED) 47205f0fdd2SPoul-Henning Kamp nargs.flags |= MAP_SHARED; 47305f0fdd2SPoul-Henning Kamp else 47405f0fdd2SPoul-Henning Kamp nargs.flags |= MAP_PRIVATE; 47505f0fdd2SPoul-Henning Kamp if (uap->flags & OMAP_FIXED) 47605f0fdd2SPoul-Henning Kamp nargs.flags |= MAP_FIXED; 47705f0fdd2SPoul-Henning Kamp nargs.fd = uap->fd; 47805f0fdd2SPoul-Henning Kamp nargs.pos = uap->pos; 479b40ce416SJulian Elischer return (mmap(td, &nargs)); 48005f0fdd2SPoul-Henning Kamp } 48105f0fdd2SPoul-Henning Kamp #endif /* COMPAT_43 */ 48205f0fdd2SPoul-Henning Kamp 48305f0fdd2SPoul-Henning Kamp 484d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 485df8bae1dSRodney W. Grimes struct msync_args { 486651bb817SAlexander Langer void *addr; 487c899450bSPeter Wemm size_t len; 488e6c6af11SDavid Greenman int flags; 489df8bae1dSRodney W. Grimes }; 490d2d3e875SBruce Evans #endif 491d2c60af8SMatthew Dillon /* 492d2c60af8SMatthew Dillon * MPSAFE 493d2c60af8SMatthew Dillon */ 494df8bae1dSRodney W. Grimes int 495b40ce416SJulian Elischer msync(td, uap) 496b40ce416SJulian Elischer struct thread *td; 497df8bae1dSRodney W. Grimes struct msync_args *uap; 498df8bae1dSRodney W. Grimes { 499df8bae1dSRodney W. Grimes vm_offset_t addr; 500dabee6feSPeter Wemm vm_size_t size, pageoff; 501e6c6af11SDavid Greenman int flags; 502df8bae1dSRodney W. Grimes vm_map_t map; 503df8bae1dSRodney W. Grimes int rv; 504df8bae1dSRodney W. Grimes 505df8bae1dSRodney W. Grimes addr = (vm_offset_t) uap->addr; 5069154ee6aSPeter Wemm size = uap->len; 507e6c6af11SDavid Greenman flags = uap->flags; 508e6c6af11SDavid Greenman 509dabee6feSPeter Wemm pageoff = (addr & PAGE_MASK); 510dabee6feSPeter Wemm addr -= pageoff; 511dabee6feSPeter Wemm size += pageoff; 512dabee6feSPeter Wemm size = (vm_size_t) round_page(size); 5139154ee6aSPeter Wemm if (addr + size < addr) 514dabee6feSPeter Wemm return (EINVAL); 515dabee6feSPeter Wemm 516dabee6feSPeter Wemm if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE)) 5171e62bc63SDavid Greenman return (EINVAL); 5181e62bc63SDavid Greenman 519b40ce416SJulian Elischer map = &td->td_proc->p_vmspace->vm_map; 5209154ee6aSPeter Wemm 521df8bae1dSRodney W. Grimes /* 522df8bae1dSRodney W. Grimes * Clean the pages and interpret the return value. 523df8bae1dSRodney W. Grimes */ 524950f8459SAlan Cox rv = vm_map_sync(map, addr, addr + size, (flags & MS_ASYNC) == 0, 525e6c6af11SDavid Greenman (flags & MS_INVALIDATE) != 0); 526df8bae1dSRodney W. Grimes switch (rv) { 527df8bae1dSRodney W. Grimes case KERN_SUCCESS: 528d2c60af8SMatthew Dillon return (0); 529df8bae1dSRodney W. Grimes case KERN_INVALID_ADDRESS: 530df8bae1dSRodney W. Grimes return (EINVAL); /* Sun returns ENOMEM? */ 531b7b7cd44SAlan Cox case KERN_INVALID_ARGUMENT: 532b7b7cd44SAlan Cox return (EBUSY); 533df8bae1dSRodney W. Grimes default: 534df8bae1dSRodney W. Grimes return (EINVAL); 535df8bae1dSRodney W. Grimes } 536df8bae1dSRodney W. Grimes } 537df8bae1dSRodney W. Grimes 538d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 539df8bae1dSRodney W. Grimes struct munmap_args { 540651bb817SAlexander Langer void *addr; 5419154ee6aSPeter Wemm size_t len; 542df8bae1dSRodney W. Grimes }; 543d2d3e875SBruce Evans #endif 544d2c60af8SMatthew Dillon /* 545d2c60af8SMatthew Dillon * MPSAFE 546d2c60af8SMatthew Dillon */ 547df8bae1dSRodney W. Grimes int 548b40ce416SJulian Elischer munmap(td, uap) 549b40ce416SJulian Elischer struct thread *td; 55054d92145SMatthew Dillon struct munmap_args *uap; 551df8bae1dSRodney W. Grimes { 55249874f6eSJoseph Koshy #ifdef HWPMC_HOOKS 55349874f6eSJoseph Koshy struct pmckern_map_out pkm; 55449874f6eSJoseph Koshy vm_map_entry_t entry; 55549874f6eSJoseph Koshy #endif 556df8bae1dSRodney W. Grimes vm_offset_t addr; 557dabee6feSPeter Wemm vm_size_t size, pageoff; 558df8bae1dSRodney W. Grimes vm_map_t map; 559df8bae1dSRodney W. Grimes 560df8bae1dSRodney W. Grimes addr = (vm_offset_t) uap->addr; 5619154ee6aSPeter Wemm size = uap->len; 562d8834602SAlan Cox if (size == 0) 563d8834602SAlan Cox return (EINVAL); 564dabee6feSPeter Wemm 565dabee6feSPeter Wemm pageoff = (addr & PAGE_MASK); 566dabee6feSPeter Wemm addr -= pageoff; 567dabee6feSPeter Wemm size += pageoff; 568dabee6feSPeter Wemm size = (vm_size_t) round_page(size); 5699154ee6aSPeter Wemm if (addr + size < addr) 570df8bae1dSRodney W. Grimes return (EINVAL); 5719154ee6aSPeter Wemm 572df8bae1dSRodney W. Grimes /* 57305ba50f5SJake Burkholder * Check for illegal addresses. Watch out for address wrap... 574df8bae1dSRodney W. Grimes */ 575b40ce416SJulian Elischer map = &td->td_proc->p_vmspace->vm_map; 57605ba50f5SJake Burkholder if (addr < vm_map_min(map) || addr + size > vm_map_max(map)) 57705ba50f5SJake Burkholder return (EINVAL); 578d8834602SAlan Cox vm_map_lock(map); 57949874f6eSJoseph Koshy #ifdef HWPMC_HOOKS 58049874f6eSJoseph Koshy /* 58149874f6eSJoseph Koshy * Inform hwpmc if the address range being unmapped contains 58249874f6eSJoseph Koshy * an executable region. 58349874f6eSJoseph Koshy */ 58449874f6eSJoseph Koshy if (vm_map_lookup_entry(map, addr, &entry)) { 58549874f6eSJoseph Koshy for (; 58649874f6eSJoseph Koshy entry != &map->header && entry->start < addr + size; 58749874f6eSJoseph Koshy entry = entry->next) { 58849874f6eSJoseph Koshy if (vm_map_check_protection(map, entry->start, 58949874f6eSJoseph Koshy entry->end, VM_PROT_EXECUTE) == TRUE) { 59049874f6eSJoseph Koshy pkm.pm_address = (uintptr_t) addr; 59149874f6eSJoseph Koshy pkm.pm_size = (size_t) size; 59249874f6eSJoseph Koshy PMC_CALL_HOOK(td, PMC_FN_MUNMAP, 59349874f6eSJoseph Koshy (void *) &pkm); 59449874f6eSJoseph Koshy break; 59549874f6eSJoseph Koshy } 59649874f6eSJoseph Koshy } 59749874f6eSJoseph Koshy } 59849874f6eSJoseph Koshy #endif 599df8bae1dSRodney W. Grimes /* returns nothing but KERN_SUCCESS anyway */ 600d8834602SAlan Cox vm_map_delete(map, addr, addr + size); 601d8834602SAlan Cox vm_map_unlock(map); 602df8bae1dSRodney W. Grimes return (0); 603df8bae1dSRodney W. Grimes } 604df8bae1dSRodney W. Grimes 605d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 606df8bae1dSRodney W. Grimes struct mprotect_args { 607651bb817SAlexander Langer const void *addr; 6089154ee6aSPeter Wemm size_t len; 609df8bae1dSRodney W. Grimes int prot; 610df8bae1dSRodney W. Grimes }; 611d2d3e875SBruce Evans #endif 612d2c60af8SMatthew Dillon /* 613d2c60af8SMatthew Dillon * MPSAFE 614d2c60af8SMatthew Dillon */ 615df8bae1dSRodney W. Grimes int 616b40ce416SJulian Elischer mprotect(td, uap) 617b40ce416SJulian Elischer struct thread *td; 618df8bae1dSRodney W. Grimes struct mprotect_args *uap; 619df8bae1dSRodney W. Grimes { 620df8bae1dSRodney W. Grimes vm_offset_t addr; 621dabee6feSPeter Wemm vm_size_t size, pageoff; 62254d92145SMatthew Dillon vm_prot_t prot; 623df8bae1dSRodney W. Grimes 624df8bae1dSRodney W. Grimes addr = (vm_offset_t) uap->addr; 6259154ee6aSPeter Wemm size = uap->len; 626df8bae1dSRodney W. Grimes prot = uap->prot & VM_PROT_ALL; 627d0aea04fSJohn Dyson #if defined(VM_PROT_READ_IS_EXEC) 628d0aea04fSJohn Dyson if (prot & VM_PROT_READ) 629d0aea04fSJohn Dyson prot |= VM_PROT_EXECUTE; 630d0aea04fSJohn Dyson #endif 631df8bae1dSRodney W. Grimes 632dabee6feSPeter Wemm pageoff = (addr & PAGE_MASK); 633dabee6feSPeter Wemm addr -= pageoff; 634dabee6feSPeter Wemm size += pageoff; 635dabee6feSPeter Wemm size = (vm_size_t) round_page(size); 6369154ee6aSPeter Wemm if (addr + size < addr) 637dabee6feSPeter Wemm return (EINVAL); 638dabee6feSPeter Wemm 63943285049SAlan Cox switch (vm_map_protect(&td->td_proc->p_vmspace->vm_map, addr, 64043285049SAlan Cox addr + size, prot, FALSE)) { 641df8bae1dSRodney W. Grimes case KERN_SUCCESS: 642df8bae1dSRodney W. Grimes return (0); 643df8bae1dSRodney W. Grimes case KERN_PROTECTION_FAILURE: 644df8bae1dSRodney W. Grimes return (EACCES); 645df8bae1dSRodney W. Grimes } 646df8bae1dSRodney W. Grimes return (EINVAL); 647df8bae1dSRodney W. Grimes } 648df8bae1dSRodney W. Grimes 649d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 650dabee6feSPeter Wemm struct minherit_args { 651651bb817SAlexander Langer void *addr; 6529154ee6aSPeter Wemm size_t len; 653dabee6feSPeter Wemm int inherit; 654dabee6feSPeter Wemm }; 655dabee6feSPeter Wemm #endif 656d2c60af8SMatthew Dillon /* 657d2c60af8SMatthew Dillon * MPSAFE 658d2c60af8SMatthew Dillon */ 659dabee6feSPeter Wemm int 660b40ce416SJulian Elischer minherit(td, uap) 661b40ce416SJulian Elischer struct thread *td; 662dabee6feSPeter Wemm struct minherit_args *uap; 663dabee6feSPeter Wemm { 664dabee6feSPeter Wemm vm_offset_t addr; 665dabee6feSPeter Wemm vm_size_t size, pageoff; 66654d92145SMatthew Dillon vm_inherit_t inherit; 667dabee6feSPeter Wemm 668dabee6feSPeter Wemm addr = (vm_offset_t)uap->addr; 6699154ee6aSPeter Wemm size = uap->len; 670dabee6feSPeter Wemm inherit = uap->inherit; 671dabee6feSPeter Wemm 672dabee6feSPeter Wemm pageoff = (addr & PAGE_MASK); 673dabee6feSPeter Wemm addr -= pageoff; 674dabee6feSPeter Wemm size += pageoff; 675dabee6feSPeter Wemm size = (vm_size_t) round_page(size); 6769154ee6aSPeter Wemm if (addr + size < addr) 677dabee6feSPeter Wemm return (EINVAL); 678dabee6feSPeter Wemm 679e0be79afSAlan Cox switch (vm_map_inherit(&td->td_proc->p_vmspace->vm_map, addr, 680e0be79afSAlan Cox addr + size, inherit)) { 681dabee6feSPeter Wemm case KERN_SUCCESS: 682dabee6feSPeter Wemm return (0); 683dabee6feSPeter Wemm case KERN_PROTECTION_FAILURE: 684dabee6feSPeter Wemm return (EACCES); 685dabee6feSPeter Wemm } 686dabee6feSPeter Wemm return (EINVAL); 687dabee6feSPeter Wemm } 688dabee6feSPeter Wemm 689dabee6feSPeter Wemm #ifndef _SYS_SYSPROTO_H_ 690df8bae1dSRodney W. Grimes struct madvise_args { 691651bb817SAlexander Langer void *addr; 6929154ee6aSPeter Wemm size_t len; 693df8bae1dSRodney W. Grimes int behav; 694df8bae1dSRodney W. Grimes }; 695d2d3e875SBruce Evans #endif 6960d94caffSDavid Greenman 697d2c60af8SMatthew Dillon /* 698d2c60af8SMatthew Dillon * MPSAFE 699d2c60af8SMatthew Dillon */ 700df8bae1dSRodney W. Grimes /* ARGSUSED */ 701df8bae1dSRodney W. Grimes int 702b40ce416SJulian Elischer madvise(td, uap) 703b40ce416SJulian Elischer struct thread *td; 704df8bae1dSRodney W. Grimes struct madvise_args *uap; 705df8bae1dSRodney W. Grimes { 706f35329acSJohn Dyson vm_offset_t start, end; 70705ba50f5SJake Burkholder vm_map_t map; 708f4cf2141SWes Peters struct proc *p; 709f4cf2141SWes Peters int error; 710b4309055SMatthew Dillon 711b4309055SMatthew Dillon /* 712f4cf2141SWes Peters * Check for our special case, advising the swap pager we are 713f4cf2141SWes Peters * "immortal." 714f4cf2141SWes Peters */ 715f4cf2141SWes Peters if (uap->behav == MADV_PROTECT) { 716acd3428bSRobert Watson error = priv_check(td, PRIV_VM_MADV_PROTECT); 71769297bf8SJohn Baldwin if (error == 0) { 718f4cf2141SWes Peters p = td->td_proc; 719f4cf2141SWes Peters PROC_LOCK(p); 720f4cf2141SWes Peters p->p_flag |= P_PROTECTED; 721f4cf2141SWes Peters PROC_UNLOCK(p); 72269297bf8SJohn Baldwin } 723f4cf2141SWes Peters return (error); 724f4cf2141SWes Peters } 725f4cf2141SWes Peters /* 726b4309055SMatthew Dillon * Check for illegal behavior 727b4309055SMatthew Dillon */ 7289730a5daSPaul Saab if (uap->behav < 0 || uap->behav > MADV_CORE) 729b4309055SMatthew Dillon return (EINVAL); 730867a482dSJohn Dyson /* 731867a482dSJohn Dyson * Check for illegal addresses. Watch out for address wrap... Note 732867a482dSJohn Dyson * that VM_*_ADDRESS are not constants due to casts (argh). 733867a482dSJohn Dyson */ 73405ba50f5SJake Burkholder map = &td->td_proc->p_vmspace->vm_map; 73505ba50f5SJake Burkholder if ((vm_offset_t)uap->addr < vm_map_min(map) || 73605ba50f5SJake Burkholder (vm_offset_t)uap->addr + uap->len > vm_map_max(map)) 737867a482dSJohn Dyson return (EINVAL); 738867a482dSJohn Dyson if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr) 739867a482dSJohn Dyson return (EINVAL); 740867a482dSJohn Dyson 741867a482dSJohn Dyson /* 742867a482dSJohn Dyson * Since this routine is only advisory, we default to conservative 743867a482dSJohn Dyson * behavior. 744867a482dSJohn Dyson */ 745cd6eea25SDavid Greenman start = trunc_page((vm_offset_t) uap->addr); 746cd6eea25SDavid Greenman end = round_page((vm_offset_t) uap->addr + uap->len); 747867a482dSJohn Dyson 74805ba50f5SJake Burkholder if (vm_map_madvise(map, start, end, uap->behav)) 749094f6d26SAlan Cox return (EINVAL); 750094f6d26SAlan Cox return (0); 751df8bae1dSRodney W. Grimes } 752df8bae1dSRodney W. Grimes 753d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 754df8bae1dSRodney W. Grimes struct mincore_args { 755651bb817SAlexander Langer const void *addr; 7569154ee6aSPeter Wemm size_t len; 757df8bae1dSRodney W. Grimes char *vec; 758df8bae1dSRodney W. Grimes }; 759d2d3e875SBruce Evans #endif 7600d94caffSDavid Greenman 761d2c60af8SMatthew Dillon /* 762d2c60af8SMatthew Dillon * MPSAFE 763d2c60af8SMatthew Dillon */ 764df8bae1dSRodney W. Grimes /* ARGSUSED */ 765df8bae1dSRodney W. Grimes int 766b40ce416SJulian Elischer mincore(td, uap) 767b40ce416SJulian Elischer struct thread *td; 768df8bae1dSRodney W. Grimes struct mincore_args *uap; 769df8bae1dSRodney W. Grimes { 770867a482dSJohn Dyson vm_offset_t addr, first_addr; 771867a482dSJohn Dyson vm_offset_t end, cend; 772867a482dSJohn Dyson pmap_t pmap; 773867a482dSJohn Dyson vm_map_t map; 77402c04a2fSJohn Dyson char *vec; 775d2c60af8SMatthew Dillon int error = 0; 776867a482dSJohn Dyson int vecindex, lastvecindex; 77754d92145SMatthew Dillon vm_map_entry_t current; 778867a482dSJohn Dyson vm_map_entry_t entry; 779867a482dSJohn Dyson int mincoreinfo; 780dd2622a8SAlan Cox unsigned int timestamp; 781df8bae1dSRodney W. Grimes 782867a482dSJohn Dyson /* 783867a482dSJohn Dyson * Make sure that the addresses presented are valid for user 784867a482dSJohn Dyson * mode. 785867a482dSJohn Dyson */ 786867a482dSJohn Dyson first_addr = addr = trunc_page((vm_offset_t) uap->addr); 7879154ee6aSPeter Wemm end = addr + (vm_size_t)round_page(uap->len); 78805ba50f5SJake Burkholder map = &td->td_proc->p_vmspace->vm_map; 78905ba50f5SJake Burkholder if (end > vm_map_max(map) || end < addr) 790455dd7d4SKonstantin Belousov return (ENOMEM); 79102c04a2fSJohn Dyson 792867a482dSJohn Dyson /* 793867a482dSJohn Dyson * Address of byte vector 794867a482dSJohn Dyson */ 79502c04a2fSJohn Dyson vec = uap->vec; 796867a482dSJohn Dyson 797b40ce416SJulian Elischer pmap = vmspace_pmap(td->td_proc->p_vmspace); 798867a482dSJohn Dyson 799eff50fcdSAlan Cox vm_map_lock_read(map); 800dd2622a8SAlan Cox RestartScan: 801dd2622a8SAlan Cox timestamp = map->timestamp; 802867a482dSJohn Dyson 803455dd7d4SKonstantin Belousov if (!vm_map_lookup_entry(map, addr, &entry)) { 804455dd7d4SKonstantin Belousov vm_map_unlock_read(map); 805455dd7d4SKonstantin Belousov return (ENOMEM); 806455dd7d4SKonstantin Belousov } 807867a482dSJohn Dyson 808867a482dSJohn Dyson /* 809867a482dSJohn Dyson * Do this on a map entry basis so that if the pages are not 810867a482dSJohn Dyson * in the current processes address space, we can easily look 811867a482dSJohn Dyson * up the pages elsewhere. 812867a482dSJohn Dyson */ 813867a482dSJohn Dyson lastvecindex = -1; 814867a482dSJohn Dyson for (current = entry; 815867a482dSJohn Dyson (current != &map->header) && (current->start < end); 816867a482dSJohn Dyson current = current->next) { 817867a482dSJohn Dyson 818867a482dSJohn Dyson /* 819455dd7d4SKonstantin Belousov * check for contiguity 820455dd7d4SKonstantin Belousov */ 821455dd7d4SKonstantin Belousov if (current->end < end && 822455dd7d4SKonstantin Belousov (entry->next == &map->header || 823455dd7d4SKonstantin Belousov current->next->start > current->end)) { 824455dd7d4SKonstantin Belousov vm_map_unlock_read(map); 825455dd7d4SKonstantin Belousov return (ENOMEM); 826455dd7d4SKonstantin Belousov } 827455dd7d4SKonstantin Belousov 828455dd7d4SKonstantin Belousov /* 829867a482dSJohn Dyson * ignore submaps (for now) or null objects 830867a482dSJohn Dyson */ 8319fdfe602SMatthew Dillon if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) || 832867a482dSJohn Dyson current->object.vm_object == NULL) 833867a482dSJohn Dyson continue; 834867a482dSJohn Dyson 835867a482dSJohn Dyson /* 836867a482dSJohn Dyson * limit this scan to the current map entry and the 837867a482dSJohn Dyson * limits for the mincore call 838867a482dSJohn Dyson */ 839867a482dSJohn Dyson if (addr < current->start) 840867a482dSJohn Dyson addr = current->start; 841867a482dSJohn Dyson cend = current->end; 842867a482dSJohn Dyson if (cend > end) 843867a482dSJohn Dyson cend = end; 844867a482dSJohn Dyson 845867a482dSJohn Dyson /* 846867a482dSJohn Dyson * scan this entry one page at a time 847867a482dSJohn Dyson */ 848867a482dSJohn Dyson while (addr < cend) { 849867a482dSJohn Dyson /* 850867a482dSJohn Dyson * Check pmap first, it is likely faster, also 851867a482dSJohn Dyson * it can provide info as to whether we are the 852867a482dSJohn Dyson * one referencing or modifying the page. 853867a482dSJohn Dyson */ 854867a482dSJohn Dyson mincoreinfo = pmap_mincore(pmap, addr); 855867a482dSJohn Dyson if (!mincoreinfo) { 856867a482dSJohn Dyson vm_pindex_t pindex; 857867a482dSJohn Dyson vm_ooffset_t offset; 858867a482dSJohn Dyson vm_page_t m; 859867a482dSJohn Dyson /* 860867a482dSJohn Dyson * calculate the page index into the object 861867a482dSJohn Dyson */ 862867a482dSJohn Dyson offset = current->offset + (addr - current->start); 863867a482dSJohn Dyson pindex = OFF_TO_IDX(offset); 864bc5b057fSAlan Cox VM_OBJECT_LOCK(current->object.vm_object); 865867a482dSJohn Dyson m = vm_page_lookup(current->object.vm_object, 866867a482dSJohn Dyson pindex); 867867a482dSJohn Dyson /* 868867a482dSJohn Dyson * if the page is resident, then gather information about 869867a482dSJohn Dyson * it. 870867a482dSJohn Dyson */ 871cafe836aSAlan Cox if (m != NULL && m->valid != 0) { 872867a482dSJohn Dyson mincoreinfo = MINCORE_INCORE; 8737ebcee37SAlan Cox vm_page_lock_queues(); 874867a482dSJohn Dyson if (m->dirty || 8750385347cSPeter Wemm pmap_is_modified(m)) 876867a482dSJohn Dyson mincoreinfo |= MINCORE_MODIFIED_OTHER; 877867a482dSJohn Dyson if ((m->flags & PG_REFERENCED) || 8780385347cSPeter Wemm pmap_ts_referenced(m)) { 879e69763a3SDoug Rabson vm_page_flag_set(m, PG_REFERENCED); 880867a482dSJohn Dyson mincoreinfo |= MINCORE_REFERENCED_OTHER; 88102c04a2fSJohn Dyson } 882e80b7b69SAlan Cox vm_page_unlock_queues(); 8839b5a5d81SJohn Dyson } 8847ebcee37SAlan Cox VM_OBJECT_UNLOCK(current->object.vm_object); 8857ebcee37SAlan Cox } 886867a482dSJohn Dyson 887867a482dSJohn Dyson /* 888dd2622a8SAlan Cox * subyte may page fault. In case it needs to modify 889dd2622a8SAlan Cox * the map, we release the lock. 890dd2622a8SAlan Cox */ 891dd2622a8SAlan Cox vm_map_unlock_read(map); 892dd2622a8SAlan Cox 893dd2622a8SAlan Cox /* 894867a482dSJohn Dyson * calculate index into user supplied byte vector 895867a482dSJohn Dyson */ 896867a482dSJohn Dyson vecindex = OFF_TO_IDX(addr - first_addr); 897867a482dSJohn Dyson 898867a482dSJohn Dyson /* 899867a482dSJohn Dyson * If we have skipped map entries, we need to make sure that 900867a482dSJohn Dyson * the byte vector is zeroed for those skipped entries. 901867a482dSJohn Dyson */ 902867a482dSJohn Dyson while ((lastvecindex + 1) < vecindex) { 903867a482dSJohn Dyson error = subyte(vec + lastvecindex, 0); 904867a482dSJohn Dyson if (error) { 905d2c60af8SMatthew Dillon error = EFAULT; 906d2c60af8SMatthew Dillon goto done2; 907867a482dSJohn Dyson } 908867a482dSJohn Dyson ++lastvecindex; 909867a482dSJohn Dyson } 910867a482dSJohn Dyson 911867a482dSJohn Dyson /* 912867a482dSJohn Dyson * Pass the page information to the user 913867a482dSJohn Dyson */ 914867a482dSJohn Dyson error = subyte(vec + vecindex, mincoreinfo); 915867a482dSJohn Dyson if (error) { 916d2c60af8SMatthew Dillon error = EFAULT; 917d2c60af8SMatthew Dillon goto done2; 918867a482dSJohn Dyson } 919dd2622a8SAlan Cox 920dd2622a8SAlan Cox /* 921dd2622a8SAlan Cox * If the map has changed, due to the subyte, the previous 922dd2622a8SAlan Cox * output may be invalid. 923dd2622a8SAlan Cox */ 924dd2622a8SAlan Cox vm_map_lock_read(map); 925dd2622a8SAlan Cox if (timestamp != map->timestamp) 926dd2622a8SAlan Cox goto RestartScan; 927dd2622a8SAlan Cox 928867a482dSJohn Dyson lastvecindex = vecindex; 92902c04a2fSJohn Dyson addr += PAGE_SIZE; 93002c04a2fSJohn Dyson } 931867a482dSJohn Dyson } 932867a482dSJohn Dyson 933867a482dSJohn Dyson /* 934dd2622a8SAlan Cox * subyte may page fault. In case it needs to modify 935dd2622a8SAlan Cox * the map, we release the lock. 936dd2622a8SAlan Cox */ 937dd2622a8SAlan Cox vm_map_unlock_read(map); 938dd2622a8SAlan Cox 939dd2622a8SAlan Cox /* 940867a482dSJohn Dyson * Zero the last entries in the byte vector. 941867a482dSJohn Dyson */ 942867a482dSJohn Dyson vecindex = OFF_TO_IDX(end - first_addr); 943867a482dSJohn Dyson while ((lastvecindex + 1) < vecindex) { 944867a482dSJohn Dyson error = subyte(vec + lastvecindex, 0); 945867a482dSJohn Dyson if (error) { 946d2c60af8SMatthew Dillon error = EFAULT; 947d2c60af8SMatthew Dillon goto done2; 948867a482dSJohn Dyson } 949867a482dSJohn Dyson ++lastvecindex; 950867a482dSJohn Dyson } 951867a482dSJohn Dyson 952dd2622a8SAlan Cox /* 953dd2622a8SAlan Cox * If the map has changed, due to the subyte, the previous 954dd2622a8SAlan Cox * output may be invalid. 955dd2622a8SAlan Cox */ 956dd2622a8SAlan Cox vm_map_lock_read(map); 957dd2622a8SAlan Cox if (timestamp != map->timestamp) 958dd2622a8SAlan Cox goto RestartScan; 959eff50fcdSAlan Cox vm_map_unlock_read(map); 960d2c60af8SMatthew Dillon done2: 961d2c60af8SMatthew Dillon return (error); 962df8bae1dSRodney W. Grimes } 963df8bae1dSRodney W. Grimes 964d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 965df8bae1dSRodney W. Grimes struct mlock_args { 966651bb817SAlexander Langer const void *addr; 967df8bae1dSRodney W. Grimes size_t len; 968df8bae1dSRodney W. Grimes }; 969d2d3e875SBruce Evans #endif 970d2c60af8SMatthew Dillon /* 971d2c60af8SMatthew Dillon * MPSAFE 972d2c60af8SMatthew Dillon */ 973df8bae1dSRodney W. Grimes int 974b40ce416SJulian Elischer mlock(td, uap) 975b40ce416SJulian Elischer struct thread *td; 976df8bae1dSRodney W. Grimes struct mlock_args *uap; 977df8bae1dSRodney W. Grimes { 978f0ea4612SDon Lewis struct proc *proc; 979bb734798SDon Lewis vm_offset_t addr, end, last, start; 980bb734798SDon Lewis vm_size_t npages, size; 981bb734798SDon Lewis int error; 982df8bae1dSRodney W. Grimes 983acd3428bSRobert Watson error = priv_check(td, PRIV_VM_MLOCK); 98447934cefSDon Lewis if (error) 98547934cefSDon Lewis return (error); 98616929939SDon Lewis addr = (vm_offset_t)uap->addr; 98716929939SDon Lewis size = uap->len; 988bb734798SDon Lewis last = addr + size; 98916929939SDon Lewis start = trunc_page(addr); 990bb734798SDon Lewis end = round_page(last); 991bb734798SDon Lewis if (last < addr || end < addr) 992df8bae1dSRodney W. Grimes return (EINVAL); 99316929939SDon Lewis npages = atop(end - start); 99416929939SDon Lewis if (npages > vm_page_max_wired) 99516929939SDon Lewis return (ENOMEM); 996f0ea4612SDon Lewis proc = td->td_proc; 99747934cefSDon Lewis PROC_LOCK(proc); 998bb734798SDon Lewis if (ptoa(npages + 999bb734798SDon Lewis pmap_wired_count(vm_map_pmap(&proc->p_vmspace->vm_map))) > 1000bb734798SDon Lewis lim_cur(proc, RLIMIT_MEMLOCK)) { 100147934cefSDon Lewis PROC_UNLOCK(proc); 10024a40e3d4SJohn Dyson return (ENOMEM); 100391d5354aSJohn Baldwin } 100447934cefSDon Lewis PROC_UNLOCK(proc); 10052feb50bfSAttilio Rao if (npages + cnt.v_wire_count > vm_page_max_wired) 100616929939SDon Lewis return (EAGAIN); 100716929939SDon Lewis error = vm_map_wire(&proc->p_vmspace->vm_map, start, end, 100816929939SDon Lewis VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); 1009df8bae1dSRodney W. Grimes return (error == KERN_SUCCESS ? 0 : ENOMEM); 1010df8bae1dSRodney W. Grimes } 1011df8bae1dSRodney W. Grimes 1012d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 10134a40e3d4SJohn Dyson struct mlockall_args { 10144a40e3d4SJohn Dyson int how; 10154a40e3d4SJohn Dyson }; 10164a40e3d4SJohn Dyson #endif 10174a40e3d4SJohn Dyson 1018d2c60af8SMatthew Dillon /* 1019d2c60af8SMatthew Dillon * MPSAFE 1020d2c60af8SMatthew Dillon */ 10214a40e3d4SJohn Dyson int 1022b40ce416SJulian Elischer mlockall(td, uap) 1023b40ce416SJulian Elischer struct thread *td; 10244a40e3d4SJohn Dyson struct mlockall_args *uap; 10254a40e3d4SJohn Dyson { 1026abd498aaSBruce M Simpson vm_map_t map; 1027abd498aaSBruce M Simpson int error; 1028abd498aaSBruce M Simpson 1029abd498aaSBruce M Simpson map = &td->td_proc->p_vmspace->vm_map; 1030abd498aaSBruce M Simpson error = 0; 1031abd498aaSBruce M Simpson 1032abd498aaSBruce M Simpson if ((uap->how == 0) || ((uap->how & ~(MCL_CURRENT|MCL_FUTURE)) != 0)) 1033abd498aaSBruce M Simpson return (EINVAL); 1034abd498aaSBruce M Simpson 103511f7ddc5SBruce M Simpson #if 0 1036abd498aaSBruce M Simpson /* 1037abd498aaSBruce M Simpson * If wiring all pages in the process would cause it to exceed 1038abd498aaSBruce M Simpson * a hard resource limit, return ENOMEM. 1039abd498aaSBruce M Simpson */ 104091d5354aSJohn Baldwin PROC_LOCK(td->td_proc); 1041abd498aaSBruce M Simpson if (map->size - ptoa(pmap_wired_count(vm_map_pmap(map)) > 104291d5354aSJohn Baldwin lim_cur(td->td_proc, RLIMIT_MEMLOCK))) { 104391d5354aSJohn Baldwin PROC_UNLOCK(td->td_proc); 1044abd498aaSBruce M Simpson return (ENOMEM); 104591d5354aSJohn Baldwin } 104691d5354aSJohn Baldwin PROC_UNLOCK(td->td_proc); 1047abd498aaSBruce M Simpson #else 1048acd3428bSRobert Watson error = priv_check(td, PRIV_VM_MLOCK); 1049abd498aaSBruce M Simpson if (error) 1050abd498aaSBruce M Simpson return (error); 1051abd498aaSBruce M Simpson #endif 1052abd498aaSBruce M Simpson 1053abd498aaSBruce M Simpson if (uap->how & MCL_FUTURE) { 1054abd498aaSBruce M Simpson vm_map_lock(map); 1055abd498aaSBruce M Simpson vm_map_modflags(map, MAP_WIREFUTURE, 0); 1056abd498aaSBruce M Simpson vm_map_unlock(map); 1057abd498aaSBruce M Simpson error = 0; 1058abd498aaSBruce M Simpson } 1059abd498aaSBruce M Simpson 1060abd498aaSBruce M Simpson if (uap->how & MCL_CURRENT) { 1061abd498aaSBruce M Simpson /* 1062abd498aaSBruce M Simpson * P1003.1-2001 mandates that all currently mapped pages 1063abd498aaSBruce M Simpson * will be memory resident and locked (wired) upon return 1064abd498aaSBruce M Simpson * from mlockall(). vm_map_wire() will wire pages, by 1065abd498aaSBruce M Simpson * calling vm_fault_wire() for each page in the region. 1066abd498aaSBruce M Simpson */ 1067abd498aaSBruce M Simpson error = vm_map_wire(map, vm_map_min(map), vm_map_max(map), 1068abd498aaSBruce M Simpson VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK); 1069abd498aaSBruce M Simpson error = (error == KERN_SUCCESS ? 0 : EAGAIN); 1070abd498aaSBruce M Simpson } 1071abd498aaSBruce M Simpson 1072abd498aaSBruce M Simpson return (error); 10734a40e3d4SJohn Dyson } 10744a40e3d4SJohn Dyson 10754a40e3d4SJohn Dyson #ifndef _SYS_SYSPROTO_H_ 1076fa721254SAlfred Perlstein struct munlockall_args { 1077abd498aaSBruce M Simpson register_t dummy; 10784a40e3d4SJohn Dyson }; 10794a40e3d4SJohn Dyson #endif 10804a40e3d4SJohn Dyson 1081d2c60af8SMatthew Dillon /* 1082d2c60af8SMatthew Dillon * MPSAFE 1083d2c60af8SMatthew Dillon */ 10844a40e3d4SJohn Dyson int 1085b40ce416SJulian Elischer munlockall(td, uap) 1086b40ce416SJulian Elischer struct thread *td; 10874a40e3d4SJohn Dyson struct munlockall_args *uap; 10884a40e3d4SJohn Dyson { 1089abd498aaSBruce M Simpson vm_map_t map; 1090abd498aaSBruce M Simpson int error; 1091abd498aaSBruce M Simpson 1092abd498aaSBruce M Simpson map = &td->td_proc->p_vmspace->vm_map; 1093acd3428bSRobert Watson error = priv_check(td, PRIV_VM_MUNLOCK); 1094abd498aaSBruce M Simpson if (error) 1095abd498aaSBruce M Simpson return (error); 1096abd498aaSBruce M Simpson 1097abd498aaSBruce M Simpson /* Clear the MAP_WIREFUTURE flag from this vm_map. */ 1098abd498aaSBruce M Simpson vm_map_lock(map); 1099abd498aaSBruce M Simpson vm_map_modflags(map, 0, MAP_WIREFUTURE); 1100abd498aaSBruce M Simpson vm_map_unlock(map); 1101abd498aaSBruce M Simpson 1102abd498aaSBruce M Simpson /* Forcibly unwire all pages. */ 1103abd498aaSBruce M Simpson error = vm_map_unwire(map, vm_map_min(map), vm_map_max(map), 1104abd498aaSBruce M Simpson VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK); 1105abd498aaSBruce M Simpson 1106abd498aaSBruce M Simpson return (error); 11074a40e3d4SJohn Dyson } 11084a40e3d4SJohn Dyson 11094a40e3d4SJohn Dyson #ifndef _SYS_SYSPROTO_H_ 1110df8bae1dSRodney W. Grimes struct munlock_args { 1111651bb817SAlexander Langer const void *addr; 1112df8bae1dSRodney W. Grimes size_t len; 1113df8bae1dSRodney W. Grimes }; 1114d2d3e875SBruce Evans #endif 1115d2c60af8SMatthew Dillon /* 1116d2c60af8SMatthew Dillon * MPSAFE 1117d2c60af8SMatthew Dillon */ 1118df8bae1dSRodney W. Grimes int 1119b40ce416SJulian Elischer munlock(td, uap) 1120b40ce416SJulian Elischer struct thread *td; 1121df8bae1dSRodney W. Grimes struct munlock_args *uap; 1122df8bae1dSRodney W. Grimes { 1123bb734798SDon Lewis vm_offset_t addr, end, last, start; 112416929939SDon Lewis vm_size_t size; 1125df8bae1dSRodney W. Grimes int error; 1126df8bae1dSRodney W. Grimes 1127acd3428bSRobert Watson error = priv_check(td, PRIV_VM_MUNLOCK); 112847934cefSDon Lewis if (error) 112947934cefSDon Lewis return (error); 113016929939SDon Lewis addr = (vm_offset_t)uap->addr; 113116929939SDon Lewis size = uap->len; 1132bb734798SDon Lewis last = addr + size; 113316929939SDon Lewis start = trunc_page(addr); 1134bb734798SDon Lewis end = round_page(last); 1135bb734798SDon Lewis if (last < addr || end < addr) 1136df8bae1dSRodney W. Grimes return (EINVAL); 113716929939SDon Lewis error = vm_map_unwire(&td->td_proc->p_vmspace->vm_map, start, end, 113816929939SDon Lewis VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); 1139df8bae1dSRodney W. Grimes return (error == KERN_SUCCESS ? 0 : ENOMEM); 1140df8bae1dSRodney W. Grimes } 1141df8bae1dSRodney W. Grimes 1142df8bae1dSRodney W. Grimes /* 1143c8daea13SAlexander Kabaev * vm_mmap_vnode() 1144c8daea13SAlexander Kabaev * 1145c8daea13SAlexander Kabaev * MPSAFE 1146c8daea13SAlexander Kabaev * 1147c8daea13SAlexander Kabaev * Helper function for vm_mmap. Perform sanity check specific for mmap 1148c8daea13SAlexander Kabaev * operations on vnodes. 1149c8daea13SAlexander Kabaev */ 1150c8daea13SAlexander Kabaev int 1151c8daea13SAlexander Kabaev vm_mmap_vnode(struct thread *td, vm_size_t objsize, 1152c8daea13SAlexander Kabaev vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp, 1153c8daea13SAlexander Kabaev struct vnode *vp, vm_ooffset_t foff, vm_object_t *objp) 1154c8daea13SAlexander Kabaev { 1155c8daea13SAlexander Kabaev struct vattr va; 1156c8daea13SAlexander Kabaev void *handle; 1157c8daea13SAlexander Kabaev vm_object_t obj; 1158ae51ff11SJeff Roberson struct mount *mp; 115991a35e78SKonstantin Belousov struct cdevsw *dsw; 11600359a12eSAttilio Rao struct ucred *cred; 116123fc1a90SPoul-Henning Kamp int error, flags, type; 1162ae51ff11SJeff Roberson int vfslocked; 1163c8daea13SAlexander Kabaev 1164ae51ff11SJeff Roberson mp = vp->v_mount; 11650359a12eSAttilio Rao cred = td->td_ucred; 1166ae51ff11SJeff Roberson vfslocked = VFS_LOCK_GIANT(mp); 1167c8daea13SAlexander Kabaev if ((error = vget(vp, LK_EXCLUSIVE, td)) != 0) { 1168ae51ff11SJeff Roberson VFS_UNLOCK_GIANT(vfslocked); 1169c8daea13SAlexander Kabaev return (error); 1170c8daea13SAlexander Kabaev } 1171c8daea13SAlexander Kabaev flags = *flagsp; 11728516dd18SPoul-Henning Kamp obj = vp->v_object; 1173c8daea13SAlexander Kabaev if (vp->v_type == VREG) { 1174c8daea13SAlexander Kabaev /* 1175c8daea13SAlexander Kabaev * Get the proper underlying object 1176c8daea13SAlexander Kabaev */ 11778516dd18SPoul-Henning Kamp if (obj == NULL) { 1178c8daea13SAlexander Kabaev error = EINVAL; 1179c8daea13SAlexander Kabaev goto done; 1180c8daea13SAlexander Kabaev } 1181c8daea13SAlexander Kabaev if (obj->handle != vp) { 1182c8daea13SAlexander Kabaev vput(vp); 1183c8daea13SAlexander Kabaev vp = (struct vnode*)obj->handle; 1184c8daea13SAlexander Kabaev vget(vp, LK_EXCLUSIVE, td); 1185c8daea13SAlexander Kabaev } 1186c8daea13SAlexander Kabaev type = OBJT_VNODE; 1187c8daea13SAlexander Kabaev handle = vp; 1188c8daea13SAlexander Kabaev } else if (vp->v_type == VCHR) { 1189c8daea13SAlexander Kabaev type = OBJT_DEVICE; 1190c8daea13SAlexander Kabaev handle = vp->v_rdev; 1191c8daea13SAlexander Kabaev 119291a35e78SKonstantin Belousov dsw = dev_refthread(handle); 119391a35e78SKonstantin Belousov if (dsw == NULL) { 119491a35e78SKonstantin Belousov error = ENXIO; 119591a35e78SKonstantin Belousov goto done; 119691a35e78SKonstantin Belousov } 119791a35e78SKonstantin Belousov if (dsw->d_flags & D_MMAP_ANON) { 119891a35e78SKonstantin Belousov dev_relthread(handle); 1199c8daea13SAlexander Kabaev *maxprotp = VM_PROT_ALL; 1200c8daea13SAlexander Kabaev *flagsp |= MAP_ANON; 1201c8daea13SAlexander Kabaev error = 0; 1202c8daea13SAlexander Kabaev goto done; 1203c8daea13SAlexander Kabaev } 120491a35e78SKonstantin Belousov dev_relthread(handle); 1205c8daea13SAlexander Kabaev /* 1206c8daea13SAlexander Kabaev * cdevs does not provide private mappings of any kind. 1207c8daea13SAlexander Kabaev */ 1208ce7a036dSAlexander Kabaev if ((*maxprotp & VM_PROT_WRITE) == 0 && 1209ce7a036dSAlexander Kabaev (prot & PROT_WRITE) != 0) { 1210ce7a036dSAlexander Kabaev error = EACCES; 1211ce7a036dSAlexander Kabaev goto done; 1212ce7a036dSAlexander Kabaev } 121323fc1a90SPoul-Henning Kamp if (flags & (MAP_PRIVATE|MAP_COPY)) { 1214c8daea13SAlexander Kabaev error = EINVAL; 1215c8daea13SAlexander Kabaev goto done; 1216c8daea13SAlexander Kabaev } 1217c8daea13SAlexander Kabaev /* 1218c8daea13SAlexander Kabaev * Force device mappings to be shared. 1219c8daea13SAlexander Kabaev */ 1220c8daea13SAlexander Kabaev flags |= MAP_SHARED; 1221c8daea13SAlexander Kabaev } else { 1222c8daea13SAlexander Kabaev error = EINVAL; 1223c8daea13SAlexander Kabaev goto done; 1224c8daea13SAlexander Kabaev } 12250359a12eSAttilio Rao if ((error = VOP_GETATTR(vp, &va, cred))) 1226c8daea13SAlexander Kabaev goto done; 1227c92163dcSChristian S.J. Peron #ifdef MAC 12280359a12eSAttilio Rao error = mac_vnode_check_mmap(cred, vp, prot, flags); 1229c92163dcSChristian S.J. Peron if (error != 0) 1230c92163dcSChristian S.J. Peron goto done; 1231c92163dcSChristian S.J. Peron #endif 1232c8daea13SAlexander Kabaev if ((flags & MAP_SHARED) != 0) { 1233c8daea13SAlexander Kabaev if ((va.va_flags & (SF_SNAPSHOT|IMMUTABLE|APPEND)) != 0) { 1234c8daea13SAlexander Kabaev if (prot & PROT_WRITE) { 1235c8daea13SAlexander Kabaev error = EPERM; 1236c8daea13SAlexander Kabaev goto done; 1237c8daea13SAlexander Kabaev } 1238c8daea13SAlexander Kabaev *maxprotp &= ~VM_PROT_WRITE; 1239c8daea13SAlexander Kabaev } 1240c8daea13SAlexander Kabaev } 1241c8daea13SAlexander Kabaev /* 1242c8daea13SAlexander Kabaev * If it is a regular file without any references 1243c8daea13SAlexander Kabaev * we do not need to sync it. 1244c8daea13SAlexander Kabaev * Adjust object size to be the size of actual file. 1245c8daea13SAlexander Kabaev */ 1246c8daea13SAlexander Kabaev if (vp->v_type == VREG) { 1247c8daea13SAlexander Kabaev objsize = round_page(va.va_size); 1248c8daea13SAlexander Kabaev if (va.va_nlink == 0) 1249c8daea13SAlexander Kabaev flags |= MAP_NOSYNC; 1250c8daea13SAlexander Kabaev } 1251c8daea13SAlexander Kabaev obj = vm_pager_allocate(type, handle, objsize, prot, foff); 1252c8daea13SAlexander Kabaev if (obj == NULL) { 1253c8daea13SAlexander Kabaev error = (type == OBJT_DEVICE ? EINVAL : ENOMEM); 1254c8daea13SAlexander Kabaev goto done; 1255c8daea13SAlexander Kabaev } 1256c8daea13SAlexander Kabaev *objp = obj; 1257c8daea13SAlexander Kabaev *flagsp = flags; 12580359a12eSAttilio Rao vfs_mark_atime(vp, cred); 12591e309003SDiomidis Spinellis 1260c8daea13SAlexander Kabaev done: 1261c8daea13SAlexander Kabaev vput(vp); 1262ae51ff11SJeff Roberson VFS_UNLOCK_GIANT(vfslocked); 1263c8daea13SAlexander Kabaev return (error); 1264c8daea13SAlexander Kabaev } 1265c8daea13SAlexander Kabaev 1266c8daea13SAlexander Kabaev /* 126798df9218SJohn Baldwin * vm_mmap_cdev() 126898df9218SJohn Baldwin * 126998df9218SJohn Baldwin * MPSAFE 127098df9218SJohn Baldwin * 127198df9218SJohn Baldwin * Helper function for vm_mmap. Perform sanity check specific for mmap 127298df9218SJohn Baldwin * operations on cdevs. 127398df9218SJohn Baldwin */ 127498df9218SJohn Baldwin int 127598df9218SJohn Baldwin vm_mmap_cdev(struct thread *td, vm_size_t objsize, 127698df9218SJohn Baldwin vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp, 127798df9218SJohn Baldwin struct cdev *cdev, vm_ooffset_t foff, vm_object_t *objp) 127898df9218SJohn Baldwin { 127998df9218SJohn Baldwin vm_object_t obj; 128091a35e78SKonstantin Belousov struct cdevsw *dsw; 128198df9218SJohn Baldwin int flags; 128298df9218SJohn Baldwin 128398df9218SJohn Baldwin flags = *flagsp; 128498df9218SJohn Baldwin 128591a35e78SKonstantin Belousov dsw = dev_refthread(cdev); 128691a35e78SKonstantin Belousov if (dsw == NULL) 128791a35e78SKonstantin Belousov return (ENXIO); 128891a35e78SKonstantin Belousov if (dsw->d_flags & D_MMAP_ANON) { 128991a35e78SKonstantin Belousov dev_relthread(cdev); 129098df9218SJohn Baldwin *maxprotp = VM_PROT_ALL; 129198df9218SJohn Baldwin *flagsp |= MAP_ANON; 129298df9218SJohn Baldwin return (0); 129398df9218SJohn Baldwin } 129491a35e78SKonstantin Belousov dev_relthread(cdev); 129598df9218SJohn Baldwin /* 129698df9218SJohn Baldwin * cdevs does not provide private mappings of any kind. 129798df9218SJohn Baldwin */ 129898df9218SJohn Baldwin if ((*maxprotp & VM_PROT_WRITE) == 0 && 129998df9218SJohn Baldwin (prot & PROT_WRITE) != 0) 130098df9218SJohn Baldwin return (EACCES); 130198df9218SJohn Baldwin if (flags & (MAP_PRIVATE|MAP_COPY)) 130298df9218SJohn Baldwin return (EINVAL); 130398df9218SJohn Baldwin /* 130498df9218SJohn Baldwin * Force device mappings to be shared. 130598df9218SJohn Baldwin */ 130698df9218SJohn Baldwin flags |= MAP_SHARED; 130798df9218SJohn Baldwin #ifdef MAC_XXX 130898df9218SJohn Baldwin error = mac_check_cdev_mmap(td->td_ucred, cdev, prot); 130998df9218SJohn Baldwin if (error != 0) 131098df9218SJohn Baldwin return (error); 131198df9218SJohn Baldwin #endif 131298df9218SJohn Baldwin obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, foff); 131398df9218SJohn Baldwin if (obj == NULL) 131498df9218SJohn Baldwin return (EINVAL); 131598df9218SJohn Baldwin *objp = obj; 131698df9218SJohn Baldwin *flagsp = flags; 131798df9218SJohn Baldwin return (0); 131898df9218SJohn Baldwin } 131998df9218SJohn Baldwin 132098df9218SJohn Baldwin /* 13218e38aeffSJohn Baldwin * vm_mmap_shm() 13228e38aeffSJohn Baldwin * 13238e38aeffSJohn Baldwin * MPSAFE 13248e38aeffSJohn Baldwin * 13258e38aeffSJohn Baldwin * Helper function for vm_mmap. Perform sanity check specific for mmap 13268e38aeffSJohn Baldwin * operations on shm file descriptors. 13278e38aeffSJohn Baldwin */ 13288e38aeffSJohn Baldwin int 13298e38aeffSJohn Baldwin vm_mmap_shm(struct thread *td, vm_size_t objsize, 13308e38aeffSJohn Baldwin vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp, 13318e38aeffSJohn Baldwin struct shmfd *shmfd, vm_ooffset_t foff, vm_object_t *objp) 13328e38aeffSJohn Baldwin { 13338e38aeffSJohn Baldwin int error; 13348e38aeffSJohn Baldwin 13358e38aeffSJohn Baldwin if ((*maxprotp & VM_PROT_WRITE) == 0 && 13368e38aeffSJohn Baldwin (prot & PROT_WRITE) != 0) 13378e38aeffSJohn Baldwin return (EACCES); 13388e38aeffSJohn Baldwin #ifdef MAC 13398e38aeffSJohn Baldwin error = mac_posixshm_check_mmap(td->td_ucred, shmfd, prot, *flagsp); 13408e38aeffSJohn Baldwin if (error != 0) 13418e38aeffSJohn Baldwin return (error); 13428e38aeffSJohn Baldwin #endif 13438e38aeffSJohn Baldwin error = shm_mmap(shmfd, objsize, foff, objp); 13448e38aeffSJohn Baldwin if (error) 13458e38aeffSJohn Baldwin return (error); 13468e38aeffSJohn Baldwin return (0); 13478e38aeffSJohn Baldwin } 13488e38aeffSJohn Baldwin 13498e38aeffSJohn Baldwin /* 1350d2c60af8SMatthew Dillon * vm_mmap() 1351d2c60af8SMatthew Dillon * 1352d2c60af8SMatthew Dillon * MPSAFE 1353d2c60af8SMatthew Dillon * 1354d2c60af8SMatthew Dillon * Internal version of mmap. Currently used by mmap, exec, and sys5 1355d2c60af8SMatthew Dillon * shared memory. Handle is either a vnode pointer or NULL for MAP_ANON. 1356df8bae1dSRodney W. Grimes */ 1357df8bae1dSRodney W. Grimes int 1358b9dcd593SBruce Evans vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, 1359b9dcd593SBruce Evans vm_prot_t maxprot, int flags, 136098df9218SJohn Baldwin objtype_t handle_type, void *handle, 1361b9dcd593SBruce Evans vm_ooffset_t foff) 1362df8bae1dSRodney W. Grimes { 1363df8bae1dSRodney W. Grimes boolean_t fitit; 13646bda842dSMatt Jacob vm_object_t object = NULL; 1365df8bae1dSRodney W. Grimes int rv = KERN_SUCCESS; 136620eec4bbSAlan Cox int docow, error; 1367b40ce416SJulian Elischer struct thread *td = curthread; 1368df8bae1dSRodney W. Grimes 1369df8bae1dSRodney W. Grimes if (size == 0) 1370df8bae1dSRodney W. Grimes return (0); 1371df8bae1dSRodney W. Grimes 1372749474f2SPeter Wemm size = round_page(size); 1373df8bae1dSRodney W. Grimes 137491d5354aSJohn Baldwin PROC_LOCK(td->td_proc); 1375070f64feSMatthew Dillon if (td->td_proc->p_vmspace->vm_map.size + size > 137691d5354aSJohn Baldwin lim_cur(td->td_proc, RLIMIT_VMEM)) { 137791d5354aSJohn Baldwin PROC_UNLOCK(td->td_proc); 1378070f64feSMatthew Dillon return(ENOMEM); 1379070f64feSMatthew Dillon } 138091d5354aSJohn Baldwin PROC_UNLOCK(td->td_proc); 1381070f64feSMatthew Dillon 1382df8bae1dSRodney W. Grimes /* 1383bc9ad247SDavid Greenman * We currently can only deal with page aligned file offsets. 1384bc9ad247SDavid Greenman * The check is here rather than in the syscall because the 1385bc9ad247SDavid Greenman * kernel calls this function internally for other mmaping 1386bc9ad247SDavid Greenman * operations (such as in exec) and non-aligned offsets will 1387bc9ad247SDavid Greenman * cause pmap inconsistencies...so we want to be sure to 1388bc9ad247SDavid Greenman * disallow this in all cases. 1389bc9ad247SDavid Greenman */ 1390bc9ad247SDavid Greenman if (foff & PAGE_MASK) 1391bc9ad247SDavid Greenman return (EINVAL); 1392bc9ad247SDavid Greenman 139306cb7259SDavid Greenman if ((flags & MAP_FIXED) == 0) { 139406cb7259SDavid Greenman fitit = TRUE; 139506cb7259SDavid Greenman *addr = round_page(*addr); 139606cb7259SDavid Greenman } else { 139706cb7259SDavid Greenman if (*addr != trunc_page(*addr)) 139806cb7259SDavid Greenman return (EINVAL); 139906cb7259SDavid Greenman fitit = FALSE; 140006cb7259SDavid Greenman } 1401bc9ad247SDavid Greenman /* 140224a1cce3SDavid Greenman * Lookup/allocate object. 1403df8bae1dSRodney W. Grimes */ 140498df9218SJohn Baldwin switch (handle_type) { 140598df9218SJohn Baldwin case OBJT_DEVICE: 140698df9218SJohn Baldwin error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, 140798df9218SJohn Baldwin handle, foff, &object); 140898df9218SJohn Baldwin break; 140998df9218SJohn Baldwin case OBJT_VNODE: 1410c8daea13SAlexander Kabaev error = vm_mmap_vnode(td, size, prot, &maxprot, &flags, 1411c8daea13SAlexander Kabaev handle, foff, &object); 141298df9218SJohn Baldwin break; 14138e38aeffSJohn Baldwin case OBJT_SWAP: 14148e38aeffSJohn Baldwin error = vm_mmap_shm(td, size, prot, &maxprot, &flags, 14158e38aeffSJohn Baldwin handle, foff, &object); 14168e38aeffSJohn Baldwin break; 141798df9218SJohn Baldwin case OBJT_DEFAULT: 141898df9218SJohn Baldwin if (handle == NULL) { 141998df9218SJohn Baldwin error = 0; 142098df9218SJohn Baldwin break; 142198df9218SJohn Baldwin } 142298df9218SJohn Baldwin /* FALLTHROUGH */ 142398df9218SJohn Baldwin default: 142498df9218SJohn Baldwin error = EINVAL; 14256bda842dSMatt Jacob break; 142698df9218SJohn Baldwin } 142798df9218SJohn Baldwin if (error) 1428c8daea13SAlexander Kabaev return (error); 14295f55e841SDavid Greenman if (flags & MAP_ANON) { 1430c8daea13SAlexander Kabaev object = NULL; 1431c8daea13SAlexander Kabaev docow = 0; 14325f55e841SDavid Greenman /* 14335f55e841SDavid Greenman * Unnamed anonymous regions always start at 0. 14345f55e841SDavid Greenman */ 143567bf6868SJohn Dyson if (handle == 0) 14365f55e841SDavid Greenman foff = 0; 14375f55e841SDavid Greenman } else { 14384738fa09SAlan Cox docow = MAP_PREFAULT_PARTIAL; 143994328e90SJohn Dyson } 1440df8bae1dSRodney W. Grimes 14414f79d873SMatthew Dillon if ((flags & (MAP_ANON|MAP_SHARED)) == 0) 14424738fa09SAlan Cox docow |= MAP_COPY_ON_WRITE; 14434f79d873SMatthew Dillon if (flags & MAP_NOSYNC) 14444f79d873SMatthew Dillon docow |= MAP_DISABLE_SYNCER; 14459730a5daSPaul Saab if (flags & MAP_NOCORE) 14469730a5daSPaul Saab docow |= MAP_DISABLE_COREDUMP; 14475850152dSJohn Dyson 1448d0aea04fSJohn Dyson #if defined(VM_PROT_READ_IS_EXEC) 1449d0aea04fSJohn Dyson if (prot & VM_PROT_READ) 1450d0aea04fSJohn Dyson prot |= VM_PROT_EXECUTE; 1451d0aea04fSJohn Dyson 1452d0aea04fSJohn Dyson if (maxprot & VM_PROT_READ) 1453d0aea04fSJohn Dyson maxprot |= VM_PROT_EXECUTE; 1454d0aea04fSJohn Dyson #endif 1455d0aea04fSJohn Dyson 14562267af78SJulian Elischer if (flags & MAP_STACK) 1457fd75d710SMarcel Moolenaar rv = vm_map_stack(map, *addr, size, prot, maxprot, 1458fd75d710SMarcel Moolenaar docow | MAP_STACK_GROWS_DOWN); 1459d239bd3cSKonstantin Belousov else if (fitit) 1460d0a83a83SAlan Cox rv = vm_map_find(map, object, foff, addr, size, 1461d0a83a83SAlan Cox object != NULL && object->type == OBJT_DEVICE ? 1462d0a83a83SAlan Cox VMFS_ALIGNED_SPACE : VMFS_ANY_SPACE, prot, maxprot, docow); 14632267af78SJulian Elischer else 1464b8ca4ef2SAlan Cox rv = vm_map_fixed(map, object, foff, *addr, size, 1465bd7e5f99SJohn Dyson prot, maxprot, docow); 1466bd7e5f99SJohn Dyson 1467d2c60af8SMatthew Dillon if (rv != KERN_SUCCESS) { 14687fb0c17eSDavid Greenman /* 146924a1cce3SDavid Greenman * Lose the object reference. Will destroy the 147024a1cce3SDavid Greenman * object if it's an unnamed anonymous mapping 147124a1cce3SDavid Greenman * or named anonymous without other references. 14727fb0c17eSDavid Greenman */ 1473df8bae1dSRodney W. Grimes vm_object_deallocate(object); 1474d2c60af8SMatthew Dillon } else if (flags & MAP_SHARED) { 1475df8bae1dSRodney W. Grimes /* 1476df8bae1dSRodney W. Grimes * Shared memory is also shared with children. 1477df8bae1dSRodney W. Grimes */ 1478df8bae1dSRodney W. Grimes rv = vm_map_inherit(map, *addr, *addr + size, VM_INHERIT_SHARE); 1479e4ca250dSJohn Baldwin if (rv != KERN_SUCCESS) 14807fb0c17eSDavid Greenman (void) vm_map_remove(map, *addr, *addr + size); 1481df8bae1dSRodney W. Grimes } 1482abd498aaSBruce M Simpson 1483abd498aaSBruce M Simpson /* 1484abd498aaSBruce M Simpson * If the process has requested that all future mappings 1485abd498aaSBruce M Simpson * be wired, then heed this. 1486abd498aaSBruce M Simpson */ 1487abd498aaSBruce M Simpson if ((rv == KERN_SUCCESS) && (map->flags & MAP_WIREFUTURE)) 1488abd498aaSBruce M Simpson vm_map_wire(map, *addr, *addr + size, 1489abd498aaSBruce M Simpson VM_MAP_WIRE_USER|VM_MAP_WIRE_NOHOLES); 1490abd498aaSBruce M Simpson 1491df8bae1dSRodney W. Grimes switch (rv) { 1492df8bae1dSRodney W. Grimes case KERN_SUCCESS: 1493df8bae1dSRodney W. Grimes return (0); 1494df8bae1dSRodney W. Grimes case KERN_INVALID_ADDRESS: 1495df8bae1dSRodney W. Grimes case KERN_NO_SPACE: 1496df8bae1dSRodney W. Grimes return (ENOMEM); 1497df8bae1dSRodney W. Grimes case KERN_PROTECTION_FAILURE: 1498df8bae1dSRodney W. Grimes return (EACCES); 1499df8bae1dSRodney W. Grimes default: 1500df8bae1dSRodney W. Grimes return (EINVAL); 1501df8bae1dSRodney W. Grimes } 1502df8bae1dSRodney W. Grimes } 1503