160727d8bSWarner Losh /*- 2df8bae1dSRodney W. Grimes * Copyright (c) 1988 University of Utah. 3df8bae1dSRodney W. Grimes * Copyright (c) 1991, 1993 4df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 5df8bae1dSRodney W. Grimes * 6df8bae1dSRodney W. Grimes * This code is derived from software contributed to Berkeley by 7df8bae1dSRodney W. Grimes * the Systems Programming Group of the University of Utah Computer 8df8bae1dSRodney W. Grimes * Science Department. 9df8bae1dSRodney W. Grimes * 10df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 11df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 12df8bae1dSRodney W. Grimes * are met: 13df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 14df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 15df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 16df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 17df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 18df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 19df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 20df8bae1dSRodney W. Grimes * without specific prior written permission. 21df8bae1dSRodney W. Grimes * 22df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26df8bae1dSRodney W. 
Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32df8bae1dSRodney W. Grimes * SUCH DAMAGE. 33df8bae1dSRodney W. Grimes * 34df8bae1dSRodney W. Grimes * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$ 35df8bae1dSRodney W. Grimes * 36df8bae1dSRodney W. Grimes * @(#)vm_mmap.c 8.4 (Berkeley) 1/12/94 37df8bae1dSRodney W. Grimes */ 38df8bae1dSRodney W. Grimes 39df8bae1dSRodney W. Grimes /* 40df8bae1dSRodney W. Grimes * Mapped file (mmap) interface to VM 41df8bae1dSRodney W. Grimes */ 42df8bae1dSRodney W. Grimes 43874651b1SDavid E. O'Brien #include <sys/cdefs.h> 44874651b1SDavid E. O'Brien __FBSDID("$FreeBSD$"); 45874651b1SDavid E. O'Brien 465591b823SEivind Eklund #include "opt_compat.h" 4749874f6eSJoseph Koshy #include "opt_hwpmc_hooks.h" 483e732e7dSRobert Watson #include "opt_mac.h" 49e9822d92SJoerg Wunsch 50df8bae1dSRodney W. Grimes #include <sys/param.h> 51df8bae1dSRodney W. Grimes #include <sys/systm.h> 52fb919e4dSMark Murray #include <sys/kernel.h> 53fb919e4dSMark Murray #include <sys/lock.h> 5423955314SAlfred Perlstein #include <sys/mutex.h> 55d2d3e875SBruce Evans #include <sys/sysproto.h> 56df8bae1dSRodney W. Grimes #include <sys/filedesc.h> 57acd3428bSRobert Watson #include <sys/priv.h> 58df8bae1dSRodney W. Grimes #include <sys/proc.h> 59070f64feSMatthew Dillon #include <sys/resource.h> 60070f64feSMatthew Dillon #include <sys/resourcevar.h> 61df8bae1dSRodney W. 
Grimes #include <sys/vnode.h> 623ac4d1efSBruce Evans #include <sys/fcntl.h> 63df8bae1dSRodney W. Grimes #include <sys/file.h> 64df8bae1dSRodney W. Grimes #include <sys/mman.h> 65b483c7f6SGuido van Rooij #include <sys/mount.h> 66df8bae1dSRodney W. Grimes #include <sys/conf.h> 674183b6b6SPeter Wemm #include <sys/stat.h> 68efeaf95aSDavid Greenman #include <sys/vmmeter.h> 691f6889a1SMatthew Dillon #include <sys/sysctl.h> 70df8bae1dSRodney W. Grimes 71aed55708SRobert Watson #include <security/mac/mac_framework.h> 72aed55708SRobert Watson 73df8bae1dSRodney W. Grimes #include <vm/vm.h> 74efeaf95aSDavid Greenman #include <vm/vm_param.h> 75efeaf95aSDavid Greenman #include <vm/pmap.h> 76efeaf95aSDavid Greenman #include <vm/vm_map.h> 77efeaf95aSDavid Greenman #include <vm/vm_object.h> 781c7c3c6aSMatthew Dillon #include <vm/vm_page.h> 79df8bae1dSRodney W. Grimes #include <vm/vm_pager.h> 80b5e8ce9fSBruce Evans #include <vm/vm_pageout.h> 81efeaf95aSDavid Greenman #include <vm/vm_extern.h> 82867a482dSJohn Dyson #include <vm/vm_page.h> 831f6889a1SMatthew Dillon #include <vm/vm_kern.h> 84df8bae1dSRodney W. Grimes 8549874f6eSJoseph Koshy #ifdef HWPMC_HOOKS 8649874f6eSJoseph Koshy #include <sys/pmckern.h> 8749874f6eSJoseph Koshy #endif 8849874f6eSJoseph Koshy 89d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 90df8bae1dSRodney W. Grimes struct sbrk_args { 91df8bae1dSRodney W. Grimes int incr; 92df8bae1dSRodney W. Grimes }; 93d2d3e875SBruce Evans #endif 940d94caffSDavid Greenman 951f6889a1SMatthew Dillon static int max_proc_mmap; 961f6889a1SMatthew Dillon SYSCTL_INT(_vm, OID_AUTO, max_proc_mmap, CTLFLAG_RW, &max_proc_mmap, 0, ""); 971f6889a1SMatthew Dillon 981f6889a1SMatthew Dillon /* 991f6889a1SMatthew Dillon * Set the maximum number of vm_map_entry structures per process. Roughly 1001f6889a1SMatthew Dillon * speaking vm_map_entry structures are tiny, so allowing them to eat 1/100 1011f6889a1SMatthew Dillon * of our KVM malloc space still results in generous limits. 
We want a 1021f6889a1SMatthew Dillon * default that is good enough to prevent the kernel running out of resources 1031f6889a1SMatthew Dillon * if attacked from compromised user account but generous enough such that 1041f6889a1SMatthew Dillon * multi-threaded processes are not unduly inconvenienced. 1051f6889a1SMatthew Dillon */ 10611caded3SAlfred Perlstein static void vmmapentry_rsrc_init(void *); 1071f6889a1SMatthew Dillon SYSINIT(vmmersrc, SI_SUB_KVM_RSRC, SI_ORDER_FIRST, vmmapentry_rsrc_init, NULL) 1081f6889a1SMatthew Dillon 1091f6889a1SMatthew Dillon static void 1101f6889a1SMatthew Dillon vmmapentry_rsrc_init(dummy) 1111f6889a1SMatthew Dillon void *dummy; 1121f6889a1SMatthew Dillon { 1131f6889a1SMatthew Dillon max_proc_mmap = vm_kmem_size / sizeof(struct vm_map_entry); 1141f6889a1SMatthew Dillon max_proc_mmap /= 100; 1151f6889a1SMatthew Dillon } 1161f6889a1SMatthew Dillon 117c8daea13SAlexander Kabaev static int vm_mmap_vnode(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, 118c8daea13SAlexander Kabaev int *, struct vnode *, vm_ooffset_t, vm_object_t *); 11998df9218SJohn Baldwin static int vm_mmap_cdev(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, 12098df9218SJohn Baldwin int *, struct cdev *, vm_ooffset_t, vm_object_t *); 1218e38aeffSJohn Baldwin static int vm_mmap_shm(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, 1228e38aeffSJohn Baldwin int *, struct shmfd *, vm_ooffset_t, vm_object_t *); 123c8daea13SAlexander Kabaev 124d2c60af8SMatthew Dillon /* 125d2c60af8SMatthew Dillon * MPSAFE 126d2c60af8SMatthew Dillon */ 127df8bae1dSRodney W. Grimes /* ARGSUSED */ 128df8bae1dSRodney W. Grimes int 129b40ce416SJulian Elischer sbrk(td, uap) 130b40ce416SJulian Elischer struct thread *td; 131df8bae1dSRodney W. Grimes struct sbrk_args *uap; 132df8bae1dSRodney W. Grimes { 133df8bae1dSRodney W. Grimes /* Not yet implemented */ 134df8bae1dSRodney W. Grimes return (EOPNOTSUPP); 135df8bae1dSRodney W. Grimes } 136df8bae1dSRodney W. 
Grimes 137d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 138df8bae1dSRodney W. Grimes struct sstk_args { 139df8bae1dSRodney W. Grimes int incr; 140df8bae1dSRodney W. Grimes }; 141d2d3e875SBruce Evans #endif 1420d94caffSDavid Greenman 143d2c60af8SMatthew Dillon /* 144d2c60af8SMatthew Dillon * MPSAFE 145d2c60af8SMatthew Dillon */ 146df8bae1dSRodney W. Grimes /* ARGSUSED */ 147df8bae1dSRodney W. Grimes int 148b40ce416SJulian Elischer sstk(td, uap) 149b40ce416SJulian Elischer struct thread *td; 150df8bae1dSRodney W. Grimes struct sstk_args *uap; 151df8bae1dSRodney W. Grimes { 152df8bae1dSRodney W. Grimes /* Not yet implemented */ 153df8bae1dSRodney W. Grimes return (EOPNOTSUPP); 154df8bae1dSRodney W. Grimes } 155df8bae1dSRodney W. Grimes 1561930e303SPoul-Henning Kamp #if defined(COMPAT_43) 157d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 158df8bae1dSRodney W. Grimes struct getpagesize_args { 159df8bae1dSRodney W. Grimes int dummy; 160df8bae1dSRodney W. Grimes }; 161d2d3e875SBruce Evans #endif 1620d94caffSDavid Greenman 163df8bae1dSRodney W. Grimes /* ARGSUSED */ 164df8bae1dSRodney W. Grimes int 165b40ce416SJulian Elischer ogetpagesize(td, uap) 166b40ce416SJulian Elischer struct thread *td; 167df8bae1dSRodney W. Grimes struct getpagesize_args *uap; 168df8bae1dSRodney W. Grimes { 1690cddd8f0SMatthew Dillon /* MP SAFE */ 170b40ce416SJulian Elischer td->td_retval[0] = PAGE_SIZE; 171df8bae1dSRodney W. Grimes return (0); 172df8bae1dSRodney W. Grimes } 1731930e303SPoul-Henning Kamp #endif /* COMPAT_43 */ 174df8bae1dSRodney W. Grimes 17554f42e4bSPeter Wemm 17654f42e4bSPeter Wemm /* 17754f42e4bSPeter Wemm * Memory Map (mmap) system call. Note that the file offset 17854f42e4bSPeter Wemm * and address are allowed to be NOT page aligned, though if 17954f42e4bSPeter Wemm * the MAP_FIXED flag it set, both must have the same remainder 18054f42e4bSPeter Wemm * modulo the PAGE_SIZE (POSIX 1003.1b). 
If the address is not 18154f42e4bSPeter Wemm * page-aligned, the actual mapping starts at trunc_page(addr) 18254f42e4bSPeter Wemm * and the return value is adjusted up by the page offset. 183b4309055SMatthew Dillon * 184b4309055SMatthew Dillon * Generally speaking, only character devices which are themselves 185b4309055SMatthew Dillon * memory-based, such as a video framebuffer, can be mmap'd. Otherwise 186b4309055SMatthew Dillon * there would be no cache coherency between a descriptor and a VM mapping 187b4309055SMatthew Dillon * both to the same character device. 188b4309055SMatthew Dillon * 189b4309055SMatthew Dillon * Block devices can be mmap'd no matter what they represent. Cache coherency 190b4309055SMatthew Dillon * is maintained as long as you do not write directly to the underlying 191b4309055SMatthew Dillon * character device. 19254f42e4bSPeter Wemm */ 193d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 194df8bae1dSRodney W. Grimes struct mmap_args { 195651bb817SAlexander Langer void *addr; 196df8bae1dSRodney W. Grimes size_t len; 197df8bae1dSRodney W. Grimes int prot; 198df8bae1dSRodney W. Grimes int flags; 199df8bae1dSRodney W. Grimes int fd; 200df8bae1dSRodney W. Grimes long pad; 201df8bae1dSRodney W. Grimes off_t pos; 202df8bae1dSRodney W. Grimes }; 203d2d3e875SBruce Evans #endif 204df8bae1dSRodney W. Grimes 205d2c60af8SMatthew Dillon /* 206d2c60af8SMatthew Dillon * MPSAFE 207d2c60af8SMatthew Dillon */ 208df8bae1dSRodney W. Grimes int 209b40ce416SJulian Elischer mmap(td, uap) 210b40ce416SJulian Elischer struct thread *td; 21154d92145SMatthew Dillon struct mmap_args *uap; 212df8bae1dSRodney W. Grimes { 21349874f6eSJoseph Koshy #ifdef HWPMC_HOOKS 21449874f6eSJoseph Koshy struct pmckern_map_in pkm; 21549874f6eSJoseph Koshy #endif 216c8daea13SAlexander Kabaev struct file *fp; 217df8bae1dSRodney W. Grimes struct vnode *vp; 218df8bae1dSRodney W. Grimes vm_offset_t addr; 2199154ee6aSPeter Wemm vm_size_t size, pageoff; 220df8bae1dSRodney W. 
Grimes vm_prot_t prot, maxprot; 221651bb817SAlexander Langer void *handle; 22298df9218SJohn Baldwin objtype_t handle_type; 223df8bae1dSRodney W. Grimes int flags, error; 22454f42e4bSPeter Wemm off_t pos; 225b40ce416SJulian Elischer struct vmspace *vms = td->td_proc->p_vmspace; 226df8bae1dSRodney W. Grimes 22754f42e4bSPeter Wemm addr = (vm_offset_t) uap->addr; 22854f42e4bSPeter Wemm size = uap->len; 229df8bae1dSRodney W. Grimes prot = uap->prot & VM_PROT_ALL; 230df8bae1dSRodney W. Grimes flags = uap->flags; 23154f42e4bSPeter Wemm pos = uap->pos; 23254f42e4bSPeter Wemm 233426da3bcSAlfred Perlstein fp = NULL; 23454f42e4bSPeter Wemm /* make sure mapping fits into numeric range etc */ 235fc565456SDmitrij Tejblum if ((ssize_t) uap->len < 0 || 23654f42e4bSPeter Wemm ((flags & MAP_ANON) && uap->fd != -1)) 237df8bae1dSRodney W. Grimes return (EINVAL); 2389154ee6aSPeter Wemm 2392267af78SJulian Elischer if (flags & MAP_STACK) { 2402267af78SJulian Elischer if ((uap->fd != -1) || 2412267af78SJulian Elischer ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE))) 2422267af78SJulian Elischer return (EINVAL); 2432267af78SJulian Elischer flags |= MAP_ANON; 2442267af78SJulian Elischer pos = 0; 2452907af2aSJulian Elischer } 2462907af2aSJulian Elischer 2479154ee6aSPeter Wemm /* 24854f42e4bSPeter Wemm * Align the file position to a page boundary, 24954f42e4bSPeter Wemm * and save its page offset component. 2509154ee6aSPeter Wemm */ 25154f42e4bSPeter Wemm pageoff = (pos & PAGE_MASK); 25254f42e4bSPeter Wemm pos -= pageoff; 25354f42e4bSPeter Wemm 25454f42e4bSPeter Wemm /* Adjust size for rounding (on both ends). */ 25554f42e4bSPeter Wemm size += pageoff; /* low end... */ 25654f42e4bSPeter Wemm size = (vm_size_t) round_page(size); /* hi end */ 2579154ee6aSPeter Wemm 258df8bae1dSRodney W. Grimes /* 2590d94caffSDavid Greenman * Check for illegal addresses. Watch out for address wrap... Note 2600d94caffSDavid Greenman * that VM_*_ADDRESS are not constants due to casts (argh). 
261df8bae1dSRodney W. Grimes */ 262df8bae1dSRodney W. Grimes if (flags & MAP_FIXED) { 26354f42e4bSPeter Wemm /* 26454f42e4bSPeter Wemm * The specified address must have the same remainder 26554f42e4bSPeter Wemm * as the file offset taken modulo PAGE_SIZE, so it 26654f42e4bSPeter Wemm * should be aligned after adjustment by pageoff. 26754f42e4bSPeter Wemm */ 26854f42e4bSPeter Wemm addr -= pageoff; 26954f42e4bSPeter Wemm if (addr & PAGE_MASK) 27054f42e4bSPeter Wemm return (EINVAL); 27154f42e4bSPeter Wemm /* Address range must be all in user VM space. */ 27205ba50f5SJake Burkholder if (addr < vm_map_min(&vms->vm_map) || 27305ba50f5SJake Burkholder addr + size > vm_map_max(&vms->vm_map)) 274df8bae1dSRodney W. Grimes return (EINVAL); 275bbc0ec52SDavid Greenman if (addr + size < addr) 276df8bae1dSRodney W. Grimes return (EINVAL); 27791d5354aSJohn Baldwin } else { 278df8bae1dSRodney W. Grimes /* 27954f42e4bSPeter Wemm * XXX for non-fixed mappings where no hint is provided or 28054f42e4bSPeter Wemm * the hint would fall in the potential heap space, 28154f42e4bSPeter Wemm * place it after the end of the largest possible heap. 282df8bae1dSRodney W. Grimes * 28354f42e4bSPeter Wemm * There should really be a pmap call to determine a reasonable 28454f42e4bSPeter Wemm * location. 285df8bae1dSRodney W. Grimes */ 28691d5354aSJohn Baldwin PROC_LOCK(td->td_proc); 28791d5354aSJohn Baldwin if (addr == 0 || 2881f6889a1SMatthew Dillon (addr >= round_page((vm_offset_t)vms->vm_taddr) && 289c460ac3aSPeter Wemm addr < round_page((vm_offset_t)vms->vm_daddr + 29091d5354aSJohn Baldwin lim_max(td->td_proc, RLIMIT_DATA)))) 291c460ac3aSPeter Wemm addr = round_page((vm_offset_t)vms->vm_daddr + 29291d5354aSJohn Baldwin lim_max(td->td_proc, RLIMIT_DATA)); 29391d5354aSJohn Baldwin PROC_UNLOCK(td->td_proc); 29491d5354aSJohn Baldwin } 295df8bae1dSRodney W. Grimes if (flags & MAP_ANON) { 296df8bae1dSRodney W. Grimes /* 297df8bae1dSRodney W. Grimes * Mapping blank space is trivial. 298df8bae1dSRodney W. 
Grimes */ 299df8bae1dSRodney W. Grimes handle = NULL; 30098df9218SJohn Baldwin handle_type = OBJT_DEFAULT; 301df8bae1dSRodney W. Grimes maxprot = VM_PROT_ALL; 30254f42e4bSPeter Wemm pos = 0; 30330d4dd7eSAlexander Kabaev } else { 304df8bae1dSRodney W. Grimes /* 3058e38aeffSJohn Baldwin * Mapping file, get fp for validation and 3068e38aeffSJohn Baldwin * don't let the descriptor disappear on us if we block. 307df8bae1dSRodney W. Grimes */ 308a4db4953SAlfred Perlstein if ((error = fget(td, uap->fd, &fp)) != 0) 309426da3bcSAlfred Perlstein goto done; 3108e38aeffSJohn Baldwin if (fp->f_type == DTYPE_SHM) { 3118e38aeffSJohn Baldwin handle = fp->f_data; 3128e38aeffSJohn Baldwin handle_type = OBJT_SWAP; 3138e38aeffSJohn Baldwin maxprot = VM_PROT_NONE; 3148e38aeffSJohn Baldwin 3158e38aeffSJohn Baldwin /* FREAD should always be set. */ 3168e38aeffSJohn Baldwin if (fp->f_flag & FREAD) 3178e38aeffSJohn Baldwin maxprot |= VM_PROT_EXECUTE | VM_PROT_READ; 3188e38aeffSJohn Baldwin if (fp->f_flag & FWRITE) 3198e38aeffSJohn Baldwin maxprot |= VM_PROT_WRITE; 3208e38aeffSJohn Baldwin goto map; 3218e38aeffSJohn Baldwin } 322e4ca250dSJohn Baldwin if (fp->f_type != DTYPE_VNODE) { 32389eae00bSTom Rhodes error = ENODEV; 324426da3bcSAlfred Perlstein goto done; 325e4ca250dSJohn Baldwin } 3268e38aeffSJohn Baldwin #if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \ 3278e38aeffSJohn Baldwin defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) 328279d7226SMatthew Dillon /* 329aa543039SGarrett Wollman * POSIX shared-memory objects are defined to have 330aa543039SGarrett Wollman * kernel persistence, and are not defined to support 331aa543039SGarrett Wollman * read(2)/write(2) -- or even open(2). Thus, we can 332aa543039SGarrett Wollman * use MAP_ASYNC to trade on-disk coherence for speed. 333aa543039SGarrett Wollman * The shm_open(3) library routine turns on the FPOSIXSHM 334aa543039SGarrett Wollman * flag to request this behavior. 
335aa543039SGarrett Wollman */ 336aa543039SGarrett Wollman if (fp->f_flag & FPOSIXSHM) 337aa543039SGarrett Wollman flags |= MAP_NOSYNC; 3388e38aeffSJohn Baldwin #endif 3393b6d9652SPoul-Henning Kamp vp = fp->f_vnode; 340c8bdd56bSGuido van Rooij /* 341df8bae1dSRodney W. Grimes * Ensure that file and memory protections are 342df8bae1dSRodney W. Grimes * compatible. Note that we only worry about 343df8bae1dSRodney W. Grimes * writability if mapping is shared; in this case, 344df8bae1dSRodney W. Grimes * current and max prot are dictated by the open file. 345df8bae1dSRodney W. Grimes * XXX use the vnode instead? Problem is: what 3460d94caffSDavid Greenman * credentials do we use for determination? What if 3470d94caffSDavid Greenman * proc does a setuid? 348df8bae1dSRodney W. Grimes */ 3498eec77b0STim J. Robbins if (vp->v_mount != NULL && vp->v_mount->mnt_flag & MNT_NOEXEC) 350b483c7f6SGuido van Rooij maxprot = VM_PROT_NONE; 351b483c7f6SGuido van Rooij else 352b483c7f6SGuido van Rooij maxprot = VM_PROT_EXECUTE; 353279d7226SMatthew Dillon if (fp->f_flag & FREAD) { 354df8bae1dSRodney W. Grimes maxprot |= VM_PROT_READ; 355279d7226SMatthew Dillon } else if (prot & PROT_READ) { 356279d7226SMatthew Dillon error = EACCES; 357279d7226SMatthew Dillon goto done; 358279d7226SMatthew Dillon } 359c8bdd56bSGuido van Rooij /* 360c8bdd56bSGuido van Rooij * If we are sharing potential changes (either via 361c8bdd56bSGuido van Rooij * MAP_SHARED or via the implicit sharing of character 362c8bdd56bSGuido van Rooij * device mappings), and we are trying to get write 363c8bdd56bSGuido van Rooij * permission although we opened it without asking 364c8daea13SAlexander Kabaev * for it, bail out. 365c8bdd56bSGuido van Rooij */ 366ce7a036dSAlexander Kabaev if ((flags & MAP_SHARED) != 0) { 36705feb99fSGuido van Rooij if ((fp->f_flag & FWRITE) != 0) { 368df8bae1dSRodney W. 
Grimes maxprot |= VM_PROT_WRITE; 369279d7226SMatthew Dillon } else if ((prot & PROT_WRITE) != 0) { 370279d7226SMatthew Dillon error = EACCES; 371279d7226SMatthew Dillon goto done; 372279d7226SMatthew Dillon } 373ce7a036dSAlexander Kabaev } else if (vp->v_type != VCHR || (fp->f_flag & FWRITE) != 0) { 37405feb99fSGuido van Rooij maxprot |= VM_PROT_WRITE; 375279d7226SMatthew Dillon } 376651bb817SAlexander Langer handle = (void *)vp; 37798df9218SJohn Baldwin handle_type = OBJT_VNODE; 37830d4dd7eSAlexander Kabaev } 3798e38aeffSJohn Baldwin map: 3801f6889a1SMatthew Dillon 3811f6889a1SMatthew Dillon /* 3821f6889a1SMatthew Dillon * Do not allow more then a certain number of vm_map_entry structures 3831f6889a1SMatthew Dillon * per process. Scale with the number of rforks sharing the map 3841f6889a1SMatthew Dillon * to make the limit reasonable for threads. 3851f6889a1SMatthew Dillon */ 3861f6889a1SMatthew Dillon if (max_proc_mmap && 3871f6889a1SMatthew Dillon vms->vm_map.nentries >= max_proc_mmap * vms->vm_refcnt) { 388279d7226SMatthew Dillon error = ENOMEM; 389279d7226SMatthew Dillon goto done; 3901f6889a1SMatthew Dillon } 3911f6889a1SMatthew Dillon 3921f6889a1SMatthew Dillon error = vm_mmap(&vms->vm_map, &addr, size, prot, maxprot, 39398df9218SJohn Baldwin flags, handle_type, handle, pos); 39449874f6eSJoseph Koshy #ifdef HWPMC_HOOKS 39549874f6eSJoseph Koshy /* inform hwpmc(4) if an executable is being mapped */ 39649874f6eSJoseph Koshy if (error == 0 && handle_type == OBJT_VNODE && 39749874f6eSJoseph Koshy (prot & PROT_EXEC)) { 39849874f6eSJoseph Koshy pkm.pm_file = handle; 39949874f6eSJoseph Koshy pkm.pm_address = (uintptr_t) addr; 40049874f6eSJoseph Koshy PMC_CALL_HOOK(td, PMC_FN_MMAP, (void *) &pkm); 40149874f6eSJoseph Koshy } 40249874f6eSJoseph Koshy #endif 403df8bae1dSRodney W. 
Grimes if (error == 0) 404b40ce416SJulian Elischer td->td_retval[0] = (register_t) (addr + pageoff); 405279d7226SMatthew Dillon done: 406279d7226SMatthew Dillon if (fp) 407b40ce416SJulian Elischer fdrop(fp, td); 408f6b5b182SJeff Roberson 409df8bae1dSRodney W. Grimes return (error); 410df8bae1dSRodney W. Grimes } 411df8bae1dSRodney W. Grimes 412c2815ad5SPeter Wemm int 413c2815ad5SPeter Wemm freebsd6_mmap(struct thread *td, struct freebsd6_mmap_args *uap) 414c2815ad5SPeter Wemm { 415c2815ad5SPeter Wemm struct mmap_args oargs; 416c2815ad5SPeter Wemm 417c2815ad5SPeter Wemm oargs.addr = uap->addr; 418c2815ad5SPeter Wemm oargs.len = uap->len; 419c2815ad5SPeter Wemm oargs.prot = uap->prot; 420c2815ad5SPeter Wemm oargs.flags = uap->flags; 421c2815ad5SPeter Wemm oargs.fd = uap->fd; 422c2815ad5SPeter Wemm oargs.pos = uap->pos; 423c2815ad5SPeter Wemm return (mmap(td, &oargs)); 424c2815ad5SPeter Wemm } 425c2815ad5SPeter Wemm 42605f0fdd2SPoul-Henning Kamp #ifdef COMPAT_43 427d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 42805f0fdd2SPoul-Henning Kamp struct ommap_args { 42905f0fdd2SPoul-Henning Kamp caddr_t addr; 43005f0fdd2SPoul-Henning Kamp int len; 43105f0fdd2SPoul-Henning Kamp int prot; 43205f0fdd2SPoul-Henning Kamp int flags; 43305f0fdd2SPoul-Henning Kamp int fd; 43405f0fdd2SPoul-Henning Kamp long pos; 43505f0fdd2SPoul-Henning Kamp }; 436d2d3e875SBruce Evans #endif 43705f0fdd2SPoul-Henning Kamp int 438b40ce416SJulian Elischer ommap(td, uap) 439b40ce416SJulian Elischer struct thread *td; 44054d92145SMatthew Dillon struct ommap_args *uap; 44105f0fdd2SPoul-Henning Kamp { 44205f0fdd2SPoul-Henning Kamp struct mmap_args nargs; 44305f0fdd2SPoul-Henning Kamp static const char cvtbsdprot[8] = { 44405f0fdd2SPoul-Henning Kamp 0, 44505f0fdd2SPoul-Henning Kamp PROT_EXEC, 44605f0fdd2SPoul-Henning Kamp PROT_WRITE, 44705f0fdd2SPoul-Henning Kamp PROT_EXEC | PROT_WRITE, 44805f0fdd2SPoul-Henning Kamp PROT_READ, 44905f0fdd2SPoul-Henning Kamp PROT_EXEC | PROT_READ, 45005f0fdd2SPoul-Henning Kamp 
PROT_WRITE | PROT_READ, 45105f0fdd2SPoul-Henning Kamp PROT_EXEC | PROT_WRITE | PROT_READ, 45205f0fdd2SPoul-Henning Kamp }; 4530d94caffSDavid Greenman 45405f0fdd2SPoul-Henning Kamp #define OMAP_ANON 0x0002 45505f0fdd2SPoul-Henning Kamp #define OMAP_COPY 0x0020 45605f0fdd2SPoul-Henning Kamp #define OMAP_SHARED 0x0010 45705f0fdd2SPoul-Henning Kamp #define OMAP_FIXED 0x0100 45805f0fdd2SPoul-Henning Kamp 45905f0fdd2SPoul-Henning Kamp nargs.addr = uap->addr; 46005f0fdd2SPoul-Henning Kamp nargs.len = uap->len; 46105f0fdd2SPoul-Henning Kamp nargs.prot = cvtbsdprot[uap->prot & 0x7]; 46205f0fdd2SPoul-Henning Kamp nargs.flags = 0; 46305f0fdd2SPoul-Henning Kamp if (uap->flags & OMAP_ANON) 46405f0fdd2SPoul-Henning Kamp nargs.flags |= MAP_ANON; 46505f0fdd2SPoul-Henning Kamp if (uap->flags & OMAP_COPY) 46605f0fdd2SPoul-Henning Kamp nargs.flags |= MAP_COPY; 46705f0fdd2SPoul-Henning Kamp if (uap->flags & OMAP_SHARED) 46805f0fdd2SPoul-Henning Kamp nargs.flags |= MAP_SHARED; 46905f0fdd2SPoul-Henning Kamp else 47005f0fdd2SPoul-Henning Kamp nargs.flags |= MAP_PRIVATE; 47105f0fdd2SPoul-Henning Kamp if (uap->flags & OMAP_FIXED) 47205f0fdd2SPoul-Henning Kamp nargs.flags |= MAP_FIXED; 47305f0fdd2SPoul-Henning Kamp nargs.fd = uap->fd; 47405f0fdd2SPoul-Henning Kamp nargs.pos = uap->pos; 475b40ce416SJulian Elischer return (mmap(td, &nargs)); 47605f0fdd2SPoul-Henning Kamp } 47705f0fdd2SPoul-Henning Kamp #endif /* COMPAT_43 */ 47805f0fdd2SPoul-Henning Kamp 47905f0fdd2SPoul-Henning Kamp 480d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 481df8bae1dSRodney W. Grimes struct msync_args { 482651bb817SAlexander Langer void *addr; 483c899450bSPeter Wemm size_t len; 484e6c6af11SDavid Greenman int flags; 485df8bae1dSRodney W. Grimes }; 486d2d3e875SBruce Evans #endif 487d2c60af8SMatthew Dillon /* 488d2c60af8SMatthew Dillon * MPSAFE 489d2c60af8SMatthew Dillon */ 490df8bae1dSRodney W. Grimes int 491b40ce416SJulian Elischer msync(td, uap) 492b40ce416SJulian Elischer struct thread *td; 493df8bae1dSRodney W. 
Grimes struct msync_args *uap; 494df8bae1dSRodney W. Grimes { 495df8bae1dSRodney W. Grimes vm_offset_t addr; 496dabee6feSPeter Wemm vm_size_t size, pageoff; 497e6c6af11SDavid Greenman int flags; 498df8bae1dSRodney W. Grimes vm_map_t map; 499df8bae1dSRodney W. Grimes int rv; 500df8bae1dSRodney W. Grimes 501df8bae1dSRodney W. Grimes addr = (vm_offset_t) uap->addr; 5029154ee6aSPeter Wemm size = uap->len; 503e6c6af11SDavid Greenman flags = uap->flags; 504e6c6af11SDavid Greenman 505dabee6feSPeter Wemm pageoff = (addr & PAGE_MASK); 506dabee6feSPeter Wemm addr -= pageoff; 507dabee6feSPeter Wemm size += pageoff; 508dabee6feSPeter Wemm size = (vm_size_t) round_page(size); 5099154ee6aSPeter Wemm if (addr + size < addr) 510dabee6feSPeter Wemm return (EINVAL); 511dabee6feSPeter Wemm 512dabee6feSPeter Wemm if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE)) 5131e62bc63SDavid Greenman return (EINVAL); 5141e62bc63SDavid Greenman 515b40ce416SJulian Elischer map = &td->td_proc->p_vmspace->vm_map; 5169154ee6aSPeter Wemm 517df8bae1dSRodney W. Grimes /* 518df8bae1dSRodney W. Grimes * Clean the pages and interpret the return value. 519df8bae1dSRodney W. Grimes */ 520950f8459SAlan Cox rv = vm_map_sync(map, addr, addr + size, (flags & MS_ASYNC) == 0, 521e6c6af11SDavid Greenman (flags & MS_INVALIDATE) != 0); 522df8bae1dSRodney W. Grimes switch (rv) { 523df8bae1dSRodney W. Grimes case KERN_SUCCESS: 524d2c60af8SMatthew Dillon return (0); 525df8bae1dSRodney W. Grimes case KERN_INVALID_ADDRESS: 526df8bae1dSRodney W. Grimes return (EINVAL); /* Sun returns ENOMEM? */ 527b7b7cd44SAlan Cox case KERN_INVALID_ARGUMENT: 528b7b7cd44SAlan Cox return (EBUSY); 529df8bae1dSRodney W. Grimes default: 530df8bae1dSRodney W. Grimes return (EINVAL); 531df8bae1dSRodney W. Grimes } 532df8bae1dSRodney W. Grimes } 533df8bae1dSRodney W. Grimes 534d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 535df8bae1dSRodney W. 
Grimes struct munmap_args { 536651bb817SAlexander Langer void *addr; 5379154ee6aSPeter Wemm size_t len; 538df8bae1dSRodney W. Grimes }; 539d2d3e875SBruce Evans #endif 540d2c60af8SMatthew Dillon /* 541d2c60af8SMatthew Dillon * MPSAFE 542d2c60af8SMatthew Dillon */ 543df8bae1dSRodney W. Grimes int 544b40ce416SJulian Elischer munmap(td, uap) 545b40ce416SJulian Elischer struct thread *td; 54654d92145SMatthew Dillon struct munmap_args *uap; 547df8bae1dSRodney W. Grimes { 54849874f6eSJoseph Koshy #ifdef HWPMC_HOOKS 54949874f6eSJoseph Koshy struct pmckern_map_out pkm; 55049874f6eSJoseph Koshy vm_map_entry_t entry; 55149874f6eSJoseph Koshy #endif 552df8bae1dSRodney W. Grimes vm_offset_t addr; 553dabee6feSPeter Wemm vm_size_t size, pageoff; 554df8bae1dSRodney W. Grimes vm_map_t map; 555df8bae1dSRodney W. Grimes 556df8bae1dSRodney W. Grimes addr = (vm_offset_t) uap->addr; 5579154ee6aSPeter Wemm size = uap->len; 558d8834602SAlan Cox if (size == 0) 559d8834602SAlan Cox return (EINVAL); 560dabee6feSPeter Wemm 561dabee6feSPeter Wemm pageoff = (addr & PAGE_MASK); 562dabee6feSPeter Wemm addr -= pageoff; 563dabee6feSPeter Wemm size += pageoff; 564dabee6feSPeter Wemm size = (vm_size_t) round_page(size); 5659154ee6aSPeter Wemm if (addr + size < addr) 566df8bae1dSRodney W. Grimes return (EINVAL); 5679154ee6aSPeter Wemm 568df8bae1dSRodney W. Grimes /* 56905ba50f5SJake Burkholder * Check for illegal addresses. Watch out for address wrap... 570df8bae1dSRodney W. Grimes */ 571b40ce416SJulian Elischer map = &td->td_proc->p_vmspace->vm_map; 57205ba50f5SJake Burkholder if (addr < vm_map_min(map) || addr + size > vm_map_max(map)) 57305ba50f5SJake Burkholder return (EINVAL); 574d8834602SAlan Cox vm_map_lock(map); 575df8bae1dSRodney W. Grimes /* 576df8bae1dSRodney W. Grimes * Make sure entire range is allocated. 577df8bae1dSRodney W. Grimes */ 578d8834602SAlan Cox if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE)) { 579d8834602SAlan Cox vm_map_unlock(map); 580df8bae1dSRodney W. 
Grimes return (EINVAL); 581d8834602SAlan Cox } 58249874f6eSJoseph Koshy #ifdef HWPMC_HOOKS 58349874f6eSJoseph Koshy /* 58449874f6eSJoseph Koshy * Inform hwpmc if the address range being unmapped contains 58549874f6eSJoseph Koshy * an executable region. 58649874f6eSJoseph Koshy */ 58749874f6eSJoseph Koshy if (vm_map_lookup_entry(map, addr, &entry)) { 58849874f6eSJoseph Koshy for (; 58949874f6eSJoseph Koshy entry != &map->header && entry->start < addr + size; 59049874f6eSJoseph Koshy entry = entry->next) { 59149874f6eSJoseph Koshy if (vm_map_check_protection(map, entry->start, 59249874f6eSJoseph Koshy entry->end, VM_PROT_EXECUTE) == TRUE) { 59349874f6eSJoseph Koshy pkm.pm_address = (uintptr_t) addr; 59449874f6eSJoseph Koshy pkm.pm_size = (size_t) size; 59549874f6eSJoseph Koshy PMC_CALL_HOOK(td, PMC_FN_MUNMAP, 59649874f6eSJoseph Koshy (void *) &pkm); 59749874f6eSJoseph Koshy break; 59849874f6eSJoseph Koshy } 59949874f6eSJoseph Koshy } 60049874f6eSJoseph Koshy } 60149874f6eSJoseph Koshy #endif 602df8bae1dSRodney W. Grimes /* returns nothing but KERN_SUCCESS anyway */ 603d8834602SAlan Cox vm_map_delete(map, addr, addr + size); 604d8834602SAlan Cox vm_map_unlock(map); 605df8bae1dSRodney W. Grimes return (0); 606df8bae1dSRodney W. Grimes } 607df8bae1dSRodney W. Grimes 608d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 609df8bae1dSRodney W. Grimes struct mprotect_args { 610651bb817SAlexander Langer const void *addr; 6119154ee6aSPeter Wemm size_t len; 612df8bae1dSRodney W. Grimes int prot; 613df8bae1dSRodney W. Grimes }; 614d2d3e875SBruce Evans #endif 615d2c60af8SMatthew Dillon /* 616d2c60af8SMatthew Dillon * MPSAFE 617d2c60af8SMatthew Dillon */ 618df8bae1dSRodney W. Grimes int 619b40ce416SJulian Elischer mprotect(td, uap) 620b40ce416SJulian Elischer struct thread *td; 621df8bae1dSRodney W. Grimes struct mprotect_args *uap; 622df8bae1dSRodney W. Grimes { 623df8bae1dSRodney W. 
Grimes vm_offset_t addr; 624dabee6feSPeter Wemm vm_size_t size, pageoff; 62554d92145SMatthew Dillon vm_prot_t prot; 626df8bae1dSRodney W. Grimes 627df8bae1dSRodney W. Grimes addr = (vm_offset_t) uap->addr; 6289154ee6aSPeter Wemm size = uap->len; 629df8bae1dSRodney W. Grimes prot = uap->prot & VM_PROT_ALL; 630d0aea04fSJohn Dyson #if defined(VM_PROT_READ_IS_EXEC) 631d0aea04fSJohn Dyson if (prot & VM_PROT_READ) 632d0aea04fSJohn Dyson prot |= VM_PROT_EXECUTE; 633d0aea04fSJohn Dyson #endif 634df8bae1dSRodney W. Grimes 635dabee6feSPeter Wemm pageoff = (addr & PAGE_MASK); 636dabee6feSPeter Wemm addr -= pageoff; 637dabee6feSPeter Wemm size += pageoff; 638dabee6feSPeter Wemm size = (vm_size_t) round_page(size); 6399154ee6aSPeter Wemm if (addr + size < addr) 640dabee6feSPeter Wemm return (EINVAL); 641dabee6feSPeter Wemm 64243285049SAlan Cox switch (vm_map_protect(&td->td_proc->p_vmspace->vm_map, addr, 64343285049SAlan Cox addr + size, prot, FALSE)) { 644df8bae1dSRodney W. Grimes case KERN_SUCCESS: 645df8bae1dSRodney W. Grimes return (0); 646df8bae1dSRodney W. Grimes case KERN_PROTECTION_FAILURE: 647df8bae1dSRodney W. Grimes return (EACCES); 648df8bae1dSRodney W. Grimes } 649df8bae1dSRodney W. Grimes return (EINVAL); 650df8bae1dSRodney W. Grimes } 651df8bae1dSRodney W. 
Grimes 652d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 653dabee6feSPeter Wemm struct minherit_args { 654651bb817SAlexander Langer void *addr; 6559154ee6aSPeter Wemm size_t len; 656dabee6feSPeter Wemm int inherit; 657dabee6feSPeter Wemm }; 658dabee6feSPeter Wemm #endif 659d2c60af8SMatthew Dillon /* 660d2c60af8SMatthew Dillon * MPSAFE 661d2c60af8SMatthew Dillon */ 662dabee6feSPeter Wemm int 663b40ce416SJulian Elischer minherit(td, uap) 664b40ce416SJulian Elischer struct thread *td; 665dabee6feSPeter Wemm struct minherit_args *uap; 666dabee6feSPeter Wemm { 667dabee6feSPeter Wemm vm_offset_t addr; 668dabee6feSPeter Wemm vm_size_t size, pageoff; 66954d92145SMatthew Dillon vm_inherit_t inherit; 670dabee6feSPeter Wemm 671dabee6feSPeter Wemm addr = (vm_offset_t)uap->addr; 6729154ee6aSPeter Wemm size = uap->len; 673dabee6feSPeter Wemm inherit = uap->inherit; 674dabee6feSPeter Wemm 675dabee6feSPeter Wemm pageoff = (addr & PAGE_MASK); 676dabee6feSPeter Wemm addr -= pageoff; 677dabee6feSPeter Wemm size += pageoff; 678dabee6feSPeter Wemm size = (vm_size_t) round_page(size); 6799154ee6aSPeter Wemm if (addr + size < addr) 680dabee6feSPeter Wemm return (EINVAL); 681dabee6feSPeter Wemm 682e0be79afSAlan Cox switch (vm_map_inherit(&td->td_proc->p_vmspace->vm_map, addr, 683e0be79afSAlan Cox addr + size, inherit)) { 684dabee6feSPeter Wemm case KERN_SUCCESS: 685dabee6feSPeter Wemm return (0); 686dabee6feSPeter Wemm case KERN_PROTECTION_FAILURE: 687dabee6feSPeter Wemm return (EACCES); 688dabee6feSPeter Wemm } 689dabee6feSPeter Wemm return (EINVAL); 690dabee6feSPeter Wemm } 691dabee6feSPeter Wemm 692dabee6feSPeter Wemm #ifndef _SYS_SYSPROTO_H_ 693df8bae1dSRodney W. Grimes struct madvise_args { 694651bb817SAlexander Langer void *addr; 6959154ee6aSPeter Wemm size_t len; 696df8bae1dSRodney W. Grimes int behav; 697df8bae1dSRodney W. 
};
#endif

/*
 * MPSAFE
 *
 * madvise(2) system call: advise the VM system about expected usage of
 * the pages in [addr, addr + len).  Advice is advisory only; the
 * special MADV_PROTECT behavior marks the process as exempt from being
 * killed when swap is exhausted.
 */
/* ARGSUSED */
int
madvise(td, uap)
	struct thread *td;
	struct madvise_args *uap;
{
	vm_offset_t start, end;
	vm_map_t map;
	struct proc *p;
	int error;

	/*
	 * Check for our special case, advising the swap pager we are
	 * "immortal."
	 */
	if (uap->behav == MADV_PROTECT) {
		error = priv_check(td, PRIV_VM_MADV_PROTECT);
		if (error == 0) {
			p = td->td_proc;
			PROC_LOCK(p);
			p->p_flag |= P_PROTECTED;
			PROC_UNLOCK(p);
		}
		return (error);
	}
	/*
	 * Check for illegal behavior
	 */
	if (uap->behav < 0 || uap->behav > MADV_CORE)
		return (EINVAL);
	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	if ((vm_offset_t)uap->addr < vm_map_min(map) ||
	    (vm_offset_t)uap->addr + uap->len > vm_map_max(map))
		return (EINVAL);
	/* A wrapped range is caught explicitly here. */
	if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr)
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	start = trunc_page((vm_offset_t) uap->addr);
	end = round_page((vm_offset_t) uap->addr + uap->len);

	if (vm_map_madvise(map, start, end, uap->behav))
		return (EINVAL);
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mincore_args {
	const void *addr;
	size_t len;
	char *vec;
};
#endif

/*
 * MPSAFE
 *
 * mincore(2) system call: report residency/reference/dirty status of
 * each page in [addr, addr + len) through the user byte vector vec.
 */
/* ARGSUSED */
int
mincore(td, uap)
	struct thread *td;
	struct mincore_args *uap;
Grimes { 773867a482dSJohn Dyson vm_offset_t addr, first_addr; 774867a482dSJohn Dyson vm_offset_t end, cend; 775867a482dSJohn Dyson pmap_t pmap; 776867a482dSJohn Dyson vm_map_t map; 77702c04a2fSJohn Dyson char *vec; 778d2c60af8SMatthew Dillon int error = 0; 779867a482dSJohn Dyson int vecindex, lastvecindex; 78054d92145SMatthew Dillon vm_map_entry_t current; 781867a482dSJohn Dyson vm_map_entry_t entry; 782867a482dSJohn Dyson int mincoreinfo; 783dd2622a8SAlan Cox unsigned int timestamp; 784df8bae1dSRodney W. Grimes 785867a482dSJohn Dyson /* 786867a482dSJohn Dyson * Make sure that the addresses presented are valid for user 787867a482dSJohn Dyson * mode. 788867a482dSJohn Dyson */ 789867a482dSJohn Dyson first_addr = addr = trunc_page((vm_offset_t) uap->addr); 7909154ee6aSPeter Wemm end = addr + (vm_size_t)round_page(uap->len); 79105ba50f5SJake Burkholder map = &td->td_proc->p_vmspace->vm_map; 79205ba50f5SJake Burkholder if (end > vm_map_max(map) || end < addr) 793455dd7d4SKonstantin Belousov return (ENOMEM); 79402c04a2fSJohn Dyson 795867a482dSJohn Dyson /* 796867a482dSJohn Dyson * Address of byte vector 797867a482dSJohn Dyson */ 79802c04a2fSJohn Dyson vec = uap->vec; 799867a482dSJohn Dyson 800b40ce416SJulian Elischer pmap = vmspace_pmap(td->td_proc->p_vmspace); 801867a482dSJohn Dyson 802eff50fcdSAlan Cox vm_map_lock_read(map); 803dd2622a8SAlan Cox RestartScan: 804dd2622a8SAlan Cox timestamp = map->timestamp; 805867a482dSJohn Dyson 806455dd7d4SKonstantin Belousov if (!vm_map_lookup_entry(map, addr, &entry)) { 807455dd7d4SKonstantin Belousov vm_map_unlock_read(map); 808455dd7d4SKonstantin Belousov return (ENOMEM); 809455dd7d4SKonstantin Belousov } 810867a482dSJohn Dyson 811867a482dSJohn Dyson /* 812867a482dSJohn Dyson * Do this on a map entry basis so that if the pages are not 813867a482dSJohn Dyson * in the current processes address space, we can easily look 814867a482dSJohn Dyson * up the pages elsewhere. 
815867a482dSJohn Dyson */ 816867a482dSJohn Dyson lastvecindex = -1; 817867a482dSJohn Dyson for (current = entry; 818867a482dSJohn Dyson (current != &map->header) && (current->start < end); 819867a482dSJohn Dyson current = current->next) { 820867a482dSJohn Dyson 821867a482dSJohn Dyson /* 822455dd7d4SKonstantin Belousov * check for contiguity 823455dd7d4SKonstantin Belousov */ 824455dd7d4SKonstantin Belousov if (current->end < end && 825455dd7d4SKonstantin Belousov (entry->next == &map->header || 826455dd7d4SKonstantin Belousov current->next->start > current->end)) { 827455dd7d4SKonstantin Belousov vm_map_unlock_read(map); 828455dd7d4SKonstantin Belousov return (ENOMEM); 829455dd7d4SKonstantin Belousov } 830455dd7d4SKonstantin Belousov 831455dd7d4SKonstantin Belousov /* 832867a482dSJohn Dyson * ignore submaps (for now) or null objects 833867a482dSJohn Dyson */ 8349fdfe602SMatthew Dillon if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) || 835867a482dSJohn Dyson current->object.vm_object == NULL) 836867a482dSJohn Dyson continue; 837867a482dSJohn Dyson 838867a482dSJohn Dyson /* 839867a482dSJohn Dyson * limit this scan to the current map entry and the 840867a482dSJohn Dyson * limits for the mincore call 841867a482dSJohn Dyson */ 842867a482dSJohn Dyson if (addr < current->start) 843867a482dSJohn Dyson addr = current->start; 844867a482dSJohn Dyson cend = current->end; 845867a482dSJohn Dyson if (cend > end) 846867a482dSJohn Dyson cend = end; 847867a482dSJohn Dyson 848867a482dSJohn Dyson /* 849867a482dSJohn Dyson * scan this entry one page at a time 850867a482dSJohn Dyson */ 851867a482dSJohn Dyson while (addr < cend) { 852867a482dSJohn Dyson /* 853867a482dSJohn Dyson * Check pmap first, it is likely faster, also 854867a482dSJohn Dyson * it can provide info as to whether we are the 855867a482dSJohn Dyson * one referencing or modifying the page. 
856867a482dSJohn Dyson */ 857867a482dSJohn Dyson mincoreinfo = pmap_mincore(pmap, addr); 858867a482dSJohn Dyson if (!mincoreinfo) { 859867a482dSJohn Dyson vm_pindex_t pindex; 860867a482dSJohn Dyson vm_ooffset_t offset; 861867a482dSJohn Dyson vm_page_t m; 862867a482dSJohn Dyson /* 863867a482dSJohn Dyson * calculate the page index into the object 864867a482dSJohn Dyson */ 865867a482dSJohn Dyson offset = current->offset + (addr - current->start); 866867a482dSJohn Dyson pindex = OFF_TO_IDX(offset); 867bc5b057fSAlan Cox VM_OBJECT_LOCK(current->object.vm_object); 868867a482dSJohn Dyson m = vm_page_lookup(current->object.vm_object, 869867a482dSJohn Dyson pindex); 870867a482dSJohn Dyson /* 871867a482dSJohn Dyson * if the page is resident, then gather information about 872867a482dSJohn Dyson * it. 873867a482dSJohn Dyson */ 874cafe836aSAlan Cox if (m != NULL && m->valid != 0) { 875867a482dSJohn Dyson mincoreinfo = MINCORE_INCORE; 8767ebcee37SAlan Cox vm_page_lock_queues(); 877867a482dSJohn Dyson if (m->dirty || 8780385347cSPeter Wemm pmap_is_modified(m)) 879867a482dSJohn Dyson mincoreinfo |= MINCORE_MODIFIED_OTHER; 880867a482dSJohn Dyson if ((m->flags & PG_REFERENCED) || 8810385347cSPeter Wemm pmap_ts_referenced(m)) { 882e69763a3SDoug Rabson vm_page_flag_set(m, PG_REFERENCED); 883867a482dSJohn Dyson mincoreinfo |= MINCORE_REFERENCED_OTHER; 88402c04a2fSJohn Dyson } 885e80b7b69SAlan Cox vm_page_unlock_queues(); 8869b5a5d81SJohn Dyson } 8877ebcee37SAlan Cox VM_OBJECT_UNLOCK(current->object.vm_object); 8887ebcee37SAlan Cox } 889867a482dSJohn Dyson 890867a482dSJohn Dyson /* 891dd2622a8SAlan Cox * subyte may page fault. In case it needs to modify 892dd2622a8SAlan Cox * the map, we release the lock. 
893dd2622a8SAlan Cox */ 894dd2622a8SAlan Cox vm_map_unlock_read(map); 895dd2622a8SAlan Cox 896dd2622a8SAlan Cox /* 897867a482dSJohn Dyson * calculate index into user supplied byte vector 898867a482dSJohn Dyson */ 899867a482dSJohn Dyson vecindex = OFF_TO_IDX(addr - first_addr); 900867a482dSJohn Dyson 901867a482dSJohn Dyson /* 902867a482dSJohn Dyson * If we have skipped map entries, we need to make sure that 903867a482dSJohn Dyson * the byte vector is zeroed for those skipped entries. 904867a482dSJohn Dyson */ 905867a482dSJohn Dyson while ((lastvecindex + 1) < vecindex) { 906867a482dSJohn Dyson error = subyte(vec + lastvecindex, 0); 907867a482dSJohn Dyson if (error) { 908d2c60af8SMatthew Dillon error = EFAULT; 909d2c60af8SMatthew Dillon goto done2; 910867a482dSJohn Dyson } 911867a482dSJohn Dyson ++lastvecindex; 912867a482dSJohn Dyson } 913867a482dSJohn Dyson 914867a482dSJohn Dyson /* 915867a482dSJohn Dyson * Pass the page information to the user 916867a482dSJohn Dyson */ 917867a482dSJohn Dyson error = subyte(vec + vecindex, mincoreinfo); 918867a482dSJohn Dyson if (error) { 919d2c60af8SMatthew Dillon error = EFAULT; 920d2c60af8SMatthew Dillon goto done2; 921867a482dSJohn Dyson } 922dd2622a8SAlan Cox 923dd2622a8SAlan Cox /* 924dd2622a8SAlan Cox * If the map has changed, due to the subyte, the previous 925dd2622a8SAlan Cox * output may be invalid. 926dd2622a8SAlan Cox */ 927dd2622a8SAlan Cox vm_map_lock_read(map); 928dd2622a8SAlan Cox if (timestamp != map->timestamp) 929dd2622a8SAlan Cox goto RestartScan; 930dd2622a8SAlan Cox 931867a482dSJohn Dyson lastvecindex = vecindex; 93202c04a2fSJohn Dyson addr += PAGE_SIZE; 93302c04a2fSJohn Dyson } 934867a482dSJohn Dyson } 935867a482dSJohn Dyson 936867a482dSJohn Dyson /* 937dd2622a8SAlan Cox * subyte may page fault. In case it needs to modify 938dd2622a8SAlan Cox * the map, we release the lock. 
939dd2622a8SAlan Cox */ 940dd2622a8SAlan Cox vm_map_unlock_read(map); 941dd2622a8SAlan Cox 942dd2622a8SAlan Cox /* 943867a482dSJohn Dyson * Zero the last entries in the byte vector. 944867a482dSJohn Dyson */ 945867a482dSJohn Dyson vecindex = OFF_TO_IDX(end - first_addr); 946867a482dSJohn Dyson while ((lastvecindex + 1) < vecindex) { 947867a482dSJohn Dyson error = subyte(vec + lastvecindex, 0); 948867a482dSJohn Dyson if (error) { 949d2c60af8SMatthew Dillon error = EFAULT; 950d2c60af8SMatthew Dillon goto done2; 951867a482dSJohn Dyson } 952867a482dSJohn Dyson ++lastvecindex; 953867a482dSJohn Dyson } 954867a482dSJohn Dyson 955dd2622a8SAlan Cox /* 956dd2622a8SAlan Cox * If the map has changed, due to the subyte, the previous 957dd2622a8SAlan Cox * output may be invalid. 958dd2622a8SAlan Cox */ 959dd2622a8SAlan Cox vm_map_lock_read(map); 960dd2622a8SAlan Cox if (timestamp != map->timestamp) 961dd2622a8SAlan Cox goto RestartScan; 962eff50fcdSAlan Cox vm_map_unlock_read(map); 963d2c60af8SMatthew Dillon done2: 964d2c60af8SMatthew Dillon return (error); 965df8bae1dSRodney W. Grimes } 966df8bae1dSRodney W. Grimes 967d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 968df8bae1dSRodney W. Grimes struct mlock_args { 969651bb817SAlexander Langer const void *addr; 970df8bae1dSRodney W. Grimes size_t len; 971df8bae1dSRodney W. Grimes }; 972d2d3e875SBruce Evans #endif 973d2c60af8SMatthew Dillon /* 974d2c60af8SMatthew Dillon * MPSAFE 975d2c60af8SMatthew Dillon */ 976df8bae1dSRodney W. Grimes int 977b40ce416SJulian Elischer mlock(td, uap) 978b40ce416SJulian Elischer struct thread *td; 979df8bae1dSRodney W. Grimes struct mlock_args *uap; 980df8bae1dSRodney W. Grimes { 981f0ea4612SDon Lewis struct proc *proc; 982bb734798SDon Lewis vm_offset_t addr, end, last, start; 983bb734798SDon Lewis vm_size_t npages, size; 984bb734798SDon Lewis int error; 985df8bae1dSRodney W. 

	/* Wiring memory is a privileged operation. */
	error = priv_check(td, PRIV_VM_MLOCK);
	if (error)
		return (error);
	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	/* Reject wraparound of either the raw or the rounded range. */
	if (last < addr || end < addr)
		return (EINVAL);
	npages = atop(end - start);
	if (npages > vm_page_max_wired)
		return (ENOMEM);
	proc = td->td_proc;
	/*
	 * The sum of already-wired pages plus this request must not
	 * exceed the per-process RLIMIT_MEMLOCK resource limit.
	 */
	PROC_LOCK(proc);
	if (ptoa(npages +
	    pmap_wired_count(vm_map_pmap(&proc->p_vmspace->vm_map))) >
	    lim_cur(proc, RLIMIT_MEMLOCK)) {
		PROC_UNLOCK(proc);
		return (ENOMEM);
	}
	PROC_UNLOCK(proc);
	/* Also respect the global cap on wired pages. */
	if (npages + cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);
	error = vm_map_wire(&proc->p_vmspace->vm_map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

Grimes 1015d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 10164a40e3d4SJohn Dyson struct mlockall_args { 10174a40e3d4SJohn Dyson int how; 10184a40e3d4SJohn Dyson }; 10194a40e3d4SJohn Dyson #endif 10204a40e3d4SJohn Dyson 1021d2c60af8SMatthew Dillon /* 1022d2c60af8SMatthew Dillon * MPSAFE 1023d2c60af8SMatthew Dillon */ 10244a40e3d4SJohn Dyson int 1025b40ce416SJulian Elischer mlockall(td, uap) 1026b40ce416SJulian Elischer struct thread *td; 10274a40e3d4SJohn Dyson struct mlockall_args *uap; 10284a40e3d4SJohn Dyson { 1029abd498aaSBruce M Simpson vm_map_t map; 1030abd498aaSBruce M Simpson int error; 1031abd498aaSBruce M Simpson 1032abd498aaSBruce M Simpson map = &td->td_proc->p_vmspace->vm_map; 1033abd498aaSBruce M Simpson error = 0; 1034abd498aaSBruce M Simpson 1035abd498aaSBruce M Simpson if ((uap->how == 0) || ((uap->how & ~(MCL_CURRENT|MCL_FUTURE)) != 0)) 1036abd498aaSBruce M Simpson return (EINVAL); 1037abd498aaSBruce M Simpson 103811f7ddc5SBruce M Simpson #if 0 1039abd498aaSBruce M Simpson /* 1040abd498aaSBruce M Simpson * If wiring all pages in the process would cause it to exceed 1041abd498aaSBruce M Simpson * a hard resource limit, return ENOMEM. 
1042abd498aaSBruce M Simpson */ 104391d5354aSJohn Baldwin PROC_LOCK(td->td_proc); 1044abd498aaSBruce M Simpson if (map->size - ptoa(pmap_wired_count(vm_map_pmap(map)) > 104591d5354aSJohn Baldwin lim_cur(td->td_proc, RLIMIT_MEMLOCK))) { 104691d5354aSJohn Baldwin PROC_UNLOCK(td->td_proc); 1047abd498aaSBruce M Simpson return (ENOMEM); 104891d5354aSJohn Baldwin } 104991d5354aSJohn Baldwin PROC_UNLOCK(td->td_proc); 1050abd498aaSBruce M Simpson #else 1051acd3428bSRobert Watson error = priv_check(td, PRIV_VM_MLOCK); 1052abd498aaSBruce M Simpson if (error) 1053abd498aaSBruce M Simpson return (error); 1054abd498aaSBruce M Simpson #endif 1055abd498aaSBruce M Simpson 1056abd498aaSBruce M Simpson if (uap->how & MCL_FUTURE) { 1057abd498aaSBruce M Simpson vm_map_lock(map); 1058abd498aaSBruce M Simpson vm_map_modflags(map, MAP_WIREFUTURE, 0); 1059abd498aaSBruce M Simpson vm_map_unlock(map); 1060abd498aaSBruce M Simpson error = 0; 1061abd498aaSBruce M Simpson } 1062abd498aaSBruce M Simpson 1063abd498aaSBruce M Simpson if (uap->how & MCL_CURRENT) { 1064abd498aaSBruce M Simpson /* 1065abd498aaSBruce M Simpson * P1003.1-2001 mandates that all currently mapped pages 1066abd498aaSBruce M Simpson * will be memory resident and locked (wired) upon return 1067abd498aaSBruce M Simpson * from mlockall(). vm_map_wire() will wire pages, by 1068abd498aaSBruce M Simpson * calling vm_fault_wire() for each page in the region. 1069abd498aaSBruce M Simpson */ 1070abd498aaSBruce M Simpson error = vm_map_wire(map, vm_map_min(map), vm_map_max(map), 1071abd498aaSBruce M Simpson VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK); 1072abd498aaSBruce M Simpson error = (error == KERN_SUCCESS ? 
0 : EAGAIN); 1073abd498aaSBruce M Simpson } 1074abd498aaSBruce M Simpson 1075abd498aaSBruce M Simpson return (error); 10764a40e3d4SJohn Dyson } 10774a40e3d4SJohn Dyson 10784a40e3d4SJohn Dyson #ifndef _SYS_SYSPROTO_H_ 1079fa721254SAlfred Perlstein struct munlockall_args { 1080abd498aaSBruce M Simpson register_t dummy; 10814a40e3d4SJohn Dyson }; 10824a40e3d4SJohn Dyson #endif 10834a40e3d4SJohn Dyson 1084d2c60af8SMatthew Dillon /* 1085d2c60af8SMatthew Dillon * MPSAFE 1086d2c60af8SMatthew Dillon */ 10874a40e3d4SJohn Dyson int 1088b40ce416SJulian Elischer munlockall(td, uap) 1089b40ce416SJulian Elischer struct thread *td; 10904a40e3d4SJohn Dyson struct munlockall_args *uap; 10914a40e3d4SJohn Dyson { 1092abd498aaSBruce M Simpson vm_map_t map; 1093abd498aaSBruce M Simpson int error; 1094abd498aaSBruce M Simpson 1095abd498aaSBruce M Simpson map = &td->td_proc->p_vmspace->vm_map; 1096acd3428bSRobert Watson error = priv_check(td, PRIV_VM_MUNLOCK); 1097abd498aaSBruce M Simpson if (error) 1098abd498aaSBruce M Simpson return (error); 1099abd498aaSBruce M Simpson 1100abd498aaSBruce M Simpson /* Clear the MAP_WIREFUTURE flag from this vm_map. */ 1101abd498aaSBruce M Simpson vm_map_lock(map); 1102abd498aaSBruce M Simpson vm_map_modflags(map, 0, MAP_WIREFUTURE); 1103abd498aaSBruce M Simpson vm_map_unlock(map); 1104abd498aaSBruce M Simpson 1105abd498aaSBruce M Simpson /* Forcibly unwire all pages. */ 1106abd498aaSBruce M Simpson error = vm_map_unwire(map, vm_map_min(map), vm_map_max(map), 1107abd498aaSBruce M Simpson VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK); 1108abd498aaSBruce M Simpson 1109abd498aaSBruce M Simpson return (error); 11104a40e3d4SJohn Dyson } 11114a40e3d4SJohn Dyson 11124a40e3d4SJohn Dyson #ifndef _SYS_SYSPROTO_H_ 1113df8bae1dSRodney W. Grimes struct munlock_args { 1114651bb817SAlexander Langer const void *addr; 1115df8bae1dSRodney W. Grimes size_t len; 1116df8bae1dSRodney W. 
Grimes }; 1117d2d3e875SBruce Evans #endif 1118d2c60af8SMatthew Dillon /* 1119d2c60af8SMatthew Dillon * MPSAFE 1120d2c60af8SMatthew Dillon */ 1121df8bae1dSRodney W. Grimes int 1122b40ce416SJulian Elischer munlock(td, uap) 1123b40ce416SJulian Elischer struct thread *td; 1124df8bae1dSRodney W. Grimes struct munlock_args *uap; 1125df8bae1dSRodney W. Grimes { 1126bb734798SDon Lewis vm_offset_t addr, end, last, start; 112716929939SDon Lewis vm_size_t size; 1128df8bae1dSRodney W. Grimes int error; 1129df8bae1dSRodney W. Grimes 1130acd3428bSRobert Watson error = priv_check(td, PRIV_VM_MUNLOCK); 113147934cefSDon Lewis if (error) 113247934cefSDon Lewis return (error); 113316929939SDon Lewis addr = (vm_offset_t)uap->addr; 113416929939SDon Lewis size = uap->len; 1135bb734798SDon Lewis last = addr + size; 113616929939SDon Lewis start = trunc_page(addr); 1137bb734798SDon Lewis end = round_page(last); 1138bb734798SDon Lewis if (last < addr || end < addr) 1139df8bae1dSRodney W. Grimes return (EINVAL); 114016929939SDon Lewis error = vm_map_unwire(&td->td_proc->p_vmspace->vm_map, start, end, 114116929939SDon Lewis VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); 1142df8bae1dSRodney W. Grimes return (error == KERN_SUCCESS ? 0 : ENOMEM); 1143df8bae1dSRodney W. Grimes } 1144df8bae1dSRodney W. Grimes 1145df8bae1dSRodney W. Grimes /* 1146c8daea13SAlexander Kabaev * vm_mmap_vnode() 1147c8daea13SAlexander Kabaev * 1148c8daea13SAlexander Kabaev * MPSAFE 1149c8daea13SAlexander Kabaev * 1150c8daea13SAlexander Kabaev * Helper function for vm_mmap. Perform sanity check specific for mmap 1151c8daea13SAlexander Kabaev * operations on vnodes. 
1152c8daea13SAlexander Kabaev */ 1153c8daea13SAlexander Kabaev int 1154c8daea13SAlexander Kabaev vm_mmap_vnode(struct thread *td, vm_size_t objsize, 1155c8daea13SAlexander Kabaev vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp, 1156c8daea13SAlexander Kabaev struct vnode *vp, vm_ooffset_t foff, vm_object_t *objp) 1157c8daea13SAlexander Kabaev { 1158c8daea13SAlexander Kabaev struct vattr va; 1159c8daea13SAlexander Kabaev void *handle; 1160c8daea13SAlexander Kabaev vm_object_t obj; 1161ae51ff11SJeff Roberson struct mount *mp; 116223fc1a90SPoul-Henning Kamp int error, flags, type; 1163ae51ff11SJeff Roberson int vfslocked; 1164c8daea13SAlexander Kabaev 1165ae51ff11SJeff Roberson mp = vp->v_mount; 1166ae51ff11SJeff Roberson vfslocked = VFS_LOCK_GIANT(mp); 1167c8daea13SAlexander Kabaev if ((error = vget(vp, LK_EXCLUSIVE, td)) != 0) { 1168ae51ff11SJeff Roberson VFS_UNLOCK_GIANT(vfslocked); 1169c8daea13SAlexander Kabaev return (error); 1170c8daea13SAlexander Kabaev } 1171c8daea13SAlexander Kabaev flags = *flagsp; 11728516dd18SPoul-Henning Kamp obj = vp->v_object; 1173c8daea13SAlexander Kabaev if (vp->v_type == VREG) { 1174c8daea13SAlexander Kabaev /* 1175c8daea13SAlexander Kabaev * Get the proper underlying object 1176c8daea13SAlexander Kabaev */ 11778516dd18SPoul-Henning Kamp if (obj == NULL) { 1178c8daea13SAlexander Kabaev error = EINVAL; 1179c8daea13SAlexander Kabaev goto done; 1180c8daea13SAlexander Kabaev } 1181c8daea13SAlexander Kabaev if (obj->handle != vp) { 1182c8daea13SAlexander Kabaev vput(vp); 1183c8daea13SAlexander Kabaev vp = (struct vnode*)obj->handle; 1184c8daea13SAlexander Kabaev vget(vp, LK_EXCLUSIVE, td); 1185c8daea13SAlexander Kabaev } 1186c8daea13SAlexander Kabaev type = OBJT_VNODE; 1187c8daea13SAlexander Kabaev handle = vp; 1188c8daea13SAlexander Kabaev } else if (vp->v_type == VCHR) { 1189c8daea13SAlexander Kabaev type = OBJT_DEVICE; 1190c8daea13SAlexander Kabaev handle = vp->v_rdev; 1191c8daea13SAlexander Kabaev 1192891822a8SPoul-Henning Kamp /* 
XXX: lack thredref on device */ 1193c8daea13SAlexander Kabaev if(vp->v_rdev->si_devsw->d_flags & D_MMAP_ANON) { 1194c8daea13SAlexander Kabaev *maxprotp = VM_PROT_ALL; 1195c8daea13SAlexander Kabaev *flagsp |= MAP_ANON; 1196c8daea13SAlexander Kabaev error = 0; 1197c8daea13SAlexander Kabaev goto done; 1198c8daea13SAlexander Kabaev } 1199c8daea13SAlexander Kabaev /* 1200c8daea13SAlexander Kabaev * cdevs does not provide private mappings of any kind. 1201c8daea13SAlexander Kabaev */ 1202ce7a036dSAlexander Kabaev if ((*maxprotp & VM_PROT_WRITE) == 0 && 1203ce7a036dSAlexander Kabaev (prot & PROT_WRITE) != 0) { 1204ce7a036dSAlexander Kabaev error = EACCES; 1205ce7a036dSAlexander Kabaev goto done; 1206ce7a036dSAlexander Kabaev } 120723fc1a90SPoul-Henning Kamp if (flags & (MAP_PRIVATE|MAP_COPY)) { 1208c8daea13SAlexander Kabaev error = EINVAL; 1209c8daea13SAlexander Kabaev goto done; 1210c8daea13SAlexander Kabaev } 1211c8daea13SAlexander Kabaev /* 1212c8daea13SAlexander Kabaev * Force device mappings to be shared. 1213c8daea13SAlexander Kabaev */ 1214c8daea13SAlexander Kabaev flags |= MAP_SHARED; 1215c8daea13SAlexander Kabaev } else { 1216c8daea13SAlexander Kabaev error = EINVAL; 1217c8daea13SAlexander Kabaev goto done; 1218c8daea13SAlexander Kabaev } 1219c8daea13SAlexander Kabaev if ((error = VOP_GETATTR(vp, &va, td->td_ucred, td))) { 1220c8daea13SAlexander Kabaev goto done; 1221c8daea13SAlexander Kabaev } 1222c92163dcSChristian S.J. Peron #ifdef MAC 122330d239bcSRobert Watson error = mac_vnode_check_mmap(td->td_ucred, vp, prot, flags); 1224c92163dcSChristian S.J. Peron if (error != 0) 1225c92163dcSChristian S.J. Peron goto done; 1226c92163dcSChristian S.J. 
Peron #endif 1227c8daea13SAlexander Kabaev if ((flags & MAP_SHARED) != 0) { 1228c8daea13SAlexander Kabaev if ((va.va_flags & (SF_SNAPSHOT|IMMUTABLE|APPEND)) != 0) { 1229c8daea13SAlexander Kabaev if (prot & PROT_WRITE) { 1230c8daea13SAlexander Kabaev error = EPERM; 1231c8daea13SAlexander Kabaev goto done; 1232c8daea13SAlexander Kabaev } 1233c8daea13SAlexander Kabaev *maxprotp &= ~VM_PROT_WRITE; 1234c8daea13SAlexander Kabaev } 1235c8daea13SAlexander Kabaev } 1236c8daea13SAlexander Kabaev /* 1237c8daea13SAlexander Kabaev * If it is a regular file without any references 1238c8daea13SAlexander Kabaev * we do not need to sync it. 1239c8daea13SAlexander Kabaev * Adjust object size to be the size of actual file. 1240c8daea13SAlexander Kabaev */ 1241c8daea13SAlexander Kabaev if (vp->v_type == VREG) { 1242c8daea13SAlexander Kabaev objsize = round_page(va.va_size); 1243c8daea13SAlexander Kabaev if (va.va_nlink == 0) 1244c8daea13SAlexander Kabaev flags |= MAP_NOSYNC; 1245c8daea13SAlexander Kabaev } 1246c8daea13SAlexander Kabaev obj = vm_pager_allocate(type, handle, objsize, prot, foff); 1247c8daea13SAlexander Kabaev if (obj == NULL) { 1248c8daea13SAlexander Kabaev error = (type == OBJT_DEVICE ? EINVAL : ENOMEM); 1249c8daea13SAlexander Kabaev goto done; 1250c8daea13SAlexander Kabaev } 1251c8daea13SAlexander Kabaev *objp = obj; 1252c8daea13SAlexander Kabaev *flagsp = flags; 12539f5c1d19SDiomidis Spinellis vfs_mark_atime(vp, td); 12541e309003SDiomidis Spinellis 1255c8daea13SAlexander Kabaev done: 1256c8daea13SAlexander Kabaev vput(vp); 1257ae51ff11SJeff Roberson VFS_UNLOCK_GIANT(vfslocked); 1258c8daea13SAlexander Kabaev return (error); 1259c8daea13SAlexander Kabaev } 1260c8daea13SAlexander Kabaev 1261c8daea13SAlexander Kabaev /* 126298df9218SJohn Baldwin * vm_mmap_cdev() 126398df9218SJohn Baldwin * 126498df9218SJohn Baldwin * MPSAFE 126598df9218SJohn Baldwin * 126698df9218SJohn Baldwin * Helper function for vm_mmap. 
Perform sanity check specific for mmap 126798df9218SJohn Baldwin * operations on cdevs. 126898df9218SJohn Baldwin */ 126998df9218SJohn Baldwin int 127098df9218SJohn Baldwin vm_mmap_cdev(struct thread *td, vm_size_t objsize, 127198df9218SJohn Baldwin vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp, 127298df9218SJohn Baldwin struct cdev *cdev, vm_ooffset_t foff, vm_object_t *objp) 127398df9218SJohn Baldwin { 127498df9218SJohn Baldwin vm_object_t obj; 127598df9218SJohn Baldwin int flags; 127698df9218SJohn Baldwin 127798df9218SJohn Baldwin flags = *flagsp; 127898df9218SJohn Baldwin 127998df9218SJohn Baldwin /* XXX: lack thredref on device */ 128098df9218SJohn Baldwin if (cdev->si_devsw->d_flags & D_MMAP_ANON) { 128198df9218SJohn Baldwin *maxprotp = VM_PROT_ALL; 128298df9218SJohn Baldwin *flagsp |= MAP_ANON; 128398df9218SJohn Baldwin return (0); 128498df9218SJohn Baldwin } 128598df9218SJohn Baldwin /* 128698df9218SJohn Baldwin * cdevs does not provide private mappings of any kind. 128798df9218SJohn Baldwin */ 128898df9218SJohn Baldwin if ((*maxprotp & VM_PROT_WRITE) == 0 && 128998df9218SJohn Baldwin (prot & PROT_WRITE) != 0) 129098df9218SJohn Baldwin return (EACCES); 129198df9218SJohn Baldwin if (flags & (MAP_PRIVATE|MAP_COPY)) 129298df9218SJohn Baldwin return (EINVAL); 129398df9218SJohn Baldwin /* 129498df9218SJohn Baldwin * Force device mappings to be shared. 
 */
	flags |= MAP_SHARED;
#ifdef MAC_XXX
	/* Optional MAC framework check for device mmaps (compiled out). */
	error = mac_check_cdev_mmap(td->td_ucred, cdev, prot);
	if (error != 0)
		return (error);
#endif
	/*
	 * Ask the device pager for the VM object backing this cdev; a
	 * NULL result means the device does not support memory mapping.
	 */
	obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, foff);
	if (obj == NULL)
		return (EINVAL);
	*objp = obj;
	*flagsp = flags;
	return (0);
}

/*
 * vm_mmap_shm()
 *
 * MPSAFE
 *
 * Helper function for vm_mmap.  Perform sanity check specific for mmap
 * operations on shm file descriptors.  On success *objp is set to the
 * backing VM object (reference transferred to the caller).
 */
int
vm_mmap_shm(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct shmfd *shmfd, vm_ooffset_t foff, vm_object_t *objp)
{
	int error;

	/*
	 * Refuse a writable mapping when the descriptor's maximum
	 * protection does not include write access.
	 */
	if ((*maxprotp & VM_PROT_WRITE) == 0 &&
	    (prot & PROT_WRITE) != 0)
		return (EACCES);
#ifdef MAC
	error = mac_posixshm_check_mmap(td->td_ucred, shmfd, prot, *flagsp);
	if (error != 0)
		return (error);
#endif
	/* Delegate the actual object lookup to the POSIX shm layer. */
	error = shm_mmap(shmfd, objsize, foff, objp);
	if (error)
		return (error);
	return (0);
}

/*
 * vm_mmap()
 *
 * MPSAFE
 *
 * Internal version of mmap.  Currently used by mmap, exec, and sys5
 * shared memory.  Handle is either a vnode pointer or NULL for MAP_ANON.
 */
int
vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags,
    objtype_t handle_type, void *handle,
    vm_ooffset_t foff)
{
	boolean_t fitit;
	vm_object_t object = NULL;
	int rv = KERN_SUCCESS;
	int docow, error;
	struct thread *td = curthread;

	if (size == 0)
		return (0);

	size = round_page(size);

	/*
	 * Enforce the per-process virtual memory limit (RLIMIT_VMEM)
	 * before doing any work; the proc lock protects the limit read
	 * against concurrent setrlimit().
	 */
	PROC_LOCK(td->td_proc);
	if (td->td_proc->p_vmspace->vm_map.size + size >
	    lim_cur(td->td_proc, RLIMIT_VMEM)) {
		PROC_UNLOCK(td->td_proc);
		return(ENOMEM);
	}
	PROC_UNLOCK(td->td_proc);

	/*
	 * We currently can only deal with page aligned file offsets.
	 * The check is here rather than in the syscall because the
	 * kernel calls this function internally for other mmaping
	 * operations (such as in exec) and non-aligned offsets will
	 * cause pmap inconsistencies...so we want to be sure to
	 * disallow this in all cases.
	 */
	if (foff & PAGE_MASK)
		return (EINVAL);

	/*
	 * Without MAP_FIXED the kernel may place the mapping anywhere at
	 * or above the (page-rounded) hint; with MAP_FIXED the caller's
	 * address must already be page aligned.
	 */
	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		if (*addr != trunc_page(*addr))
			return (EINVAL);
		fitit = FALSE;
	}
	/*
	 * Lookup/allocate object.  Dispatch on the handle type to the
	 * matching helper, which fills in 'object' and may adjust
	 * maxprot/flags.
	 */
	switch (handle_type) {
	case OBJT_DEVICE:
		error = vm_mmap_cdev(td, size, prot, &maxprot, &flags,
		    handle, foff, &object);
		break;
	case OBJT_VNODE:
		error = vm_mmap_vnode(td, size, prot, &maxprot, &flags,
		    handle, foff, &object);
		break;
	case OBJT_SWAP:
		/* POSIX shared memory objects are swap backed. */
		error = vm_mmap_shm(td, size, prot, &maxprot, &flags,
		    handle, foff, &object);
		break;
	case OBJT_DEFAULT:
		if (handle == NULL) {
			error = 0;
			break;
		}
		/* FALLTHROUGH */
	default:
		error = EINVAL;
		break;
	}
	if (error)
		return (error);
	if (flags & MAP_ANON) {
		/* Anonymous memory: no backing object is passed down. */
		object = NULL;
		docow = 0;
		/*
		 * Unnamed anonymous regions always start at 0.
		 */
		if (handle == 0)
			foff = 0;
	} else {
		docow = MAP_PREFAULT_PARTIAL;
	}

	/* Translate mmap(2) flags into vm_map copy-on-write flags. */
	if ((flags & (MAP_ANON|MAP_SHARED)) == 0)
		docow |= MAP_COPY_ON_WRITE;
	if (flags & MAP_NOSYNC)
		docow |= MAP_DISABLE_SYNCER;
	if (flags & MAP_NOCORE)
		docow |= MAP_DISABLE_COREDUMP;

#if defined(VM_PROT_READ_IS_EXEC)
	/*
	 * On platforms where readable implies executable, widen both the
	 * requested and maximum protections accordingly.
	 */
	if (prot & VM_PROT_READ)
		prot |= VM_PROT_EXECUTE;

	if (maxprot & VM_PROT_READ)
		maxprot |= VM_PROT_EXECUTE;
#endif

	/* Let the pmap suggest a cache/superpage friendly address. */
	if (fitit)
		*addr = pmap_addr_hint(object, *addr, size);

	if (flags & MAP_STACK)
		rv = vm_map_stack(map, *addr, size, prot, maxprot,
		    docow | MAP_STACK_GROWS_DOWN);
	else if (fitit)
		rv = vm_map_find(map, object, foff, addr, size, TRUE,
		    prot, maxprot, docow);
	else
		rv = vm_map_fixed(map, object, foff, addr, size,
		    prot, maxprot, docow);

	if (rv != KERN_SUCCESS) {
		/*
		 * Lose the object reference.  Will destroy the
		 * object if it's an unnamed anonymous mapping
		 * or named anonymous without other references.
		 */
		vm_object_deallocate(object);
	} else if (flags & MAP_SHARED) {
		/*
		 * Shared memory is also shared with children.
		 */
		rv = vm_map_inherit(map, *addr, *addr + size, VM_INHERIT_SHARE);
		if (rv != KERN_SUCCESS)
			(void) vm_map_remove(map, *addr, *addr + size);
	}

	/*
	 * If the process has requested that all future mappings
	 * be wired, then heed this.
	 */
	if ((rv == KERN_SUCCESS) && (map->flags & MAP_WIREFUTURE))
		vm_map_wire(map, *addr, *addr + size,
		    VM_MAP_WIRE_USER|VM_MAP_WIRE_NOHOLES);

	/* Map Mach kern_return_t codes to errno values for callers. */
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}