/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_hwpmc_hooks.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/capsicum.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/procctl.h>
#include <sys/racct.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/vmmeter.h>

#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vnode_pager.h>

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

int old_mlock = 0;
SYSCTL_INT(_vm, OID_AUTO, old_mlock, CTLFLAG_RWTUN, &old_mlock, 0,
    "Do not apply RLIMIT_MEMLOCK on mlockall");

#ifdef MAP_32BIT
#define	MAP_32BIT_MAX_ADDR	((vm_offset_t)1 << 31)
#endif

#ifndef _SYS_SYSPROTO_H_
struct sbrk_args {
	int incr;
};
#endif

int
sys_sbrk(struct thread *td, struct sbrk_args *uap)
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#ifndef _SYS_SYSPROTO_H_
struct sstk_args {
	int incr;
};
#endif

int
sys_sstk(struct thread *td, struct sstk_args *uap)
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43)
#ifndef _SYS_SYSPROTO_H_
struct getpagesize_args {
	int dummy;
};
#endif

int
ogetpagesize(struct thread *td, struct getpagesize_args *uap)
{

	td->td_retval[0] = PAGE_SIZE;
	return (0);
}
#endif				/* COMPAT_43 */

/*
 * Memory Map (mmap) system call.  Note that the file offset
 * and address are allowed to be NOT page aligned, though if
 * the MAP_FIXED flag is set, both must have the same remainder
 * modulo the PAGE_SIZE (POSIX 1003.1b).  If the address is not
 * page-aligned, the actual mapping starts at trunc_page(addr)
 * and the return value is adjusted up by the page offset.
 *
 * Generally speaking, only character devices which are themselves
 * memory-based, such as a video framebuffer, can be mmap'd.  Otherwise
 * there would be no cache coherency between a descriptor and a VM mapping
 * both to the same character device.
 */
#ifndef _SYS_SYSPROTO_H_
struct mmap_args {
	void *addr;
	size_t len;
	int prot;
	int flags;
	int fd;
	long pad;
	off_t pos;
};
#endif

int
sys_mmap(struct thread *td, struct mmap_args *uap)
{

	return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, uap->prot,
	    uap->flags, uap->fd, uap->pos));
}
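
/*
 * Illustrative userland sketch (not part of this file): the alignment
 * rules documented above, as seen by a caller.  The path and sizes are
 * hypothetical and error handling is elided.
 *
 *	#include <sys/mman.h>
 *	#include <fcntl.h>
 *
 *	int fd = open("/tmp/example.dat", O_RDONLY);
 *	// A non-page-aligned offset is accepted: the kernel maps from
 *	// trunc_page(pos) and returns a pointer adjusted up by
 *	// (pos & PAGE_MASK), so *p is the byte at file offset 123.
 *	char *p = mmap(NULL, 100, PROT_READ, MAP_PRIVATE, fd, 123);
 *	// With MAP_FIXED, addr and pos must agree modulo PAGE_SIZE or
 *	// kern_mmap() below fails with EINVAL.
 */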

int
kern_mmap(struct thread *td, uintptr_t addr0, size_t size, int prot, int flags,
    int fd, off_t pos)
{
	struct vmspace *vms;
	struct file *fp;
	vm_offset_t addr;
	vm_size_t pageoff;
	vm_prot_t cap_maxprot;
	int align, error;
	cap_rights_t rights;

	vms = td->td_proc->p_vmspace;
	fp = NULL;
	AUDIT_ARG_FD(fd);
	addr = addr0;

	/*
	 * Ignore old flags that used to be defined but did not do anything.
	 */
	flags &= ~(MAP_RESERVED0020 | MAP_RESERVED0040);

	/*
	 * Enforce the constraints.
	 * Mapping of length 0 is only allowed for old binaries.
	 * Anonymous mapping shall specify -1 as file descriptor and
	 * zero position for new code.  Be nice to ancient a.out
	 * binaries and correct pos for anonymous mapping, since old
	 * ld.so sometimes issues anonymous map requests with non-zero
	 * pos.
	 */
	if (!SV_CURPROC_FLAG(SV_AOUT)) {
		if ((size == 0 && curproc->p_osrel >= P_OSREL_MAP_ANON) ||
		    ((flags & MAP_ANON) != 0 && (fd != -1 || pos != 0)))
			return (EINVAL);
	} else {
		if ((flags & MAP_ANON) != 0)
			pos = 0;
	}

	if (flags & MAP_STACK) {
		if ((fd != -1) ||
		    ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE)))
			return (EINVAL);
		flags |= MAP_ANON;
		pos = 0;
	}
	if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | MAP_HASSEMAPHORE |
	    MAP_STACK | MAP_NOSYNC | MAP_ANON | MAP_EXCL | MAP_NOCORE |
	    MAP_PREFAULT_READ | MAP_GUARD |
#ifdef MAP_32BIT
	    MAP_32BIT |
#endif
	    MAP_ALIGNMENT_MASK)) != 0)
		return (EINVAL);
	if ((flags & (MAP_EXCL | MAP_FIXED)) == MAP_EXCL)
		return (EINVAL);
	if ((flags & (MAP_SHARED | MAP_PRIVATE)) == (MAP_SHARED | MAP_PRIVATE))
		return (EINVAL);
	if (prot != PROT_NONE &&
	    (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) != 0)
		return (EINVAL);
	if ((flags & MAP_GUARD) != 0 && (prot != PROT_NONE || fd != -1 ||
	    pos != 0 || (flags & (MAP_SHARED | MAP_PRIVATE | MAP_PREFAULT |
	    MAP_PREFAULT_READ | MAP_ANON | MAP_STACK)) != 0))
		return (EINVAL);

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;

	/* Adjust size for rounding (on both ends). */
	size += pageoff;			/* low end... */
	size = (vm_size_t) round_page(size);	/* hi end */

	/* Ensure alignment is at least a page and fits in a pointer. */
	align = flags & MAP_ALIGNMENT_MASK;
	if (align != 0 && align != MAP_ALIGNED_SUPER &&
	    (align >> MAP_ALIGNMENT_SHIFT >= sizeof(void *) * NBBY ||
	    align >> MAP_ALIGNMENT_SHIFT < PAGE_SHIFT))
		return (EINVAL);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...  Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		/* Address range must be all in user VM space. */
		if (addr < vm_map_min(&vms->vm_map) ||
		    addr + size > vm_map_max(&vms->vm_map))
			return (EINVAL);
		if (addr + size < addr)
			return (EINVAL);
#ifdef MAP_32BIT
		if (flags & MAP_32BIT && addr + size > MAP_32BIT_MAX_ADDR)
			return (EINVAL);
	} else if (flags & MAP_32BIT) {
		/*
		 * For MAP_32BIT, override the hint if it is too high and
		 * do not bother moving the mapping past the heap (since
		 * the heap is usually above 2GB).
		 */
		if (addr + size > MAP_32BIT_MAX_ADDR)
			addr = 0;
#endif
	} else {
		/*
		 * XXX for non-fixed mappings where no hint is provided or
		 * the hint would fall in the potential heap space,
		 * place it after the end of the largest possible heap.
		 *
		 * There should really be a pmap call to determine a reasonable
		 * location.
		 */
		if (addr == 0 ||
		    (addr >= round_page((vm_offset_t)vms->vm_taddr) &&
		    addr < round_page((vm_offset_t)vms->vm_daddr +
		    lim_max(td, RLIMIT_DATA))))
			addr = round_page((vm_offset_t)vms->vm_daddr +
			    lim_max(td, RLIMIT_DATA));
	}
	if (size == 0) {
		/*
		 * Return success without mapping anything for old
		 * binaries that request a page-aligned mapping of
		 * length 0.  For modern binaries, this function
		 * returns an error earlier.
		 */
		error = 0;
	} else if ((flags & MAP_GUARD) != 0) {
		error = vm_mmap_object(&vms->vm_map, &addr, size, VM_PROT_NONE,
		    VM_PROT_NONE, flags, NULL, pos, FALSE, td);
	} else if ((flags & MAP_ANON) != 0) {
		/*
		 * Mapping blank space is trivial.
		 *
		 * This relies on VM_PROT_* matching PROT_*.
		 */
		error = vm_mmap_object(&vms->vm_map, &addr, size, prot,
		    VM_PROT_ALL, flags, NULL, pos, FALSE, td);
	} else {
		/*
		 * Mapping file, get fp for validation and don't let the
		 * descriptor disappear on us if we block.  Check capability
		 * rights, but also return the maximum rights to be combined
		 * with maxprot later.
		 */
		cap_rights_init(&rights, CAP_MMAP);
		if (prot & PROT_READ)
			cap_rights_set(&rights, CAP_MMAP_R);
		if ((flags & MAP_SHARED) != 0) {
			if (prot & PROT_WRITE)
				cap_rights_set(&rights, CAP_MMAP_W);
		}
		if (prot & PROT_EXEC)
			cap_rights_set(&rights, CAP_MMAP_X);
		error = fget_mmap(td, fd, &rights, &cap_maxprot, &fp);
		if (error != 0)
			goto done;
		if ((flags & (MAP_SHARED | MAP_PRIVATE)) == 0 &&
		    td->td_proc->p_osrel >= P_OSREL_MAP_FSTRICT) {
			error = EINVAL;
			goto done;
		}

		/* This relies on VM_PROT_* matching PROT_*. */
		error = fo_mmap(fp, &vms->vm_map, &addr, size, prot,
		    cap_maxprot, flags, pos, td);
	}

	if (error == 0)
		td->td_retval[0] = (register_t) (addr + pageoff);
done:
	if (fp)
		fdrop(fp, td);

	return (error);
}
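
/*
 * Worked example of the offset arithmetic in kern_mmap() above,
 * assuming PAGE_SIZE is 4096 (the values are illustrative):
 *
 *	pos  = 0x12345:	pageoff = pos & PAGE_MASK  = 0x345
 *			pos    -= pageoff          = 0x12000
 *	size = 100:	size   += pageoff          = 0x3a9
 *			size    = round_page(size) = 0x1000
 *
 * On success the caller receives addr + pageoff, so the returned
 * pointer refers to the byte that was originally requested.
 */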

#if defined(COMPAT_FREEBSD6)
int
freebsd6_mmap(struct thread *td, struct freebsd6_mmap_args *uap)
{

	return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, uap->prot,
	    uap->flags, uap->fd, uap->pos));
}
#endif

#ifdef COMPAT_43
#ifndef _SYS_SYSPROTO_H_
struct ommap_args {
	caddr_t addr;
	int len;
	int prot;
	int flags;
	int fd;
	long pos;
};
#endif
int
ommap(struct thread *td, struct ommap_args *uap)
{
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC | PROT_WRITE,
		PROT_READ,
		PROT_EXEC | PROT_READ,
		PROT_WRITE | PROT_READ,
		PROT_EXEC | PROT_WRITE | PROT_READ,
	};
	int flags, prot;

#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100

	prot = cvtbsdprot[uap->prot & 0x7];
#ifdef COMPAT_FREEBSD32
#if defined(__amd64__)
	if (i386_read_exec && SV_PROC_FLAG(td->td_proc, SV_ILP32) &&
	    prot != 0)
		prot |= PROT_EXEC;
#endif
#endif
	flags = 0;
	if (uap->flags & OMAP_ANON)
		flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		flags |= MAP_SHARED;
	else
		flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		flags |= MAP_FIXED;
	return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, prot, flags,
	    uap->fd, uap->pos));
}
#endif				/* COMPAT_43 */

#ifndef _SYS_SYSPROTO_H_
struct msync_args {
	void *addr;
	size_t len;
	int flags;
};
#endif
int
sys_msync(struct thread *td, struct msync_args *uap)
{

	return (kern_msync(td, (uintptr_t)uap->addr, uap->len, uap->flags));
}

int
kern_msync(struct thread *td, uintptr_t addr0, size_t size, int flags)
{
	vm_offset_t addr;
	vm_size_t pageoff;
	vm_map_t map;
	int rv;

	addr = addr0;
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	map = &td->td_proc->p_vmspace->vm_map;

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_sync(map, addr, addr + size, (flags & MS_ASYNC) == 0,
	    (flags & MS_INVALIDATE) != 0);
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
		return (ENOMEM);
	case KERN_INVALID_ARGUMENT:
		return (EBUSY);
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
}
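
/*
 * Illustrative userland sketch of the kern_msync() path above
 * (hypothetical mapping; error handling elided).  MS_ASYNC and
 * MS_INVALIDATE are mutually exclusive, as checked above.
 *
 *	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
 *	    fd, 0);
 *	p[0] = 'x';
 *	msync(p, len, MS_SYNC);	// block until the dirty pages are written
 */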

#ifndef _SYS_SYSPROTO_H_
struct munmap_args {
	void *addr;
	size_t len;
};
#endif
int
sys_munmap(struct thread *td, struct munmap_args *uap)
{

	return (kern_munmap(td, (uintptr_t)uap->addr, uap->len));
}

int
kern_munmap(struct thread *td, uintptr_t addr0, size_t size)
{
#ifdef HWPMC_HOOKS
	struct pmckern_map_out pkm;
	vm_map_entry_t entry;
	bool pmc_handled;
#endif
	vm_offset_t addr;
	vm_size_t pageoff;
	vm_map_t map;

	if (size == 0)
		return (EINVAL);

	addr = addr0;
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	if (addr < vm_map_min(map) || addr + size > vm_map_max(map))
		return (EINVAL);
	vm_map_lock(map);
#ifdef HWPMC_HOOKS
	pmc_handled = false;
	if (PMC_HOOK_INSTALLED(PMC_FN_MUNMAP)) {
		pmc_handled = true;
		/*
		 * Inform hwpmc if the address range being unmapped contains
		 * an executable region.
		 */
		pkm.pm_address = (uintptr_t) NULL;
		if (vm_map_lookup_entry(map, addr, &entry)) {
			for (; entry->start < addr + size;
			    entry = entry->next) {
				if (vm_map_check_protection(map, entry->start,
				    entry->end, VM_PROT_EXECUTE) == TRUE) {
					pkm.pm_address = (uintptr_t) addr;
					pkm.pm_size = (size_t) size;
					break;
				}
			}
		}
	}
#endif
	vm_map_delete(map, addr, addr + size);

#ifdef HWPMC_HOOKS
	if (__predict_false(pmc_handled)) {
		/* downgrade the lock to prevent a LOR with the pmc-sx lock */
		vm_map_lock_downgrade(map);
		if (pkm.pm_address != (uintptr_t) NULL)
			PMC_CALL_HOOK(td, PMC_FN_MUNMAP, (void *) &pkm);
		vm_map_unlock_read(map);
	} else
#endif
		vm_map_unlock(map);

	/* vm_map_delete returns nothing but KERN_SUCCESS anyway */
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mprotect_args {
	const void *addr;
	size_t len;
	int prot;
};
#endif
int
sys_mprotect(struct thread *td, struct mprotect_args *uap)
{

	return (kern_mprotect(td, (uintptr_t)uap->addr, uap->len, uap->prot));
}
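
/*
 * Illustrative userland sketch: a W^X style transition through the
 * kern_mprotect() path below.  The buffer and fill_code() helper are
 * hypothetical; error handling is elided.  As elsewhere in this file,
 * the range is page-rounded on behalf of the caller.
 *
 *	void *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *	    MAP_ANON | MAP_PRIVATE, -1, 0);
 *	fill_code(buf, len);
 *	mprotect(buf, len, PROT_READ | PROT_EXEC);
 */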

int
kern_mprotect(struct thread *td, uintptr_t addr0, size_t size, int prot)
{
	vm_offset_t addr;
	vm_size_t pageoff;

	addr = addr0;
	prot = (prot & VM_PROT_ALL);
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_protect(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, prot, FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	case KERN_RESOURCE_SHORTAGE:
		return (ENOMEM);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct minherit_args {
	void *addr;
	size_t len;
	int inherit;
};
#endif
int
sys_minherit(struct thread *td, struct minherit_args *uap)
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_inherit_t inherit;

	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	inherit = uap->inherit;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_inherit(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct madvise_args {
	void *addr;
	size_t len;
	int behav;
};
#endif

int
sys_madvise(struct thread *td, struct madvise_args *uap)
{

	return (kern_madvise(td, (uintptr_t)uap->addr, uap->len, uap->behav));
}

int
kern_madvise(struct thread *td, uintptr_t addr0, size_t len, int behav)
{
	vm_map_t map;
	vm_offset_t addr, end, start;
	int flags;

	/*
	 * Check for our special case, advising the swap pager we are
	 * "immortal."
	 */
	if (behav == MADV_PROTECT) {
		flags = PPROT_SET;
		return (kern_procctl(td, P_PID, td->td_proc->p_pid,
		    PROC_SPROTECT, &flags));
	}

	/*
	 * Check for illegal behavior
	 */
	if (behav < 0 || behav > MADV_CORE)
		return (EINVAL);
	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	addr = addr0;
	if (addr < vm_map_min(map) || addr + len > vm_map_max(map))
		return (EINVAL);
	if ((addr + len) < addr)
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	start = trunc_page(addr);
	end = round_page(addr + len);

	if (vm_map_madvise(map, start, end, behav))
		return (EINVAL);
	return (0);
}
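
/*
 * Illustrative userland sketch of the kern_madvise() path above
 * (hypothetical sizes; error handling elided):
 *
 *	void *p = mmap(NULL, 16 * 4096, PROT_READ | PROT_WRITE,
 *	    MAP_ANON | MAP_PRIVATE, -1, 0);
 *	madvise(p, 16 * 4096, MADV_SEQUENTIAL);	// expect sequential access
 *	madvise(p, 16 * 4096, MADV_FREE);	// contents may be discarded
 */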

#ifndef _SYS_SYSPROTO_H_
struct mincore_args {
	const void *addr;
	size_t len;
	char *vec;
};
#endif

int
sys_mincore(struct thread *td, struct mincore_args *uap)
{

	return (kern_mincore(td, (uintptr_t)uap->addr, uap->len, uap->vec));
}

int
kern_mincore(struct thread *td, uintptr_t addr0, size_t len, char *vec)
{
	vm_offset_t addr, first_addr;
	vm_offset_t end, cend;
	pmap_t pmap;
	vm_map_t map;
	int error = 0;
	int vecindex, lastvecindex;
	vm_map_entry_t current;
	vm_map_entry_t entry;
	vm_object_t object;
	vm_paddr_t locked_pa;
	vm_page_t m;
	vm_pindex_t pindex;
	int mincoreinfo;
	unsigned int timestamp;
	boolean_t locked;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page(addr0);
	end = addr + (vm_size_t)round_page(len);
	map = &td->td_proc->p_vmspace->vm_map;
	if (end > vm_map_max(map) || end < addr)
		return (ENOMEM);

	pmap = vmspace_pmap(td->td_proc->p_vmspace);

	vm_map_lock_read(map);
RestartScan:
	timestamp = map->timestamp;

	if (!vm_map_lookup_entry(map, addr, &entry)) {
		vm_map_unlock_read(map);
		return (ENOMEM);
	}

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current process's address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for (current = entry; current->start < end; current = current->next) {

		/*
		 * check for contiguity
		 */
		if (current->end < end && current->next->start > current->end) {
			vm_map_unlock_read(map);
			return (ENOMEM);
		}

		/*
		 * ignore submaps (for now) or null objects
		 */
		if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) ||
		    current->object.vm_object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		/*
		 * scan this entry one page at a time
		 */
		while (addr < cend) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 */
			object = NULL;
			locked_pa = 0;
		retry:
			m = NULL;
			mincoreinfo = pmap_mincore(pmap, addr, &locked_pa);
			if (locked_pa != 0) {
				/*
				 * The page is mapped by this process but not
				 * both accessed and modified.  It is also
				 * managed.  Acquire the object lock so that
				 * other mappings might be examined.
				 */
				m = PHYS_TO_VM_PAGE(locked_pa);
				if (m->object != object) {
					if (object != NULL)
						VM_OBJECT_WUNLOCK(object);
					object = m->object;
					locked = VM_OBJECT_TRYWLOCK(object);
					vm_page_unlock(m);
					if (!locked) {
						VM_OBJECT_WLOCK(object);
						vm_page_lock(m);
						goto retry;
					}
				} else
					vm_page_unlock(m);
				KASSERT(m->valid == VM_PAGE_BITS_ALL,
				    ("mincore: page %p is mapped but invalid",
				    m));
			} else if (mincoreinfo == 0) {
				/*
				 * The page is not mapped by this process.  If
				 * the object implements managed pages, then
				 * determine if the page is resident so that
				 * the mappings might be examined.
				 */
				if (current->object.vm_object != object) {
					if (object != NULL)
						VM_OBJECT_WUNLOCK(object);
					object = current->object.vm_object;
					VM_OBJECT_WLOCK(object);
				}
				if (object->type == OBJT_DEFAULT ||
				    object->type == OBJT_SWAP ||
				    object->type == OBJT_VNODE) {
					pindex = OFF_TO_IDX(current->offset +
					    (addr - current->start));
					m = vm_page_lookup(object, pindex);
					if (m != NULL && m->valid == 0)
						m = NULL;
					if (m != NULL)
						mincoreinfo = MINCORE_INCORE;
				}
			}
			if (m != NULL) {
				/* Examine other mappings to the page. */
				if (m->dirty == 0 && pmap_is_modified(m))
					vm_page_dirty(m);
				if (m->dirty != 0)
					mincoreinfo |= MINCORE_MODIFIED_OTHER;
				/*
				 * The first test for PGA_REFERENCED is an
				 * optimization.  The second test is
				 * required because a concurrent pmap
				 * operation could clear the last reference
				 * and set PGA_REFERENCED before the call to
				 * pmap_is_referenced().
				 */
				if ((m->aflags & PGA_REFERENCED) != 0 ||
				    pmap_is_referenced(m) ||
				    (m->aflags & PGA_REFERENCED) != 0)
					mincoreinfo |= MINCORE_REFERENCED_OTHER;
			}
			if (object != NULL)
				VM_OBJECT_WUNLOCK(object);

			/*
			 * subyte may page fault.  In case it needs to modify
			 * the map, we release the lock.
			 */
			vm_map_unlock_read(map);

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = atop(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make sure that
			 * the byte vector is zeroed for those skipped entries.
			 */
			while ((lastvecindex + 1) < vecindex) {
				++lastvecindex;
				error = subyte(vec + lastvecindex, 0);
				if (error) {
					error = EFAULT;
					goto done2;
				}
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte(vec + vecindex, mincoreinfo);
			if (error) {
				error = EFAULT;
				goto done2;
			}

			/*
			 * If the map has changed, due to the subyte, the previous
			 * output may be invalid.
			 */
			vm_map_lock_read(map);
			if (timestamp != map->timestamp)
				goto RestartScan;

			lastvecindex = vecindex;
			addr += PAGE_SIZE;
		}
	}

	/*
	 * subyte may page fault.  In case it needs to modify
	 * the map, we release the lock.
	 */
	vm_map_unlock_read(map);

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = atop(end - first_addr);
	while ((lastvecindex + 1) < vecindex) {
		++lastvecindex;
		error = subyte(vec + lastvecindex, 0);
		if (error) {
			error = EFAULT;
			goto done2;
		}
	}

	/*
	 * If the map has changed, due to the subyte, the previous
	 * output may be invalid.
	 */
	vm_map_lock_read(map);
	if (timestamp != map->timestamp)
		goto RestartScan;
	vm_map_unlock_read(map);
done2:
	return (error);
}
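
/*
 * Illustrative userland sketch of the kern_mincore() path above: one
 * status byte is reported per page of the queried range (hypothetical
 * mapping; error handling elided).
 *
 *	size_t npages = (len + PAGE_SIZE - 1) / PAGE_SIZE;
 *	char *vec = malloc(npages);
 *	mincore(p, len, vec);
 *	// vec[i] & MINCORE_INCORE is set for each resident page.
 */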

#ifndef _SYS_SYSPROTO_H_
struct mlock_args {
	const void *addr;
	size_t len;
};
#endif
int
sys_mlock(struct thread *td, struct mlock_args *uap)
{

	return (kern_mlock(td->td_proc, td->td_ucred,
	    __DECONST(uintptr_t, uap->addr), uap->len));
}

int
kern_mlock(struct proc *proc, struct ucred *cred, uintptr_t addr0, size_t len)
{
	vm_offset_t addr, end, last, start;
	vm_size_t npages, size;
	vm_map_t map;
	unsigned long nsize;
	int error;

	error = priv_check_cred(cred, PRIV_VM_MLOCK, 0);
	if (error)
		return (error);
	addr = addr0;
	size = len;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	npages = atop(end - start);
	if (npages > vm_page_max_wired)
		return (ENOMEM);
	map = &proc->p_vmspace->vm_map;
	PROC_LOCK(proc);
	nsize = ptoa(npages + pmap_wired_count(map->pmap));
	if (nsize > lim_cur_proc(proc, RLIMIT_MEMLOCK)) {
		PROC_UNLOCK(proc);
		return (ENOMEM);
	}
	PROC_UNLOCK(proc);
	if (npages + vm_cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);
#ifdef RACCT
	if (racct_enable) {
		PROC_LOCK(proc);
		error = racct_set(proc, RACCT_MEMLOCK, nsize);
		PROC_UNLOCK(proc);
		if (error != 0)
			return (ENOMEM);
	}
#endif
	error = vm_map_wire(map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (racct_enable && error != KERN_SUCCESS) {
		PROC_LOCK(proc);
		racct_set(proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

#ifndef _SYS_SYSPROTO_H_
struct mlockall_args {
	int how;
};
#endif

int
sys_mlockall(struct thread *td, struct mlockall_args *uap)
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = priv_check(td, PRIV_VM_MLOCK);
	if (error)
		return (error);

	if ((uap->how == 0) || ((uap->how & ~(MCL_CURRENT|MCL_FUTURE)) != 0))
		return (EINVAL);

	/*
	 * If wiring all pages in the process would cause it to exceed
	 * a hard resource limit, return ENOMEM.
	 */
	if (!old_mlock && uap->how & MCL_CURRENT) {
		PROC_LOCK(td->td_proc);
		if (map->size > lim_cur(td, RLIMIT_MEMLOCK)) {
			PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		PROC_UNLOCK(td->td_proc);
	}
#ifdef RACCT
	if (racct_enable) {
		PROC_LOCK(td->td_proc);
		error = racct_set(td->td_proc, RACCT_MEMLOCK, map->size);
		PROC_UNLOCK(td->td_proc);
		if (error != 0)
			return (ENOMEM);
	}
#endif

	if (uap->how & MCL_FUTURE) {
		vm_map_lock(map);
		vm_map_modflags(map, MAP_WIREFUTURE, 0);
		vm_map_unlock(map);
		error = 0;
	}

	if (uap->how & MCL_CURRENT) {
		/*
		 * P1003.1-2001 mandates that all currently mapped pages
		 * will be memory resident and locked (wired) upon return
		 * from mlockall().  vm_map_wire() will wire pages, by
		 * calling vm_fault_wire() for each page in the region.
		 */
		error = vm_map_wire(map, vm_map_min(map), vm_map_max(map),
		    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
		error = (error == KERN_SUCCESS ? 0 : EAGAIN);
	}
#ifdef RACCT
	if (racct_enable && error != KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct munlockall_args {
	register_t dummy;
};
#endif

int
sys_munlockall(struct thread *td, struct munlockall_args *uap)
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);

	/* Clear the MAP_WIREFUTURE flag from this vm_map. */
	vm_map_lock(map);
	vm_map_modflags(map, 0, MAP_WIREFUTURE);
	vm_map_unlock(map);

	/* Forcibly unwire all pages. */
	error = vm_map_unwire(map, vm_map_min(map), vm_map_max(map),
	    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
#ifdef RACCT
	if (racct_enable && error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK, 0);
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct munlock_args {
	const void *addr;
	size_t len;
};
#endif
int
sys_munlock(struct thread *td, struct munlock_args *uap)
{

	return (kern_munlock(td, (uintptr_t)uap->addr, uap->len));
}

int
kern_munlock(struct thread *td, uintptr_t addr0, size_t size)
{
	vm_offset_t addr, end, last, start;
#ifdef RACCT
	vm_map_t map;
#endif
	int error;

	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);
	addr = addr0;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	error = vm_map_unwire(&td->td_proc->p_vmspace->vm_map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (racct_enable && error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		map = &td->td_proc->p_vmspace->vm_map;
		racct_set(td->td_proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(td->td_proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}
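
/*
 * Illustrative userland sketch of the kern_mlock()/kern_munlock()
 * paths above (hypothetical buffer; error handling elided).  Both
 * calls page-round the range, so locking any byte wires its whole
 * page, and mlock() fails with ENOMEM once RLIMIT_MEMLOCK would be
 * exceeded.
 *
 *	mlock(buf, len);
 *	// ... work on buf without risk of it being paged out ...
 *	munlock(buf, len);
 */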

#ifndef _SYS_SYSPROTO_H_
struct munlock_args {
	const void *addr;
	size_t len;
};
#endif
int
sys_munlock(struct thread *td, struct munlock_args *uap)
{

	return (kern_munlock(td, (uintptr_t)uap->addr, uap->len));
}

int
kern_munlock(struct thread *td, uintptr_t addr0, size_t size)
{
	vm_offset_t addr, end, last, start;
#ifdef RACCT
	vm_map_t map;
#endif
	int error;

	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);
	addr = addr0;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	error = vm_map_unwire(&td->td_proc->p_vmspace->vm_map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (racct_enable && error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		map = &td->td_proc->p_vmspace->vm_map;
		racct_set(td->td_proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(td->td_proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}
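
/*
 * Worked example for the rounding above (illustrative, not part of the
 * original file; assumes 4 KB pages): munlock(addr = 0x10f80,
 * len = 0x100) gives
 *
 *	last  = 0x11080
 *	start = trunc_page(0x10f80) = 0x10000
 *	end   = round_page(0x11080) = 0x12000
 *
 * so both pages touched by the request, [0x10000, 0x12000), are
 * unwired.  The "last < addr || end < addr" test rejects requests
 * whose end wraps past the top of the address space.
 */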

/*
 * vm_mmap_vnode()
 *
 * Helper function for vm_mmap.  Performs the sanity checks specific to
 * mmap operations on vnodes.
 */
int
vm_mmap_vnode(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct vnode *vp, vm_ooffset_t *foffp, vm_object_t *objp,
    boolean_t *writecounted)
{
	struct vattr va;
	vm_object_t obj;
	vm_ooffset_t foff;
	struct ucred *cred;
	int error, flags, locktype;

	cred = td->td_ucred;
	if ((*maxprotp & VM_PROT_WRITE) && (*flagsp & MAP_SHARED))
		locktype = LK_EXCLUSIVE;
	else
		locktype = LK_SHARED;
	if ((error = vget(vp, locktype, td)) != 0)
		return (error);
	AUDIT_ARG_VNODE1(vp);
	foff = *foffp;
	flags = *flagsp;
	obj = vp->v_object;
	if (vp->v_type == VREG) {
		/*
		 * Get the proper underlying object.
		 */
		if (obj == NULL) {
			error = EINVAL;
			goto done;
		}
		if (obj->type == OBJT_VNODE && obj->handle != vp) {
			vput(vp);
			vp = (struct vnode *)obj->handle;
			/*
			 * Bypass filesystems obey the mpsafety of the
			 * underlying fs.  Tmpfs never bypasses.
			 */
			error = vget(vp, locktype, td);
			if (error != 0)
				return (error);
		}
		if (locktype == LK_EXCLUSIVE) {
			*writecounted = TRUE;
			vnode_pager_update_writecount(obj, 0, objsize);
		}
	} else {
		error = EINVAL;
		goto done;
	}
	if ((error = VOP_GETATTR(vp, &va, cred)))
		goto done;
#ifdef MAC
	/* This relies on VM_PROT_* matching PROT_*. */
	error = mac_vnode_check_mmap(cred, vp, (int)prot, flags);
	if (error != 0)
		goto done;
#endif
	if ((flags & MAP_SHARED) != 0) {
		if ((va.va_flags & (SF_SNAPSHOT|IMMUTABLE|APPEND)) != 0) {
			if (prot & VM_PROT_WRITE) {
				error = EPERM;
				goto done;
			}
			*maxprotp &= ~VM_PROT_WRITE;
		}
	}
	/*
	 * If it is a regular file without any references,
	 * we do not need to sync it.
	 * Adjust object size to be the size of the actual file.
	 */
	objsize = round_page(va.va_size);
	if (va.va_nlink == 0)
		flags |= MAP_NOSYNC;
	if (obj->type == OBJT_VNODE) {
		obj = vm_pager_allocate(OBJT_VNODE, vp, objsize, prot, foff,
		    cred);
		if (obj == NULL) {
			error = ENOMEM;
			goto done;
		}
	} else {
		KASSERT(obj->type == OBJT_DEFAULT || obj->type == OBJT_SWAP,
		    ("wrong object type"));
		VM_OBJECT_WLOCK(obj);
		vm_object_reference_locked(obj);
#if VM_NRESERVLEVEL > 0
		vm_object_color(obj, 0);
#endif
		VM_OBJECT_WUNLOCK(obj);
	}
	*objp = obj;
	*flagsp = flags;

	vfs_mark_atime(vp, cred);

done:
	if (error != 0 && *writecounted) {
		*writecounted = FALSE;
		vnode_pager_update_writecount(obj, objsize, 0);
	}
	vput(vp);
	return (error);
}
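
/*
 * Usage note (illustrative sketch, not part of the original file; the
 * local names wc, obj, addr and foff are hypothetical): a caller that
 * received *writecounted == TRUE but then fails to enter the mapping
 * must undo the writecount accounting itself:
 *
 *	boolean_t wc = FALSE;
 *
 *	error = vm_mmap_vnode(td, size, prot, &maxprot, &flags, vp,
 *	    &foff, &obj, &wc);
 *	if (error == 0)
 *		error = vm_mmap_object(map, &addr, size, prot, maxprot,
 *		    flags, obj, foff, wc, td);
 *	if (error != 0 && obj != NULL) {
 *		if (wc)
 *			vnode_pager_release_writecount(obj, 0, size);
 *		vm_object_deallocate(obj);
 *	}
 *
 * vm_mmap() below implements exactly this pattern.
 */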

/*
 * vm_mmap_cdev()
 *
 * Helper function for vm_mmap.  Performs the sanity checks specific to
 * mmap operations on cdevs.
 */
int
vm_mmap_cdev(struct thread *td, vm_size_t objsize, vm_prot_t prot,
    vm_prot_t *maxprotp, int *flagsp, struct cdev *cdev, struct cdevsw *dsw,
    vm_ooffset_t *foff, vm_object_t *objp)
{
	vm_object_t obj;
	int error, flags;

	flags = *flagsp;

	if (dsw->d_flags & D_MMAP_ANON) {
		*objp = NULL;
		*foff = 0;
		*maxprotp = VM_PROT_ALL;
		*flagsp |= MAP_ANON;
		return (0);
	}
	/*
	 * cdevs do not provide private mappings of any kind.
	 */
	if ((*maxprotp & VM_PROT_WRITE) == 0 &&
	    (prot & VM_PROT_WRITE) != 0)
		return (EACCES);
	if (flags & (MAP_PRIVATE|MAP_COPY))
		return (EINVAL);
	/*
	 * Force device mappings to be shared.
	 */
	flags |= MAP_SHARED;
#ifdef MAC_XXX
	error = mac_cdev_check_mmap(td->td_ucred, cdev, (int)prot);
	if (error != 0)
		return (error);
#endif
	/*
	 * First, try d_mmap_single().  If that is not implemented
	 * (returns ENODEV), fall back to using the device pager.
	 * Note that d_mmap_single() must return a reference to the
	 * object (it needs to bump the reference count of the object
	 * it returns somehow).
	 *
	 * XXX assumes VM_PROT_* == PROT_*
	 */
	error = dsw->d_mmap_single(cdev, foff, objsize, objp, (int)prot);
	if (error != ENODEV)
		return (error);
	obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff,
	    td->td_ucred);
	if (obj == NULL)
		return (EINVAL);
	*objp = obj;
	*flagsp = flags;
	return (0);
}
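
/*
 * Illustrative sketch, not part of the original file: a driver-side
 * d_mmap_single() implementation matching the contract described
 * above might look like this (foo_softc, mem_size and mem_obj are
 * hypothetical):
 *
 *	static int
 *	foo_mmap_single(struct cdev *cdev, vm_ooffset_t *offset,
 *	    vm_size_t size, struct vm_object **object, int nprot)
 *	{
 *		struct foo_softc *sc = cdev->si_drv1;
 *
 *		if (*offset + size > sc->mem_size)
 *			return (EINVAL);
 *		vm_object_reference(sc->mem_obj);
 *		*object = sc->mem_obj;
 *		return (0);
 *	}
 *
 * Returning any error other than ENODEV aborts the mapping rather
 * than falling back to the device pager.
 */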

/*
 * vm_mmap()
 *
 * Internal version of mmap used by exec, sys5 shared memory, and
 * various device drivers.  Handle is either a vnode pointer, a
 * character device, or NULL for MAP_ANON.
 */
int
vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags,
    objtype_t handle_type, void *handle,
    vm_ooffset_t foff)
{
	vm_object_t object;
	struct thread *td = curthread;
	int error;
	boolean_t writecounted;

	if (size == 0)
		return (EINVAL);

	size = round_page(size);
	object = NULL;
	writecounted = FALSE;

	/*
	 * Lookup/allocate object.
	 */
	switch (handle_type) {
	case OBJT_DEVICE: {
		struct cdevsw *dsw;
		struct cdev *cdev;
		int ref;

		cdev = handle;
		dsw = dev_refthread(cdev, &ref);
		if (dsw == NULL)
			return (ENXIO);
		error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, cdev,
		    dsw, &foff, &object);
		dev_relthread(cdev, ref);
		break;
	}
	case OBJT_VNODE:
		error = vm_mmap_vnode(td, size, prot, &maxprot, &flags,
		    handle, &foff, &object, &writecounted);
		break;
	case OBJT_DEFAULT:
		if (handle == NULL) {
			error = 0;
			break;
		}
		/* FALLTHROUGH */
	default:
		error = EINVAL;
		break;
	}
	if (error)
		return (error);

	error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object,
	    foff, writecounted, td);
	if (error != 0 && object != NULL) {
		/*
		 * If this mapping was accounted for in the vnode's
		 * writecount, then undo that now.
		 */
		if (writecounted)
			vnode_pager_release_writecount(object, 0, size);
		vm_object_deallocate(object);
	}
	return (error);
}
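
/*
 * Usage note (illustrative sketch, not part of the original file; vs
 * and vp are hypothetical locals): an in-kernel caller mapping a vnode
 * at a kernel-chosen address could use
 *
 *	vm_offset_t addr = 0;
 *	int error;
 *
 *	error = vm_mmap(&vs->vm_map, &addr, PAGE_SIZE, VM_PROT_READ,
 *	    VM_PROT_ALL, MAP_SHARED, OBJT_VNODE, vp, 0);
 *
 * On success addr holds the chosen address; on failure the object
 * reference taken during the lookup has already been dropped.
 */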

/*
 * Internal version of mmap that maps a specific VM object into a
 * map.  Called by mmap for MAP_ANON, vm_mmap, shm_mmap, and vn_mmap.
 */
int
vm_mmap_object(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags, vm_object_t object, vm_ooffset_t foff,
    boolean_t writecounted, struct thread *td)
{
	boolean_t curmap, fitit;
	vm_offset_t max_addr;
	int docow, error, findspace, rv;

	curmap = map == &td->td_proc->p_vmspace->vm_map;
	if (curmap) {
		PROC_LOCK(td->td_proc);
		if (map->size + size > lim_cur_proc(td->td_proc, RLIMIT_VMEM)) {
			PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		if (racct_set(td->td_proc, RACCT_VMEM, map->size + size)) {
			PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		if (!old_mlock && map->flags & MAP_WIREFUTURE) {
			if (ptoa(pmap_wired_count(map->pmap)) + size >
			    lim_cur_proc(td->td_proc, RLIMIT_MEMLOCK)) {
				racct_set_force(td->td_proc, RACCT_VMEM,
				    map->size);
				PROC_UNLOCK(td->td_proc);
				return (ENOMEM);
			}
			error = racct_set(td->td_proc, RACCT_MEMLOCK,
			    ptoa(pmap_wired_count(map->pmap)) + size);
			if (error != 0) {
				racct_set_force(td->td_proc, RACCT_VMEM,
				    map->size);
				PROC_UNLOCK(td->td_proc);
				return (error);
			}
		}
		PROC_UNLOCK(td->td_proc);
	}

	/*
	 * We currently can only deal with page aligned file offsets.
	 * The mmap() system call already enforces this by subtracting
	 * the page offset from the file offset, but checking here
	 * catches errors in device drivers (e.g. d_mmap_single()
	 * callbacks) and other internal mapping requests (such as in
	 * exec).
	 */
	if (foff & PAGE_MASK)
		return (EINVAL);

	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		if (*addr != trunc_page(*addr))
			return (EINVAL);
		fitit = FALSE;
	}

	if (flags & MAP_ANON) {
		if (object != NULL || foff != 0)
			return (EINVAL);
		docow = 0;
	} else if (flags & MAP_PREFAULT_READ)
		docow = MAP_PREFAULT;
	else
		docow = MAP_PREFAULT_PARTIAL;

	if ((flags & (MAP_ANON|MAP_SHARED)) == 0)
		docow |= MAP_COPY_ON_WRITE;
	if (flags & MAP_NOSYNC)
		docow |= MAP_DISABLE_SYNCER;
	if (flags & MAP_NOCORE)
		docow |= MAP_DISABLE_COREDUMP;
	/* Shared memory is also shared with children. */
	if (flags & MAP_SHARED)
		docow |= MAP_INHERIT_SHARE;
	if (writecounted)
		docow |= MAP_VN_WRITECOUNT;
	if (flags & MAP_STACK) {
		if (object != NULL)
			return (EINVAL);
		docow |= MAP_STACK_GROWS_DOWN;
	}
	if ((flags & MAP_EXCL) != 0)
		docow |= MAP_CHECK_EXCL;
	if ((flags & MAP_GUARD) != 0)
		docow |= MAP_CREATE_GUARD;

	if (fitit) {
		if ((flags & MAP_ALIGNMENT_MASK) == MAP_ALIGNED_SUPER)
			findspace = VMFS_SUPER_SPACE;
		else if ((flags & MAP_ALIGNMENT_MASK) != 0)
			findspace = VMFS_ALIGNED_SPACE(flags >>
			    MAP_ALIGNMENT_SHIFT);
		else
			findspace = VMFS_OPTIMAL_SPACE;
		max_addr = 0;
#ifdef MAP_32BIT
		if ((flags & MAP_32BIT) != 0)
			max_addr = MAP_32BIT_MAX_ADDR;
#endif
		if (curmap) {
			rv = vm_map_find_min(map, object, foff, addr, size,
			    round_page((vm_offset_t)td->td_proc->p_vmspace->
			    vm_daddr + lim_max(td, RLIMIT_DATA)), max_addr,
			    findspace, prot, maxprot, docow);
		} else {
			rv = vm_map_find(map, object, foff, addr, size,
			    max_addr, findspace, prot, maxprot, docow);
		}
	} else {
		rv = vm_map_fixed(map, object, foff, *addr, size,
		    prot, maxprot, docow);
	}

	if (rv == KERN_SUCCESS) {
		/*
		 * If the process has requested that all future mappings
		 * be wired, then heed this.
		 */
		if (map->flags & MAP_WIREFUTURE) {
			vm_map_wire(map, *addr, *addr + size,
			    VM_MAP_WIRE_USER | ((flags & MAP_STACK) ?
			    VM_MAP_WIRE_HOLESOK : VM_MAP_WIRE_NOHOLES));
		}
	}
	return (vm_mmap_to_errno(rv));
}
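
/*
 * Worked example (illustrative, not part of the original file): for a
 * plain private file mapping such as
 *
 *	mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
 *
 * the flag computation in vm_mmap_object() above yields
 *
 *	docow = MAP_PREFAULT_PARTIAL | MAP_COPY_ON_WRITE;
 *
 * since neither MAP_ANON nor MAP_SHARED is set, while a MAP_SHARED
 * mapping gets MAP_INHERIT_SHARE instead of MAP_COPY_ON_WRITE.
 */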

/*
 * Translate a Mach VM return code to zero on success or the appropriate errno
 * on failure.
 */
int
vm_mmap_to_errno(int rv)
{

	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}
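
/*
 * Usage note (illustrative, not part of the original file):
 * vm_mmap_object() above ends with exactly this idiom:
 *
 *	rv = vm_map_fixed(map, object, foff, *addr, size, prot,
 *	    maxprot, docow);
 *	return (vm_mmap_to_errno(rv));
 *
 * Any code not listed in the switch (e.g. KERN_RESOURCE_SHORTAGE)
 * maps to EINVAL.
 */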