/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_hwpmc_hooks.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/capsicum.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/procctl.h>
#include <sys/racct.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/vmmeter.h>
#if defined(__amd64__) || defined(__i386__) /* for i386_read_exec */
#include <machine/md_var.h>
#endif

#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vnode_pager.h>

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

int old_mlock = 0;
SYSCTL_INT(_vm, OID_AUTO, old_mlock, CTLFLAG_RWTUN, &old_mlock, 0,
    "Do not apply RLIMIT_MEMLOCK on mlockall");
static int mincore_mapped = 1;
SYSCTL_INT(_vm, OID_AUTO, mincore_mapped, CTLFLAG_RWTUN, &mincore_mapped, 0,
    "mincore reports mappings, not residency");
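
/*
 * Illustrative note (not part of the original sources): both knobs above
 * are CTLFLAG_RWTUN, i.e. loader tunables as well as run-time sysctls,
 * so they can be set from loader.conf or changed at run time; for
 * example, "sysctl vm.mincore_mapped=0" makes mincore(2) report
 * residency instead of mappings.
 */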

#ifdef MAP_32BIT
#define	MAP_32BIT_MAX_ADDR	((vm_offset_t)1 << 31)
#endif

#ifndef _SYS_SYSPROTO_H_
struct sbrk_args {
	int incr;
};
#endif

int
sys_sbrk(struct thread *td, struct sbrk_args *uap)
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#ifndef _SYS_SYSPROTO_H_
struct sstk_args {
	int incr;
};
#endif

int
sys_sstk(struct thread *td, struct sstk_args *uap)
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43)
int
ogetpagesize(struct thread *td, struct ogetpagesize_args *uap)
{

	td->td_retval[0] = PAGE_SIZE;
	return (0);
}
#endif				/* COMPAT_43 */

/*
 * Memory Map (mmap) system call.  Note that the file offset
 * and address are allowed to be NOT page aligned, though if
 * the MAP_FIXED flag is set, both must have the same remainder
 * modulo the PAGE_SIZE (POSIX 1003.1b).  If the address is not
 * page-aligned, the actual mapping starts at trunc_page(addr)
 * and the return value is adjusted up by the page offset.
 *
 * Generally speaking, only character devices which are themselves
 * memory-based, such as a video framebuffer, can be mmap'd.  Otherwise
 * there would be no cache coherency between a descriptor and a VM mapping
 * both to the same character device.
 */
#ifndef _SYS_SYSPROTO_H_
struct mmap_args {
	void *addr;
	size_t len;
	int prot;
	int flags;
	int fd;
	long pad;
	off_t pos;
};
#endif

int
sys_mmap(struct thread *td, struct mmap_args *uap)
{

	return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, uap->prot,
	    uap->flags, uap->fd, uap->pos));
}
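
/*
 * Worked example of the alignment rule described above (illustrative,
 * not part of the original sources): with 4 KB pages, a request such as
 *
 *	mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0x12345)
 *
 * has a page offset of 0x345, so kern_mmap() below maps file data
 * starting at trunc_page(0x12345) = 0x12000 and returns the chosen base
 * address plus 0x345, keeping the returned pointer and the file offset
 * equal modulo PAGE_SIZE.
 */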

int
kern_mmap(struct thread *td, uintptr_t addr0, size_t size, int prot, int flags,
    int fd, off_t pos)
{
	struct vmspace *vms;
	struct file *fp;
	vm_offset_t addr;
	vm_size_t pageoff;
	vm_prot_t cap_maxprot;
	int align, error;
	cap_rights_t rights;

	vms = td->td_proc->p_vmspace;
	fp = NULL;
	AUDIT_ARG_FD(fd);
	addr = addr0;

	/*
	 * Ignore old flags that used to be defined but did not do anything.
	 */
	flags &= ~(MAP_RESERVED0020 | MAP_RESERVED0040);

	/*
	 * Enforce the constraints.
	 * Mapping of length 0 is only allowed for old binaries.
	 * Anonymous mapping shall specify -1 as file descriptor and
	 * zero position for new code.  Be nice to ancient a.out
	 * binaries and correct pos for anonymous mapping, since old
	 * ld.so sometimes issues anonymous map requests with non-zero
	 * pos.
	 */
	if (!SV_CURPROC_FLAG(SV_AOUT)) {
		if ((size == 0 && curproc->p_osrel >= P_OSREL_MAP_ANON) ||
		    ((flags & MAP_ANON) != 0 && (fd != -1 || pos != 0)))
			return (EINVAL);
	} else {
		if ((flags & MAP_ANON) != 0)
			pos = 0;
	}

	if (flags & MAP_STACK) {
		if ((fd != -1) ||
		    ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE)))
			return (EINVAL);
		flags |= MAP_ANON;
		pos = 0;
	}
	if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | MAP_HASSEMAPHORE |
	    MAP_STACK | MAP_NOSYNC | MAP_ANON | MAP_EXCL | MAP_NOCORE |
	    MAP_PREFAULT_READ | MAP_GUARD |
#ifdef MAP_32BIT
	    MAP_32BIT |
#endif
	    MAP_ALIGNMENT_MASK)) != 0)
		return (EINVAL);
	if ((flags & (MAP_EXCL | MAP_FIXED)) == MAP_EXCL)
		return (EINVAL);
	if ((flags & (MAP_SHARED | MAP_PRIVATE)) == (MAP_SHARED | MAP_PRIVATE))
		return (EINVAL);
	if (prot != PROT_NONE &&
	    (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) != 0)
		return (EINVAL);
	if ((flags & MAP_GUARD) != 0 && (prot != PROT_NONE || fd != -1 ||
	    pos != 0 || (flags & ~(MAP_FIXED | MAP_GUARD | MAP_EXCL |
#ifdef MAP_32BIT
	    MAP_32BIT |
#endif
	    MAP_ALIGNMENT_MASK)) != 0))
		return (EINVAL);
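
	/*
	 * Example of the MAP_GUARD constraints enforced above
	 * (illustrative, not part of the original sources):
	 *
	 *	mmap(base, PAGE_SIZE, PROT_NONE, MAP_GUARD | MAP_FIXED, -1, 0)
	 *
	 * is accepted, while combining MAP_GUARD with any protection other
	 * than PROT_NONE, a file descriptor, a non-zero offset, or flags
	 * outside MAP_FIXED/MAP_EXCL/MAP_32BIT/alignment requests fails
	 * with EINVAL.
	 */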

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;

	/* Adjust size for rounding (on both ends). */
	size += pageoff;			/* low end... */
	size = (vm_size_t) round_page(size);	/* hi end */

	/* Ensure alignment is at least a page and fits in a pointer. */
	align = flags & MAP_ALIGNMENT_MASK;
	if (align != 0 && align != MAP_ALIGNED_SUPER &&
	    (align >> MAP_ALIGNMENT_SHIFT >= sizeof(void *) * NBBY ||
	    align >> MAP_ALIGNMENT_SHIFT < PAGE_SHIFT))
		return (EINVAL);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...  Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		/* Address range must be all in user VM space. */
		if (addr < vm_map_min(&vms->vm_map) ||
		    addr + size > vm_map_max(&vms->vm_map))
			return (EINVAL);
		if (addr + size < addr)
			return (EINVAL);
#ifdef MAP_32BIT
		if (flags & MAP_32BIT && addr + size > MAP_32BIT_MAX_ADDR)
			return (EINVAL);
	} else if (flags & MAP_32BIT) {
		/*
		 * For MAP_32BIT, override the hint if it is too high and
		 * do not bother moving the mapping past the heap (since
		 * the heap is usually above 2GB).
		 */
		if (addr + size > MAP_32BIT_MAX_ADDR)
			addr = 0;
#endif
	} else {
		/*
		 * XXX for non-fixed mappings where no hint is provided or
		 * the hint would fall in the potential heap space,
		 * place it after the end of the largest possible heap.
		 *
		 * There should really be a pmap call to determine a reasonable
		 * location.
		 */
		if (addr == 0 ||
		    (addr >= round_page((vm_offset_t)vms->vm_taddr) &&
		    addr < round_page((vm_offset_t)vms->vm_daddr +
		    lim_max(td, RLIMIT_DATA))))
			addr = round_page((vm_offset_t)vms->vm_daddr +
			    lim_max(td, RLIMIT_DATA));
	}
	if (size == 0) {
		/*
		 * Return success without mapping anything for old
		 * binaries that request a page-aligned mapping of
		 * length 0.  For modern binaries, this function
		 * returns an error earlier.
		 */
		error = 0;
	} else if ((flags & MAP_GUARD) != 0) {
		error = vm_mmap_object(&vms->vm_map, &addr, size, VM_PROT_NONE,
		    VM_PROT_NONE, flags, NULL, pos, FALSE, td);
	} else if ((flags & MAP_ANON) != 0) {
		/*
		 * Mapping blank space is trivial.
		 *
		 * This relies on VM_PROT_* matching PROT_*.
		 */
		error = vm_mmap_object(&vms->vm_map, &addr, size, prot,
		    VM_PROT_ALL, flags, NULL, pos, FALSE, td);
	} else {
		/*
		 * Mapping file, get fp for validation and don't let the
		 * descriptor disappear on us if we block.  Check capability
		 * rights, but also return the maximum rights to be combined
		 * with maxprot later.
		 */
		cap_rights_init(&rights, CAP_MMAP);
		if (prot & PROT_READ)
			cap_rights_set(&rights, CAP_MMAP_R);
		if ((flags & MAP_SHARED) != 0) {
			if (prot & PROT_WRITE)
				cap_rights_set(&rights, CAP_MMAP_W);
		}
		if (prot & PROT_EXEC)
			cap_rights_set(&rights, CAP_MMAP_X);
		error = fget_mmap(td, fd, &rights, &cap_maxprot, &fp);
		if (error != 0)
			goto done;
		if ((flags & (MAP_SHARED | MAP_PRIVATE)) == 0 &&
		    td->td_proc->p_osrel >= P_OSREL_MAP_FSTRICT) {
			error = EINVAL;
			goto done;
		}

		/* This relies on VM_PROT_* matching PROT_*. */
		error = fo_mmap(fp, &vms->vm_map, &addr, size, prot,
		    cap_maxprot, flags, pos, td);
	}

	if (error == 0)
		td->td_retval[0] = (register_t) (addr + pageoff);
done:
	if (fp)
		fdrop(fp, td);

	return (error);
}
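
/*
 * Illustrative note on the capability checks above (not part of the
 * original sources): a descriptor restricted with Capsicum to CAP_MMAP_R
 * can still be mapped with PROT_READ, but a MAP_SHARED mapping that
 * requests PROT_WRITE additionally demands CAP_MMAP_W, and fget_mmap()
 * refuses the descriptor when the needed rights are missing.
 */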

#if defined(COMPAT_FREEBSD6)
int
freebsd6_mmap(struct thread *td, struct freebsd6_mmap_args *uap)
{

	return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, uap->prot,
	    uap->flags, uap->fd, uap->pos));
}
#endif

#ifdef COMPAT_43
#ifndef _SYS_SYSPROTO_H_
struct ommap_args {
	caddr_t addr;
	int len;
	int prot;
	int flags;
	int fd;
	long pos;
};
#endif
int
ommap(struct thread *td, struct ommap_args *uap)
{
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC | PROT_WRITE,
		PROT_READ,
		PROT_EXEC | PROT_READ,
		PROT_WRITE | PROT_READ,
		PROT_EXEC | PROT_WRITE | PROT_READ,
	};
	int flags, prot;

#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100

	prot = cvtbsdprot[uap->prot & 0x7];
#if (defined(COMPAT_FREEBSD32) && defined(__amd64__)) || defined(__i386__)
	if (i386_read_exec && SV_PROC_FLAG(td->td_proc, SV_ILP32) &&
	    prot != 0)
		prot |= PROT_EXEC;
#endif
	flags = 0;
	if (uap->flags & OMAP_ANON)
		flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		flags |= MAP_SHARED;
	else
		flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		flags |= MAP_FIXED;
	return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, prot, flags,
	    uap->fd, uap->pos));
}
#endif				/* COMPAT_43 */
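
/*
 * Worked example for the cvtbsdprot[] table above (illustrative, not
 * part of the original sources): the old 4.3BSD encoding uses 0x1 =
 * exec, 0x2 = write and 0x4 = read, so an ommap() prot of 6
 * (write|read) converts to PROT_WRITE | PROT_READ via cvtbsdprot[6].
 */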

#ifndef _SYS_SYSPROTO_H_
struct msync_args {
	void *addr;
	size_t len;
	int flags;
};
#endif
int
sys_msync(struct thread *td, struct msync_args *uap)
{

	return (kern_msync(td, (uintptr_t)uap->addr, uap->len, uap->flags));
}

int
kern_msync(struct thread *td, uintptr_t addr0, size_t size, int flags)
{
	vm_offset_t addr;
	vm_size_t pageoff;
	vm_map_t map;
	int rv;

	addr = addr0;
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	map = &td->td_proc->p_vmspace->vm_map;

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_sync(map, addr, addr + size, (flags & MS_ASYNC) == 0,
	    (flags & MS_INVALIDATE) != 0);
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
		return (ENOMEM);
	case KERN_INVALID_ARGUMENT:
		return (EBUSY);
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
}
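
/*
 * Illustrative note (not part of the original sources): kern_msync()
 * above asks vm_map_sync() to wait for the writes unless MS_ASYNC was
 * given, and maps KERN_INVALID_ARGUMENT to EBUSY, which is what
 * msync(2) documents for MS_INVALIDATE applied to wired pages.
 */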

#ifndef _SYS_SYSPROTO_H_
struct munmap_args {
	void *addr;
	size_t len;
};
#endif
int
sys_munmap(struct thread *td, struct munmap_args *uap)
{

	return (kern_munmap(td, (uintptr_t)uap->addr, uap->len));
}

int
kern_munmap(struct thread *td, uintptr_t addr0, size_t size)
{
#ifdef HWPMC_HOOKS
	struct pmckern_map_out pkm;
	vm_map_entry_t entry;
	bool pmc_handled;
#endif
	vm_offset_t addr;
	vm_size_t pageoff;
	vm_map_t map;

	if (size == 0)
		return (EINVAL);

	addr = addr0;
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	if (addr < vm_map_min(map) || addr + size > vm_map_max(map))
		return (EINVAL);
	vm_map_lock(map);
#ifdef HWPMC_HOOKS
	pmc_handled = false;
	if (PMC_HOOK_INSTALLED(PMC_FN_MUNMAP)) {
		pmc_handled = true;
		/*
		 * Inform hwpmc if the address range being unmapped contains
		 * an executable region.
		 */
		pkm.pm_address = (uintptr_t) NULL;
		if (vm_map_lookup_entry(map, addr, &entry)) {
			for (; entry->start < addr + size;
			    entry = entry->next) {
				if (vm_map_check_protection(map, entry->start,
				    entry->end, VM_PROT_EXECUTE) == TRUE) {
					pkm.pm_address = (uintptr_t) addr;
					pkm.pm_size = (size_t) size;
					break;
				}
			}
		}
	}
#endif
	vm_map_delete(map, addr, addr + size);

#ifdef HWPMC_HOOKS
	if (__predict_false(pmc_handled)) {
		/* downgrade the lock to prevent a LOR with the pmc-sx lock */
		vm_map_lock_downgrade(map);
		if (pkm.pm_address != (uintptr_t) NULL)
			PMC_CALL_HOOK(td, PMC_FN_MUNMAP, (void *) &pkm);
		vm_map_unlock_read(map);
	} else
#endif
		vm_map_unlock(map);

	/* vm_map_delete returns nothing but KERN_SUCCESS anyway */
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mprotect_args {
	const void *addr;
	size_t len;
	int prot;
};
#endif
int
sys_mprotect(struct thread *td, struct mprotect_args *uap)
{

	return (kern_mprotect(td, (uintptr_t)uap->addr, uap->len, uap->prot));
}
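
/*
 * Illustrative note (not part of the original sources): like the other
 * range system calls here, kern_mprotect() below truncates the start
 * address and rounds the length up to whole pages, so
 * mprotect(p, 1, PROT_READ) changes the protection of the entire page
 * containing p.
 */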

int
kern_mprotect(struct thread *td, uintptr_t addr0, size_t size, int prot)
{
	vm_offset_t addr;
	vm_size_t pageoff;

	addr = addr0;
	prot = (prot & VM_PROT_ALL);
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
#ifdef COMPAT_FREEBSD32
	if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
		if (((addr + size) & 0xffffffff) < addr)
			return (EINVAL);
	} else
#endif
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_protect(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, prot, FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	case KERN_RESOURCE_SHORTAGE:
		return (ENOMEM);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct minherit_args {
	void *addr;
	size_t len;
	int inherit;
};
#endif
int
sys_minherit(struct thread *td, struct minherit_args *uap)
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_inherit_t inherit;

	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	inherit = uap->inherit;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_inherit(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}
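
/*
 * Illustrative example for sys_minherit() above (not part of the
 * original sources): minherit(p, len, INHERIT_NONE) keeps the region
 * out of a child's address space across fork(2), INHERIT_SHARE makes
 * parent and child share the same pages, and INHERIT_COPY (the
 * default) gives the child a copy-on-write copy.
 */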

#ifndef _SYS_SYSPROTO_H_
struct madvise_args {
	void *addr;
	size_t len;
	int behav;
};
#endif

int
sys_madvise(struct thread *td, struct madvise_args *uap)
{

	return (kern_madvise(td, (uintptr_t)uap->addr, uap->len, uap->behav));
}

int
kern_madvise(struct thread *td, uintptr_t addr0, size_t len, int behav)
{
	vm_map_t map;
	vm_offset_t addr, end, start;
	int flags;

	/*
	 * Check for our special case, advising the swap pager we are
	 * "immortal."
	 */
	if (behav == MADV_PROTECT) {
		flags = PPROT_SET;
		return (kern_procctl(td, P_PID, td->td_proc->p_pid,
		    PROC_SPROTECT, &flags));
	}

	/*
	 * Check for illegal addresses.  Watch out for address wrap...  Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	addr = addr0;
	if (addr < vm_map_min(map) || addr + len > vm_map_max(map))
		return (EINVAL);
	if ((addr + len) < addr)
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	start = trunc_page(addr);
	end = round_page(addr + len);

	/*
	 * vm_map_madvise() checks for illegal values of behav.
	 */
	return (vm_map_madvise(map, start, end, behav));
}
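
/*
 * Illustrative note (not part of the original sources): MADV_PROTECT is
 * not range-based advice at all; as kern_madvise() above shows, it is
 * rerouted to kern_procctl(PROC_SPROTECT) and marks the whole process
 * as exempt from being killed when the system runs out of swap, which
 * is why the address and length arguments are ignored for it.
 */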

#ifndef _SYS_SYSPROTO_H_
struct mincore_args {
	const void *addr;
	size_t len;
	char *vec;
};
#endif

int
sys_mincore(struct thread *td, struct mincore_args *uap)
{

	return (kern_mincore(td, (uintptr_t)uap->addr, uap->len, uap->vec));
}

int
kern_mincore(struct thread *td, uintptr_t addr0, size_t len, char *vec)
{
	vm_offset_t addr, first_addr;
	vm_offset_t end, cend;
	pmap_t pmap;
	vm_map_t map;
	int error = 0;
	int vecindex, lastvecindex;
	vm_map_entry_t current;
	vm_map_entry_t entry;
	vm_object_t object;
	vm_paddr_t locked_pa;
	vm_page_t m;
	vm_pindex_t pindex;
	int mincoreinfo;
	unsigned int timestamp;
	boolean_t locked;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page(addr0);
	end = addr + (vm_size_t)round_page(len);
	map = &td->td_proc->p_vmspace->vm_map;
	if (end > vm_map_max(map) || end < addr)
		return (ENOMEM);

	pmap = vmspace_pmap(td->td_proc->p_vmspace);

	vm_map_lock_read(map);
RestartScan:
	timestamp = map->timestamp;

	if (!vm_map_lookup_entry(map, addr, &entry)) {
		vm_map_unlock_read(map);
		return (ENOMEM);
	}

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current process's address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for (current = entry; current->start < end; current = current->next) {

		/*
		 * check for contiguity
		 */
		if (current->end < end && current->next->start > current->end) {
			vm_map_unlock_read(map);
			return (ENOMEM);
		}

		/*
		 * ignore submaps (for now) or null objects
		 */
		if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) ||
		    current->object.vm_object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		/*
		 * scan this entry one page at a time
		 */
		while (addr < cend) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 */
			object = NULL;
			locked_pa = 0;
		retry:
			m = NULL;
			mincoreinfo = pmap_mincore(pmap, addr, &locked_pa);
			if (mincore_mapped) {
				/*
				 * We only care about this pmap's
				 * mapping of the page, if any.
				 */
				if (locked_pa != 0) {
					vm_page_unlock(PHYS_TO_VM_PAGE(
					    locked_pa));
				}
			} else if (locked_pa != 0) {
				/*
				 * The page is mapped by this process but not
				 * both accessed and modified.  It is also
				 * managed.  Acquire the object lock so that
				 * other mappings might be examined.
				 */
				m = PHYS_TO_VM_PAGE(locked_pa);
				if (m->object != object) {
					if (object != NULL)
						VM_OBJECT_WUNLOCK(object);
					object = m->object;
					locked = VM_OBJECT_TRYWLOCK(object);
					vm_page_unlock(m);
					if (!locked) {
						VM_OBJECT_WLOCK(object);
						vm_page_lock(m);
						goto retry;
					}
				} else
					vm_page_unlock(m);
				KASSERT(m->valid == VM_PAGE_BITS_ALL,
				    ("mincore: page %p is mapped but invalid",
				    m));
			} else if (mincoreinfo == 0) {
				/*
				 * The page is not mapped by this process.  If
				 * the object implements managed pages, then
				 * determine if the page is resident so that
				 * the mappings might be examined.
				 */
				if (current->object.vm_object != object) {
					if (object != NULL)
						VM_OBJECT_WUNLOCK(object);
					object = current->object.vm_object;
					VM_OBJECT_WLOCK(object);
				}
				if (object->type == OBJT_DEFAULT ||
				    object->type == OBJT_SWAP ||
				    object->type == OBJT_VNODE) {
					pindex = OFF_TO_IDX(current->offset +
					    (addr - current->start));
					m = vm_page_lookup(object, pindex);
					if (m != NULL && m->valid == 0)
						m = NULL;
					if (m != NULL)
						mincoreinfo = MINCORE_INCORE;
				}
			}
			if (m != NULL) {
				/* Examine other mappings to the page. */
				if (m->dirty == 0 && pmap_is_modified(m))
					vm_page_dirty(m);
				if (m->dirty != 0)
					mincoreinfo |= MINCORE_MODIFIED_OTHER;
				/*
				 * The first test for PGA_REFERENCED is an
				 * optimization.  The second test is
				 * required because a concurrent pmap
				 * operation could clear the last reference
				 * and set PGA_REFERENCED before the call to
				 * pmap_is_referenced().
				 */
				if ((m->aflags & PGA_REFERENCED) != 0 ||
				    pmap_is_referenced(m) ||
				    (m->aflags & PGA_REFERENCED) != 0)
					mincoreinfo |= MINCORE_REFERENCED_OTHER;
			}
			if (object != NULL)
				VM_OBJECT_WUNLOCK(object);

			/*
			 * subyte may page fault.  In case it needs to modify
			 * the map, we release the lock.
			 */
			vm_map_unlock_read(map);

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = atop(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make sure that
			 * the byte vector is zeroed for those skipped entries.
			 */
			while ((lastvecindex + 1) < vecindex) {
				++lastvecindex;
				error = subyte(vec + lastvecindex, 0);
				if (error) {
					error = EFAULT;
					goto done2;
				}
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte(vec + vecindex, mincoreinfo);
			if (error) {
				error = EFAULT;
				goto done2;
			}

			/*
			 * If the map has changed, due to the subyte, the previous
			 * output may be invalid.
			 */
			vm_map_lock_read(map);
			if (timestamp != map->timestamp)
				goto RestartScan;

			lastvecindex = vecindex;
			addr += PAGE_SIZE;
		}
	}

	/*
	 * subyte may page fault.  In case it needs to modify
	 * the map, we release the lock.
	 */
	vm_map_unlock_read(map);

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = atop(end - first_addr);
	while ((lastvecindex + 1) < vecindex) {
		++lastvecindex;
		error = subyte(vec + lastvecindex, 0);
		if (error) {
			error = EFAULT;
			goto done2;
		}
	}

	/*
	 * If the map has changed, due to the subyte, the previous
	 * output may be invalid.
	 */
	vm_map_lock_read(map);
	if (timestamp != map->timestamp)
		goto RestartScan;
	vm_map_unlock_read(map);
done2:
	return (error);
}
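
/*
 * Illustrative example for the vector logic above (not part of the
 * original sources): mincore(p, 3 * PAGE_SIZE, vec) stores one status
 * byte per page into vec[0..2]; a zero byte means "not resident", and
 * bits such as MINCORE_INCORE, MINCORE_MODIFIED_OTHER or
 * MINCORE_REFERENCED_OTHER are OR'ed in as kern_mincore() discovers
 * them.
 */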

#ifndef _SYS_SYSPROTO_H_
struct mlock_args {
	const void *addr;
	size_t len;
};
#endif
int
sys_mlock(struct thread *td, struct mlock_args *uap)
{

	return (kern_mlock(td->td_proc, td->td_ucred,
	    __DECONST(uintptr_t, uap->addr), uap->len));
}

int
kern_mlock(struct proc *proc, struct ucred *cred, uintptr_t addr0, size_t len)
{
	vm_offset_t addr, end, last, start;
	vm_size_t npages, size;
	vm_map_t map;
	unsigned long nsize;
	int error;

	error = priv_check_cred(cred, PRIV_VM_MLOCK);
	if (error)
		return (error);
	addr = addr0;
	size = len;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	npages = atop(end - start);
	if (npages > vm_page_max_wired)
		return (ENOMEM);
	map = &proc->p_vmspace->vm_map;
	PROC_LOCK(proc);
	nsize = ptoa(npages + pmap_wired_count(map->pmap));
	if (nsize > lim_cur_proc(proc, RLIMIT_MEMLOCK)) {
		PROC_UNLOCK(proc);
		return (ENOMEM);
	}
	PROC_UNLOCK(proc);
	if (npages + vm_wire_count() > vm_page_max_wired)
		return (EAGAIN);
#ifdef RACCT
	if (racct_enable) {
		PROC_LOCK(proc);
		error = racct_set(proc, RACCT_MEMLOCK, nsize);
		PROC_UNLOCK(proc);
		if (error != 0)
			return (ENOMEM);
	}
#endif
	error = vm_map_wire(map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (racct_enable && error != KERN_SUCCESS) {
		PROC_LOCK(proc);
		racct_set(proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}
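
/*
 * Illustrative note (not part of the original sources): kern_mlock()
 * above charges a request against RLIMIT_MEMLOCK by adding the new page
 * count to what the pmap already has wired, so successive mlock(2)
 * calls on different ranges accumulate toward the same per-process
 * limit.
 */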

#ifndef _SYS_SYSPROTO_H_
struct mlockall_args {
	int how;
};
#endif

int
sys_mlockall(struct thread *td, struct mlockall_args *uap)
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = priv_check(td, PRIV_VM_MLOCK);
	if (error)
		return (error);

	if ((uap->how == 0) || ((uap->how & ~(MCL_CURRENT|MCL_FUTURE)) != 0))
		return (EINVAL);

	/*
	 * If wiring all pages in the process would cause it to exceed
	 * a hard resource limit, return ENOMEM.
	 */
	if (!old_mlock && uap->how & MCL_CURRENT) {
		if (map->size > lim_cur(td, RLIMIT_MEMLOCK))
			return (ENOMEM);
	}
#ifdef RACCT
	if (racct_enable) {
		PROC_LOCK(td->td_proc);
		error = racct_set(td->td_proc, RACCT_MEMLOCK, map->size);
		PROC_UNLOCK(td->td_proc);
		if (error != 0)
			return (ENOMEM);
	}
#endif

	if (uap->how & MCL_FUTURE) {
		vm_map_lock(map);
		vm_map_modflags(map, MAP_WIREFUTURE, 0);
		vm_map_unlock(map);
		error = 0;
	}

	if (uap->how & MCL_CURRENT) {
		/*
		 * P1003.1-2001 mandates that all currently mapped pages
		 * will be memory resident and locked (wired) upon return
		 * from mlockall().  vm_map_wire() will wire pages, by
		 * calling vm_fault_wire() for each page in the region.
		 */
		error = vm_map_wire(map, vm_map_min(map), vm_map_max(map),
		    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
		error = (error == KERN_SUCCESS ? 0 : EAGAIN);
	}
#ifdef RACCT
	if (racct_enable && error != KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct munlockall_args {
	register_t dummy;
};
#endif

int
sys_munlockall(struct thread *td, struct munlockall_args *uap)
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);

	/* Clear the MAP_WIREFUTURE flag from this vm_map. */
	vm_map_lock(map);
	vm_map_modflags(map, 0, MAP_WIREFUTURE);
	vm_map_unlock(map);

	/* Forcibly unwire all pages. */
	error = vm_map_unwire(map, vm_map_min(map), vm_map_max(map),
	    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
#ifdef RACCT
	if (racct_enable && error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK, 0);
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct munlock_args {
	const void *addr;
	size_t len;
};
#endif
int
sys_munlock(struct thread *td, struct munlock_args *uap)
{

	return (kern_munlock(td, (uintptr_t)uap->addr, uap->len));
}
#ifndef _SYS_SYSPROTO_H_
struct munlock_args {
	const void *addr;
	size_t len;
};
#endif
int
sys_munlock(struct thread *td, struct munlock_args *uap)
{

	return (kern_munlock(td, (uintptr_t)uap->addr, uap->len));
}

int
kern_munlock(struct thread *td, uintptr_t addr0, size_t size)
{
	vm_offset_t addr, end, last, start;
#ifdef RACCT
	vm_map_t map;
#endif
	int error;

	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);
	addr = addr0;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
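	/*
	 * Worked example of the rounding above (illustrative, 4K pages):
	 * munlock(0x1234, 0x100) gives start = 0x1000 and end = 0x2000,
	 * so the single containing page is unwired; the check above
	 * rejects addr + size wrapping past the top of the address space.
	 */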
	error = vm_map_unwire(&td->td_proc->p_vmspace->vm_map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (racct_enable && error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		map = &td->td_proc->p_vmspace->vm_map;
		racct_set(td->td_proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(td->td_proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * vm_mmap_vnode()
 *
 * Helper function for vm_mmap.  Performs the sanity checks specific to
 * mmap operations on vnodes.
 */
int
vm_mmap_vnode(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct vnode *vp, vm_ooffset_t *foffp, vm_object_t *objp,
    boolean_t *writecounted)
{
	struct vattr va;
	vm_object_t obj;
	vm_ooffset_t foff;
	struct ucred *cred;
	int error, flags;
	bool writex;

	cred = td->td_ucred;
	writex = (*maxprotp & VM_PROT_WRITE) != 0 &&
	    (*flagsp & MAP_SHARED) != 0;
	if ((error = vget(vp, LK_SHARED, td)) != 0)
		return (error);
	AUDIT_ARG_VNODE1(vp);
	foff = *foffp;
	flags = *flagsp;
	obj = vp->v_object;
	if (vp->v_type == VREG) {
		/*
		 * Get the proper underlying object
		 */
		if (obj == NULL) {
			error = EINVAL;
			goto done;
		}
		if (obj->type == OBJT_VNODE && obj->handle != vp) {
			vput(vp);
			vp = (struct vnode *)obj->handle;
			/*
			 * Bypass filesystems obey the mpsafety of the
			 * underlying fs.  Tmpfs never bypasses.
			 */
			error = vget(vp, LK_SHARED, td);
			if (error != 0)
				return (error);
		}
		if (writex) {
			*writecounted = TRUE;
			vnode_pager_update_writecount(obj, 0, objsize);
		}
	} else {
		error = EINVAL;
		goto done;
	}
	if ((error = VOP_GETATTR(vp, &va, cred)))
		goto done;
#ifdef MAC
	/* This relies on VM_PROT_* matching PROT_*. */
	error = mac_vnode_check_mmap(cred, vp, (int)prot, flags);
	if (error != 0)
		goto done;
#endif
	if ((flags & MAP_SHARED) != 0) {
		if ((va.va_flags & (SF_SNAPSHOT|IMMUTABLE|APPEND)) != 0) {
			if (prot & VM_PROT_WRITE) {
				error = EPERM;
				goto done;
			}
			*maxprotp &= ~VM_PROT_WRITE;
		}
	}
	/*
	 * If it is a regular file without any references
	 * we do not need to sync it.
	 * Adjust object size to be the size of the actual file.
	 */
	objsize = round_page(va.va_size);
	if (va.va_nlink == 0)
		flags |= MAP_NOSYNC;
	if (obj->type == OBJT_VNODE) {
		obj = vm_pager_allocate(OBJT_VNODE, vp, objsize, prot, foff,
		    cred);
		if (obj == NULL) {
			error = ENOMEM;
			goto done;
		}
	} else {
		KASSERT(obj->type == OBJT_DEFAULT || obj->type == OBJT_SWAP,
		    ("wrong object type"));
		VM_OBJECT_WLOCK(obj);
		vm_object_reference_locked(obj);
#if VM_NRESERVLEVEL > 0
		vm_object_color(obj, 0);
#endif
		VM_OBJECT_WUNLOCK(obj);
	}
	*objp = obj;
	*flagsp = flags;

	vfs_mark_atime(vp, cred);

done:
	if (error != 0 && *writecounted) {
		*writecounted = FALSE;
		vnode_pager_update_writecount(obj, objsize, 0);
	}
	vput(vp);
	return (error);
}
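/*
 * Illustrative note on the writecount handling above: a shared, writable
 * mapping is charged against the vnode via vnode_pager_update_writecount()
 * so the filesystem can tell that the file may be dirtied through memory;
 * the "done:" path undoes that charge exactly once if the setup fails.
 */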
/*
 * vm_mmap_cdev()
 *
 * Helper function for vm_mmap.  Performs the sanity checks specific to
 * mmap operations on cdevs.
 */
int
vm_mmap_cdev(struct thread *td, vm_size_t objsize, vm_prot_t prot,
    vm_prot_t *maxprotp, int *flagsp, struct cdev *cdev, struct cdevsw *dsw,
    vm_ooffset_t *foff, vm_object_t *objp)
{
	vm_object_t obj;
	int error, flags;

	flags = *flagsp;

	if (dsw->d_flags & D_MMAP_ANON) {
		*objp = NULL;
		*foff = 0;
		*maxprotp = VM_PROT_ALL;
		*flagsp |= MAP_ANON;
		return (0);
	}
	/*
	 * cdevs do not provide private mappings of any kind.
	 */
	if ((*maxprotp & VM_PROT_WRITE) == 0 &&
	    (prot & VM_PROT_WRITE) != 0)
		return (EACCES);
	if (flags & (MAP_PRIVATE|MAP_COPY))
		return (EINVAL);
	/*
	 * Force device mappings to be shared.
	 */
	flags |= MAP_SHARED;
#ifdef MAC_XXX
	error = mac_cdev_check_mmap(td->td_ucred, cdev, (int)prot);
	if (error != 0)
		return (error);
#endif
	/*
	 * First, try d_mmap_single().  If that is not implemented
	 * (returns ENODEV), fall back to using the device pager.
	 * Note that d_mmap_single() must return a reference to the
	 * object it hands back (it is expected to bump the object's
	 * reference count itself).
	 *
	 * XXX assumes VM_PROT_* == PROT_*
	 */
	error = dsw->d_mmap_single(cdev, foff, objsize, objp, (int)prot);
	if (error != ENODEV)
		return (error);
	obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff,
	    td->td_ucred);
	if (obj == NULL)
		return (EINVAL);
	*objp = obj;
	*flagsp = flags;
	return (0);
}
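/*
 * Example (illustrative): /dev/zero sets D_MMAP_ANON in its cdevsw, so a
 * PROT_READ|PROT_WRITE mapping of it takes the early-return path above
 * and the caller ends up building an ordinary anonymous mapping instead
 * of a device-pager mapping.
 */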
/*
 * vm_mmap()
 *
 * Internal version of mmap used by exec, sys5 shared memory, and
 * various device drivers.  Handle is either a vnode pointer, a
 * character device, or NULL for MAP_ANON.
 */
int
vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags,
    objtype_t handle_type, void *handle,
    vm_ooffset_t foff)
{
	vm_object_t object;
	struct thread *td = curthread;
	int error;
	boolean_t writecounted;

	if (size == 0)
		return (EINVAL);

	size = round_page(size);
	object = NULL;
	writecounted = FALSE;

	/*
	 * Lookup/allocate object.
	 */
	switch (handle_type) {
	case OBJT_DEVICE: {
		struct cdevsw *dsw;
		struct cdev *cdev;
		int ref;

		cdev = handle;
		dsw = dev_refthread(cdev, &ref);
		if (dsw == NULL)
			return (ENXIO);
		error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, cdev,
		    dsw, &foff, &object);
		dev_relthread(cdev, ref);
		break;
	}
	case OBJT_VNODE:
		error = vm_mmap_vnode(td, size, prot, &maxprot, &flags,
		    handle, &foff, &object, &writecounted);
		break;
	case OBJT_DEFAULT:
		if (handle == NULL) {
			error = 0;
			break;
		}
		/* FALLTHROUGH */
	default:
		error = EINVAL;
		break;
	}
	if (error)
		return (error);

	error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object,
	    foff, writecounted, td);
	if (error != 0 && object != NULL) {
		/*
		 * If this mapping was accounted for in the vnode's
		 * writecount, then undo that now.
		 */
		if (writecounted)
			vnode_pager_release_writecount(object, 0, size);
		vm_object_deallocate(object);
	}
	return (error);
}
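/*
 * Illustrative call sketch (assumed caller, not taken from this file):
 * an in-kernel consumer mapping a vnode-backed region might invoke
 *
 *	error = vm_mmap(map, &addr, size, VM_PROT_READ,
 *	    VM_PROT_READ | VM_PROT_WRITE, MAP_SHARED, OBJT_VNODE, vp, off);
 *
 * taking the OBJT_VNODE branch above and leaving cleanup of the object
 * reference to the error path shown there.
 */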
/*
 * Internal version of mmap that maps a specific VM object into a
 * map.  Called by mmap for MAP_ANON, vm_mmap, shm_mmap, and vn_mmap.
 */
int
vm_mmap_object(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags, vm_object_t object, vm_ooffset_t foff,
    boolean_t writecounted, struct thread *td)
{
	boolean_t curmap, fitit;
	vm_offset_t max_addr;
	int docow, error, findspace, rv;

	curmap = map == &td->td_proc->p_vmspace->vm_map;
	if (curmap) {
		RACCT_PROC_LOCK(td->td_proc);
		if (map->size + size > lim_cur(td, RLIMIT_VMEM)) {
			RACCT_PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		if (racct_set(td->td_proc, RACCT_VMEM, map->size + size)) {
			RACCT_PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		if (!old_mlock && map->flags & MAP_WIREFUTURE) {
			if (ptoa(pmap_wired_count(map->pmap)) + size >
			    lim_cur(td, RLIMIT_MEMLOCK)) {
				racct_set_force(td->td_proc, RACCT_VMEM,
				    map->size);
				RACCT_PROC_UNLOCK(td->td_proc);
				return (ENOMEM);
			}
			error = racct_set(td->td_proc, RACCT_MEMLOCK,
			    ptoa(pmap_wired_count(map->pmap)) + size);
			if (error != 0) {
				racct_set_force(td->td_proc, RACCT_VMEM,
				    map->size);
				RACCT_PROC_UNLOCK(td->td_proc);
				return (error);
			}
		}
		RACCT_PROC_UNLOCK(td->td_proc);
	}

	/*
	 * We currently can only deal with page aligned file offsets.
	 * The mmap() system call already enforces this by subtracting
	 * the page offset from the file offset, but checking here
	 * catches errors in device drivers (e.g. d_mmap_single()
	 * callbacks) and other internal mapping requests (such as in
	 * exec).
	 */
	if (foff & PAGE_MASK)
		return (EINVAL);
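	/*
	 * Worked example (illustrative): a device d_mmap_single() callback
	 * that hands back foff = 0x800 on a 4K-page machine fails the
	 * PAGE_MASK test above and the whole request is rejected with
	 * EINVAL before the map is touched.
	 */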
	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		if (*addr != trunc_page(*addr))
			return (EINVAL);
		fitit = FALSE;
	}

	if (flags & MAP_ANON) {
		if (object != NULL || foff != 0)
			return (EINVAL);
		docow = 0;
	} else if (flags & MAP_PREFAULT_READ)
		docow = MAP_PREFAULT;
	else
		docow = MAP_PREFAULT_PARTIAL;

	if ((flags & (MAP_ANON|MAP_SHARED)) == 0)
		docow |= MAP_COPY_ON_WRITE;
	if (flags & MAP_NOSYNC)
		docow |= MAP_DISABLE_SYNCER;
	if (flags & MAP_NOCORE)
		docow |= MAP_DISABLE_COREDUMP;
	/* Shared memory is also shared with children. */
	if (flags & MAP_SHARED)
		docow |= MAP_INHERIT_SHARE;
	if (writecounted)
		docow |= MAP_VN_WRITECOUNT;
	if (flags & MAP_STACK) {
		if (object != NULL)
			return (EINVAL);
		docow |= MAP_STACK_GROWS_DOWN;
	}
	if ((flags & MAP_EXCL) != 0)
		docow |= MAP_CHECK_EXCL;
	if ((flags & MAP_GUARD) != 0)
		docow |= MAP_CREATE_GUARD;

	if (fitit) {
		if ((flags & MAP_ALIGNMENT_MASK) == MAP_ALIGNED_SUPER)
			findspace = VMFS_SUPER_SPACE;
		else if ((flags & MAP_ALIGNMENT_MASK) != 0)
			findspace = VMFS_ALIGNED_SPACE(flags >>
			    MAP_ALIGNMENT_SHIFT);
		else
			findspace = VMFS_OPTIMAL_SPACE;
		max_addr = 0;
#ifdef MAP_32BIT
		if ((flags & MAP_32BIT) != 0)
			max_addr = MAP_32BIT_MAX_ADDR;
#endif
		if (curmap) {
			rv = vm_map_find_min(map, object, foff, addr, size,
			    round_page((vm_offset_t)td->td_proc->p_vmspace->
			    vm_daddr + lim_max(td, RLIMIT_DATA)), max_addr,
			    findspace, prot, maxprot, docow);
		} else {
			rv = vm_map_find(map, object, foff, addr, size,
			    max_addr, findspace, prot, maxprot, docow);
		}
	} else {
		rv = vm_map_fixed(map, object, foff, *addr, size,
		    prot, maxprot, docow);
	}
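	/*
	 * Illustrative note: the docow word computed above carries the
	 * MAP_* request flags translated into vm_map copy-on-write and
	 * inheritance bits, e.g. a plain private file mapping (neither
	 * MAP_ANON nor MAP_SHARED) ends up as MAP_COPY_ON_WRITE |
	 * MAP_PREFAULT_PARTIAL.
	 */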
	if (rv == KERN_SUCCESS) {
		/*
		 * If the process has requested that all future mappings
		 * be wired, then heed this.
		 */
		if (map->flags & MAP_WIREFUTURE) {
			vm_map_wire(map, *addr, *addr + size,
			    VM_MAP_WIRE_USER | ((flags & MAP_STACK) ?
			    VM_MAP_WIRE_HOLESOK : VM_MAP_WIRE_NOHOLES));
		}
	}
	return (vm_mmap_to_errno(rv));
}

/*
 * Translate a Mach VM return code to zero on success or the appropriate
 * errno on failure.
 */
int
vm_mmap_to_errno(int rv)
{

	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}
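/*
 * Example (illustrative): a vm_map_find() failure such as KERN_NO_SPACE,
 * e.g. when a MAP_32BIT request cannot be placed below MAP_32BIT_MAX_ADDR,
 * is reported to the mmap(2) caller as ENOMEM by the translation above.
 */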