/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_hwpmc_hooks.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/capsicum.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/procctl.h>
#include <sys/racct.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/vmmeter.h>
#if defined(__amd64__) || defined(__i386__) /* for i386_read_exec */
#include <machine/md_var.h>
#endif

#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vnode_pager.h>

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

int old_mlock = 0;
SYSCTL_INT(_vm, OID_AUTO, old_mlock, CTLFLAG_RWTUN, &old_mlock, 0,
    "Do not apply RLIMIT_MEMLOCK on mlockall");
static int mincore_mapped = 1;
SYSCTL_INT(_vm, OID_AUTO, mincore_mapped, CTLFLAG_RWTUN, &mincore_mapped, 0,
    "mincore reports mappings, not residency");
static int imply_prot_max = 0;
SYSCTL_INT(_vm, OID_AUTO, imply_prot_max, CTLFLAG_RWTUN, &imply_prot_max, 0,
    "Imply maximum page permissions in mmap() when none are specified");

#ifdef MAP_32BIT
#define	MAP_32BIT_MAX_ADDR	((vm_offset_t)1 << 31)
#endif

#ifndef _SYS_SYSPROTO_H_
struct sbrk_args {
	int incr;
};
#endif

int
sys_sbrk(struct thread *td, struct sbrk_args *uap)
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#ifndef _SYS_SYSPROTO_H_
struct sstk_args {
	int incr;
};
#endif

int
sys_sstk(struct thread *td, struct sstk_args *uap)
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43)
int
ogetpagesize(struct thread *td, struct ogetpagesize_args *uap)
{

	td->td_retval[0] = PAGE_SIZE;
	return (0);
}
#endif				/* COMPAT_43 */

/*
 * Memory Map (mmap) system call.  Note that the file offset
 * and address are allowed to be NOT page aligned, though if
 * the MAP_FIXED flag is set, both must have the same remainder
 * modulo the PAGE_SIZE (POSIX 1003.1b).  If the address is not
 * page-aligned, the actual mapping starts at trunc_page(addr)
 * and the return value is adjusted up by the page offset.
 *
 * Generally speaking, only character devices which are themselves
 * memory-based, such as a video framebuffer, can be mmap'd.  Otherwise
 * there would be no cache coherency between a descriptor and a VM mapping
 * both to the same character device.
 */
#ifndef _SYS_SYSPROTO_H_
struct mmap_args {
	void *addr;
	size_t len;
	int prot;
	int flags;
	int fd;
	long pad;
	off_t pos;
};
#endif

int
sys_mmap(struct thread *td, struct mmap_args *uap)
{

	return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, uap->prot,
	    uap->flags, uap->fd, uap->pos));
}

int
kern_mmap(struct thread *td, uintptr_t addr0, size_t len, int prot, int flags,
    int fd, off_t pos)
{
	struct vmspace *vms;
	struct file *fp;
	vm_offset_t addr;
	vm_size_t pageoff, size;
	vm_prot_t cap_maxprot;
	int align, error, max_prot;
	cap_rights_t rights;

	if ((prot & ~(_PROT_ALL | PROT_MAX(_PROT_ALL))) != 0)
		return (EINVAL);
	max_prot = PROT_MAX_EXTRACT(prot);
	prot = PROT_EXTRACT(prot);
	if (max_prot != 0 && (max_prot & prot) != prot)
		return (EINVAL);
	/*
	 * Always honor PROT_MAX if set.  If not, default to all
	 * permissions unless we're implying maximum permissions.
	 *
	 * XXX: should be tunable per process and ABI.
	 */
	if (max_prot == 0)
		max_prot = (imply_prot_max && prot != PROT_NONE) ?
		    prot : _PROT_ALL;

	vms = td->td_proc->p_vmspace;
	fp = NULL;
	AUDIT_ARG_FD(fd);
	addr = addr0;

	/*
	 * Ignore old flags that used to be defined but did not do anything.
	 */
	flags &= ~(MAP_RESERVED0020 | MAP_RESERVED0040);

	/*
	 * Enforce the constraints.
	 * Mapping of length 0 is only allowed for old binaries.
	 * Anonymous mapping shall specify -1 as file descriptor and
	 * zero position for new code. Be nice to ancient a.out
	 * binaries and correct pos for anonymous mapping, since old
	 * ld.so sometimes issues anonymous map requests with non-zero
	 * pos.
	 */
	if (!SV_CURPROC_FLAG(SV_AOUT)) {
		if ((len == 0 && curproc->p_osrel >= P_OSREL_MAP_ANON) ||
		    ((flags & MAP_ANON) != 0 && (fd != -1 || pos != 0)))
			return (EINVAL);
	} else {
		if ((flags & MAP_ANON) != 0)
			pos = 0;
	}

	if (flags & MAP_STACK) {
		if ((fd != -1) ||
		    ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE)))
			return (EINVAL);
		flags |= MAP_ANON;
		pos = 0;
	}
	if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | MAP_HASSEMAPHORE |
	    MAP_STACK | MAP_NOSYNC | MAP_ANON | MAP_EXCL | MAP_NOCORE |
	    MAP_PREFAULT_READ | MAP_GUARD |
#ifdef MAP_32BIT
	    MAP_32BIT |
#endif
	    MAP_ALIGNMENT_MASK)) != 0)
		return (EINVAL);
	if ((flags & (MAP_EXCL | MAP_FIXED)) == MAP_EXCL)
		return (EINVAL);
	if ((flags & (MAP_SHARED | MAP_PRIVATE)) == (MAP_SHARED | MAP_PRIVATE))
		return (EINVAL);
	if (prot != PROT_NONE &&
	    (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) != 0)
		return (EINVAL);
	if ((flags & MAP_GUARD) != 0 && (prot != PROT_NONE || fd != -1 ||
	    pos != 0 || (flags & ~(MAP_FIXED | MAP_GUARD | MAP_EXCL |
#ifdef MAP_32BIT
	    MAP_32BIT |
#endif
	    MAP_ALIGNMENT_MASK)) != 0))
		return (EINVAL);

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;

	/* Compute size from len by rounding (on both ends). */
	size = len + pageoff;			/* low end... */
	size = round_page(size);		/* hi end */
	/* Check for rounding up to zero. */
	if (len > size)
		return (ENOMEM);

	/* Ensure alignment is at least a page and fits in a pointer. */
	align = flags & MAP_ALIGNMENT_MASK;
	if (align != 0 && align != MAP_ALIGNED_SUPER &&
	    (align >> MAP_ALIGNMENT_SHIFT >= sizeof(void *) * NBBY ||
	    align >> MAP_ALIGNMENT_SHIFT < PAGE_SHIFT))
		return (EINVAL);

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		/* Address range must be all in user VM space. */
		if (addr < vm_map_min(&vms->vm_map) ||
		    addr + size > vm_map_max(&vms->vm_map))
			return (EINVAL);
		if (addr + size < addr)
			return (EINVAL);
#ifdef MAP_32BIT
		if (flags & MAP_32BIT && addr + size > MAP_32BIT_MAX_ADDR)
			return (EINVAL);
	} else if (flags & MAP_32BIT) {
		/*
		 * For MAP_32BIT, override the hint if it is too high and
		 * do not bother moving the mapping past the heap (since
		 * the heap is usually above 2GB).
		 */
		if (addr + size > MAP_32BIT_MAX_ADDR)
			addr = 0;
#endif
	} else {
		/*
		 * XXX for non-fixed mappings where no hint is provided or
		 * the hint would fall in the potential heap space,
		 * place it after the end of the largest possible heap.
		 *
		 * There should really be a pmap call to determine a reasonable
		 * location.
		 */
		if (addr == 0 ||
		    (addr >= round_page((vm_offset_t)vms->vm_taddr) &&
		    addr < round_page((vm_offset_t)vms->vm_daddr +
		    lim_max(td, RLIMIT_DATA))))
			addr = round_page((vm_offset_t)vms->vm_daddr +
			    lim_max(td, RLIMIT_DATA));
	}
	if (len == 0) {
		/*
		 * Return success without mapping anything for old
		 * binaries that request a page-aligned mapping of
		 * length 0.  For modern binaries, this function
		 * returns an error earlier.
		 */
		error = 0;
	} else if ((flags & MAP_GUARD) != 0) {
		error = vm_mmap_object(&vms->vm_map, &addr, size, VM_PROT_NONE,
		    VM_PROT_NONE, flags, NULL, pos, FALSE, td);
	} else if ((flags & MAP_ANON) != 0) {
		/*
		 * Mapping blank space is trivial.
		 *
		 * This relies on VM_PROT_* matching PROT_*.
		 */
		error = vm_mmap_object(&vms->vm_map, &addr, size, prot,
		    max_prot, flags, NULL, pos, FALSE, td);
	} else {
		/*
		 * Mapping file, get fp for validation and don't let the
		 * descriptor disappear on us if we block. Check capability
		 * rights, but also return the maximum rights to be combined
		 * with maxprot later.
		 */
		cap_rights_init(&rights, CAP_MMAP);
		if (prot & PROT_READ)
			cap_rights_set(&rights, CAP_MMAP_R);
		if ((flags & MAP_SHARED) != 0) {
			if (prot & PROT_WRITE)
				cap_rights_set(&rights, CAP_MMAP_W);
		}
		if (prot & PROT_EXEC)
			cap_rights_set(&rights, CAP_MMAP_X);
		error = fget_mmap(td, fd, &rights, &cap_maxprot, &fp);
		if (error != 0)
			goto done;
		if ((flags & (MAP_SHARED | MAP_PRIVATE)) == 0 &&
		    td->td_proc->p_osrel >= P_OSREL_MAP_FSTRICT) {
			error = EINVAL;
			goto done;
		}

		/* This relies on VM_PROT_* matching PROT_*. */
		error = fo_mmap(fp, &vms->vm_map, &addr, size, prot,
		    max_prot & cap_maxprot, flags, pos, td);
	}

	if (error == 0)
		td->td_retval[0] = (register_t) (addr + pageoff);
done:
	if (fp)
		fdrop(fp, td);

	return (error);
}
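
/*
 * Illustrative sketch of the PROT_MAX() handling above (userspace
 * usage, not kernel code; the names and values are examples only):
 *
 *	p = mmap(NULL, len, PROT_READ | PROT_MAX(PROT_READ | PROT_WRITE),
 *	    MAP_PRIVATE | MAP_ANON, -1, 0);
 *
 * creates a read-only mapping that a later mprotect(p, len, PROT_WRITE)
 * may upgrade, while mprotect(p, len, PROT_EXEC) fails because the
 * maximum permissions were capped at read/write.  When no PROT_MAX()
 * bits are supplied, max_prot defaults to _PROT_ALL, or to the
 * requested protection when the vm.imply_prot_max sysctl is enabled
 * and prot is not PROT_NONE.
 */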

#if defined(COMPAT_FREEBSD6)
int
freebsd6_mmap(struct thread *td, struct freebsd6_mmap_args *uap)
{

	return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, uap->prot,
	    uap->flags, uap->fd, uap->pos));
}
#endif

#ifdef COMPAT_43
#ifndef _SYS_SYSPROTO_H_
struct ommap_args {
	caddr_t addr;
	int len;
	int prot;
	int flags;
	int fd;
	long pos;
};
#endif
int
ommap(struct thread *td, struct ommap_args *uap)
{
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC | PROT_WRITE,
		PROT_READ,
		PROT_EXEC | PROT_READ,
		PROT_WRITE | PROT_READ,
		PROT_EXEC | PROT_WRITE | PROT_READ,
	};
	int flags, prot;

#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100

	prot = cvtbsdprot[uap->prot & 0x7];
#if (defined(COMPAT_FREEBSD32) && defined(__amd64__)) || defined(__i386__)
	if (i386_read_exec && SV_PROC_FLAG(td->td_proc, SV_ILP32) &&
	    prot != 0)
		prot |= PROT_EXEC;
#endif
	flags = 0;
	if (uap->flags & OMAP_ANON)
		flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		flags |= MAP_SHARED;
	else
		flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		flags |= MAP_FIXED;
	return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, prot, flags,
	    uap->fd, uap->pos));
}
#endif				/* COMPAT_43 */

#ifndef _SYS_SYSPROTO_H_
struct msync_args {
	void *addr;
	size_t len;
	int flags;
};
#endif
int
sys_msync(struct thread *td, struct msync_args *uap)
{

	return (kern_msync(td, (uintptr_t)uap->addr, uap->len, uap->flags));
}

int
kern_msync(struct thread *td, uintptr_t addr0, size_t size, int flags)
{
	vm_offset_t addr;
	vm_size_t pageoff;
	vm_map_t map;
	int rv;

	addr = addr0;
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	map = &td->td_proc->p_vmspace->vm_map;

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_sync(map, addr, addr + size, (flags & MS_ASYNC) == 0,
	    (flags & MS_INVALIDATE) != 0);
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
		return (ENOMEM);
	case KERN_INVALID_ARGUMENT:
		return (EBUSY);
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
}

#ifndef _SYS_SYSPROTO_H_
struct munmap_args {
	void *addr;
	size_t len;
};
#endif
int
sys_munmap(struct thread *td, struct munmap_args *uap)
{

	return (kern_munmap(td, (uintptr_t)uap->addr, uap->len));
}

int
kern_munmap(struct thread *td, uintptr_t addr0, size_t size)
{
#ifdef HWPMC_HOOKS
	struct pmckern_map_out pkm;
	vm_map_entry_t entry;
	bool pmc_handled;
#endif
	vm_offset_t addr;
	vm_size_t pageoff;
	vm_map_t map;

	if (size == 0)
		return (EINVAL);

	addr = addr0;
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	if (addr < vm_map_min(map) || addr + size > vm_map_max(map))
		return (EINVAL);
	vm_map_lock(map);
#ifdef HWPMC_HOOKS
	pmc_handled = false;
	if (PMC_HOOK_INSTALLED(PMC_FN_MUNMAP)) {
		pmc_handled = true;
		/*
		 * Inform hwpmc if the address range being unmapped contains
		 * an executable region.
		 */
		pkm.pm_address = (uintptr_t) NULL;
		if (vm_map_lookup_entry(map, addr, &entry)) {
			for (; entry->start < addr + size;
			    entry = entry->next) {
				if (vm_map_check_protection(map, entry->start,
				    entry->end, VM_PROT_EXECUTE) == TRUE) {
					pkm.pm_address = (uintptr_t) addr;
					pkm.pm_size = (size_t) size;
					break;
				}
			}
		}
	}
#endif
	vm_map_delete(map, addr, addr + size);

#ifdef HWPMC_HOOKS
	if (__predict_false(pmc_handled)) {
		/* downgrade the lock to prevent a LOR with the pmc-sx lock */
		vm_map_lock_downgrade(map);
		if (pkm.pm_address != (uintptr_t) NULL)
			PMC_CALL_HOOK(td, PMC_FN_MUNMAP, (void *) &pkm);
		vm_map_unlock_read(map);
	} else
#endif
		vm_map_unlock(map);

	/* vm_map_delete returns nothing but KERN_SUCCESS anyway */
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mprotect_args {
	const void *addr;
	size_t len;
	int prot;
};
#endif
int
sys_mprotect(struct thread *td, struct mprotect_args *uap)
{

	return (kern_mprotect(td, (uintptr_t)uap->addr, uap->len, uap->prot));
}

int
kern_mprotect(struct thread *td, uintptr_t addr0, size_t size, int prot)
{
	vm_offset_t addr;
	vm_size_t pageoff;
	int vm_error, max_prot;

	addr = addr0;
	if ((prot & ~(_PROT_ALL | PROT_MAX(_PROT_ALL))) != 0)
		return (EINVAL);
	max_prot = PROT_MAX_EXTRACT(prot);
	prot = PROT_EXTRACT(prot);
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
#ifdef COMPAT_FREEBSD32
	if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
		if (((addr + size) & 0xffffffff) < addr)
			return (EINVAL);
	} else
#endif
	if (addr + size < addr)
		return (EINVAL);

	vm_error = KERN_SUCCESS;
	if (max_prot != 0) {
		if ((max_prot & prot) != prot)
			return (EINVAL);
		vm_error = vm_map_protect(&td->td_proc->p_vmspace->vm_map,
		    addr, addr + size, max_prot, TRUE);
	}
	if (vm_error == KERN_SUCCESS)
		vm_error = vm_map_protect(&td->td_proc->p_vmspace->vm_map,
		    addr, addr + size, prot, FALSE);

	switch (vm_error) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	case KERN_RESOURCE_SHORTAGE:
		return (ENOMEM);
	}
	return (EINVAL);
}
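
/*
 * Sketch of the two-step update above (illustrative userspace usage,
 * assuming the standard PROT_MAX() semantics): a call such as
 * mprotect(p, len, PROT_READ | PROT_MAX(PROT_READ)) first installs the
 * new maximum protection (vm_map_protect(..., max_prot, TRUE)) and
 * then the current protection (vm_map_protect(..., prot, FALSE)), so
 * a subsequent mprotect(p, len, PROT_WRITE) on the same range fails
 * with EACCES.
 */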

#ifndef _SYS_SYSPROTO_H_
struct minherit_args {
	void *addr;
	size_t len;
	int inherit;
};
#endif
int
sys_minherit(struct thread *td, struct minherit_args *uap)
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_inherit_t inherit;

	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	inherit = uap->inherit;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_inherit(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct madvise_args {
	void *addr;
	size_t len;
	int behav;
};
#endif

int
sys_madvise(struct thread *td, struct madvise_args *uap)
{

	return (kern_madvise(td, (uintptr_t)uap->addr, uap->len, uap->behav));
}

int
kern_madvise(struct thread *td, uintptr_t addr0, size_t len, int behav)
{
	vm_map_t map;
	vm_offset_t addr, end, start;
	int flags;

	/*
	 * Check for our special case, advising the swap pager we are
	 * "immortal."
	 */
	if (behav == MADV_PROTECT) {
		flags = PPROT_SET;
		return (kern_procctl(td, P_PID, td->td_proc->p_pid,
		    PROC_SPROTECT, &flags));
	}

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	addr = addr0;
	if (addr < vm_map_min(map) || addr + len > vm_map_max(map))
		return (EINVAL);
	if ((addr + len) < addr)
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	start = trunc_page(addr);
	end = round_page(addr + len);

	/*
	 * vm_map_madvise() checks for illegal values of behav.
	 */
	return (vm_map_madvise(map, start, end, behav));
}

#ifndef _SYS_SYSPROTO_H_
struct mincore_args {
	const void *addr;
	size_t len;
	char *vec;
};
#endif

int
sys_mincore(struct thread *td, struct mincore_args *uap)
{

	return (kern_mincore(td, (uintptr_t)uap->addr, uap->len, uap->vec));
}

int
kern_mincore(struct thread *td, uintptr_t addr0, size_t len, char *vec)
{
	vm_offset_t addr, first_addr;
	vm_offset_t end, cend;
	pmap_t pmap;
	vm_map_t map;
	int error = 0;
	int vecindex, lastvecindex;
	vm_map_entry_t current;
	vm_map_entry_t entry;
	vm_object_t object;
	vm_paddr_t locked_pa;
	vm_page_t m;
	vm_pindex_t pindex;
	int mincoreinfo;
	unsigned int timestamp;
	boolean_t locked;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page(addr0);
	end = addr + (vm_size_t)round_page(len);
	map = &td->td_proc->p_vmspace->vm_map;
	if (end > vm_map_max(map) || end < addr)
		return (ENOMEM);

	pmap = vmspace_pmap(td->td_proc->p_vmspace);

	vm_map_lock_read(map);
RestartScan:
	timestamp = map->timestamp;

	if (!vm_map_lookup_entry(map, addr, &entry)) {
		vm_map_unlock_read(map);
		return (ENOMEM);
	}

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current process's address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for (current = entry; current->start < end; current = current->next) {

		/*
		 * check for contiguity
		 */
		if (current->end < end && current->next->start > current->end) {
			vm_map_unlock_read(map);
			return (ENOMEM);
		}

		/*
		 * ignore submaps (for now) or null objects
		 */
		if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) ||
		    current->object.vm_object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		/*
		 * scan this entry one page at a time
		 */
		while (addr < cend) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 */
			object = NULL;
			locked_pa = 0;
		retry:
			m = NULL;
			mincoreinfo = pmap_mincore(pmap, addr, &locked_pa);
			if (mincore_mapped) {
				/*
				 * We only care about this pmap's
				 * mapping of the page, if any.
				 */
				if (locked_pa != 0) {
					vm_page_unlock(PHYS_TO_VM_PAGE(
					    locked_pa));
				}
			} else if (locked_pa != 0) {
				/*
				 * The page is mapped by this process but not
				 * both accessed and modified.  It is also
				 * managed.  Acquire the object lock so that
				 * other mappings might be examined.
				 */
				m = PHYS_TO_VM_PAGE(locked_pa);
				if (m->object != object) {
					if (object != NULL)
						VM_OBJECT_WUNLOCK(object);
					object = m->object;
					locked = VM_OBJECT_TRYWLOCK(object);
					vm_page_unlock(m);
					if (!locked) {
						VM_OBJECT_WLOCK(object);
						vm_page_lock(m);
						goto retry;
					}
				} else
					vm_page_unlock(m);
				KASSERT(m->valid == VM_PAGE_BITS_ALL,
				    ("mincore: page %p is mapped but invalid",
				    m));
			} else if (mincoreinfo == 0) {
				/*
				 * The page is not mapped by this process.  If
				 * the object implements managed pages, then
				 * determine if the page is resident so that
				 * the mappings might be examined.
				 */
				if (current->object.vm_object != object) {
					if (object != NULL)
						VM_OBJECT_WUNLOCK(object);
					object = current->object.vm_object;
					VM_OBJECT_WLOCK(object);
				}
				if (object->type == OBJT_DEFAULT ||
				    object->type == OBJT_SWAP ||
				    object->type == OBJT_VNODE) {
					pindex = OFF_TO_IDX(current->offset +
					    (addr - current->start));
					m = vm_page_lookup(object, pindex);
					if (m != NULL && m->valid == 0)
						m = NULL;
					if (m != NULL)
						mincoreinfo = MINCORE_INCORE;
				}
			}
			if (m != NULL) {
				/* Examine other mappings to the page. */
				if (m->dirty == 0 && pmap_is_modified(m))
					vm_page_dirty(m);
				if (m->dirty != 0)
					mincoreinfo |= MINCORE_MODIFIED_OTHER;
				/*
				 * The first test for PGA_REFERENCED is an
				 * optimization.  The second test is
				 * required because a concurrent pmap
				 * operation could clear the last reference
				 * and set PGA_REFERENCED before the call to
				 * pmap_is_referenced().
				 */
				if ((m->aflags & PGA_REFERENCED) != 0 ||
				    pmap_is_referenced(m) ||
				    (m->aflags & PGA_REFERENCED) != 0)
					mincoreinfo |= MINCORE_REFERENCED_OTHER;
			}
			if (object != NULL)
				VM_OBJECT_WUNLOCK(object);

			/*
			 * subyte may page fault.  In case it needs to modify
			 * the map, we release the lock.
			 */
			vm_map_unlock_read(map);

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = atop(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make sure that
			 * the byte vector is zeroed for those skipped entries.
			 */
			while ((lastvecindex + 1) < vecindex) {
				++lastvecindex;
				error = subyte(vec + lastvecindex, 0);
				if (error) {
					error = EFAULT;
					goto done2;
				}
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte(vec + vecindex, mincoreinfo);
			if (error) {
				error = EFAULT;
				goto done2;
			}

			/*
			 * If the map has changed, due to the subyte, the previous
			 * output may be invalid.
			 */
			vm_map_lock_read(map);
			if (timestamp != map->timestamp)
				goto RestartScan;

			lastvecindex = vecindex;
			addr += PAGE_SIZE;
		}
	}

	/*
	 * subyte may page fault.  In case it needs to modify
	 * the map, we release the lock.
	 */
	vm_map_unlock_read(map);

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = atop(end - first_addr);
	while ((lastvecindex + 1) < vecindex) {
		++lastvecindex;
		error = subyte(vec + lastvecindex, 0);
		if (error) {
			error = EFAULT;
			goto done2;
		}
	}

	/*
	 * If the map has changed, due to the subyte, the previous
	 * output may be invalid.
	 */
	vm_map_lock_read(map);
	if (timestamp != map->timestamp)
		goto RestartScan;
	vm_map_unlock_read(map);
done2:
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct mlock_args {
	const void *addr;
	size_t len;
};
#endif
int
sys_mlock(struct thread *td, struct mlock_args *uap)
{

	return (kern_mlock(td->td_proc, td->td_ucred,
	    __DECONST(uintptr_t, uap->addr), uap->len));
}

int
kern_mlock(struct proc *proc, struct ucred *cred, uintptr_t addr0, size_t len)
{
	vm_offset_t addr, end, last, start;
	vm_size_t npages, size;
	vm_map_t map;
	unsigned long nsize;
	int error;

	error = priv_check_cred(cred, PRIV_VM_MLOCK);
	if (error)
		return (error);
	addr = addr0;
	size = len;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	npages = atop(end - start);
	if (npages > vm_page_max_user_wired)
		return (ENOMEM);
	map = &proc->p_vmspace->vm_map;
	PROC_LOCK(proc);
	nsize = ptoa(npages + pmap_wired_count(map->pmap));
	if (nsize > lim_cur_proc(proc, RLIMIT_MEMLOCK)) {
		PROC_UNLOCK(proc);
		return (ENOMEM);
	}
	PROC_UNLOCK(proc);
#ifdef RACCT
	if (racct_enable) {
		PROC_LOCK(proc);
		error = racct_set(proc, RACCT_MEMLOCK, nsize);
		PROC_UNLOCK(proc);
		if (error != 0)
			return (ENOMEM);
	}
#endif
	error = vm_map_wire(map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (racct_enable && error != KERN_SUCCESS) {
		PROC_LOCK(proc);
		racct_set(proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}
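
/*
 * Worked example of the wiring arithmetic above (illustrative numbers,
 * assuming 4KB pages): mlock() with addr0 = 0x1234 and len = 0x2000
 * gives start = trunc_page(0x1234) = 0x1000 and end =
 * round_page(0x3234) = 0x4000, so npages = atop(0x3000) = 3.  npages
 * alone is checked against vm.max_user_wired, while npages plus the
 * pages already wired in the pmap is charged against RLIMIT_MEMLOCK
 * before vm_map_wire() is called.
 */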

#ifndef _SYS_SYSPROTO_H_
struct mlockall_args {
	int how;
};
#endif

int
sys_mlockall(struct thread *td, struct mlockall_args *uap)
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = priv_check(td, PRIV_VM_MLOCK);
	if (error)
		return (error);

	if ((uap->how == 0) || ((uap->how & ~(MCL_CURRENT|MCL_FUTURE)) != 0))
		return (EINVAL);

	/*
	 * If wiring all pages in the process would cause it to exceed
	 * a hard resource limit, return ENOMEM.
	 */
	if (!old_mlock && uap->how & MCL_CURRENT) {
		if (map->size > lim_cur(td, RLIMIT_MEMLOCK))
			return (ENOMEM);
	}
#ifdef RACCT
	if (racct_enable) {
		PROC_LOCK(td->td_proc);
		error = racct_set(td->td_proc, RACCT_MEMLOCK, map->size);
		PROC_UNLOCK(td->td_proc);
		if (error != 0)
			return (ENOMEM);
	}
#endif

	if (uap->how & MCL_FUTURE) {
		vm_map_lock(map);
		vm_map_modflags(map, MAP_WIREFUTURE, 0);
		vm_map_unlock(map);
		error = 0;
	}

	if (uap->how & MCL_CURRENT) {
		/*
		 * P1003.1-2001 mandates that all currently mapped pages
		 * will be memory resident and locked (wired) upon return
		 * from mlockall().  vm_map_wire() will wire pages, by
		 * calling vm_fault_wire() for each page in the region.
		 */
		error = vm_map_wire(map, vm_map_min(map), vm_map_max(map),
		    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
		if (error == KERN_SUCCESS)
			error = 0;
		else if (error == KERN_RESOURCE_SHORTAGE)
			error = ENOMEM;
		else
			error = EAGAIN;
	}
#ifdef RACCT
	if (racct_enable && error != KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct munlockall_args {
	register_t dummy;
};
#endif

int
sys_munlockall(struct thread *td, struct munlockall_args *uap)
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);

	/* Clear the MAP_WIREFUTURE flag from this vm_map. */
	vm_map_lock(map);
	vm_map_modflags(map, 0, MAP_WIREFUTURE);
	vm_map_unlock(map);

	/* Forcibly unwire all pages. */
	error = vm_map_unwire(map, vm_map_min(map), vm_map_max(map),
	    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
#ifdef RACCT
	if (racct_enable && error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK, 0);
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct munlock_args {
	const void *addr;
	size_t len;
};
#endif
int
sys_munlock(struct thread *td, struct munlock_args *uap)
1228df8bae1dSRodney W. Grimes /*
1229c8daea13SAlexander Kabaev  * vm_mmap_vnode()
1230c8daea13SAlexander Kabaev  *
1231c8daea13SAlexander Kabaev  * Helper function for vm_mmap.  Performs the sanity checks specific to
1232c8daea13SAlexander Kabaev  * mmap operations on vnodes.
1233c8daea13SAlexander Kabaev  */
1234c8daea13SAlexander Kabaev int
1235c8daea13SAlexander Kabaev vm_mmap_vnode(struct thread *td, vm_size_t objsize,
1236c8daea13SAlexander Kabaev     vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
123784110e7eSKonstantin Belousov     struct vnode *vp, vm_ooffset_t *foffp, vm_object_t *objp,
123884110e7eSKonstantin Belousov     boolean_t *writecounted)
1239c8daea13SAlexander Kabaev {
1240c8daea13SAlexander Kabaev 	struct vattr va;
1241c8daea13SAlexander Kabaev 	vm_object_t obj;
1242bd0e1bebSMark Johnston 	vm_ooffset_t foff;
12430359a12eSAttilio Rao 	struct ucred *cred;
124478022527SKonstantin Belousov 	int error, flags;
124578022527SKonstantin Belousov 	bool writex;
1246c8daea13SAlexander Kabaev
12470359a12eSAttilio Rao 	cred = td->td_ucred;
124878022527SKonstantin Belousov 	writex = (*maxprotp & VM_PROT_WRITE) != 0 &&
124978022527SKonstantin Belousov 	    (*flagsp & MAP_SHARED) != 0;
125078022527SKonstantin Belousov 	if ((error = vget(vp, LK_SHARED, td)) != 0)
1251c8daea13SAlexander Kabaev 		return (error);
12520df42647SRobert Watson 	AUDIT_ARG_VNODE1(vp);
125364345f0bSJohn Baldwin 	foff = *foffp;
1254c8daea13SAlexander Kabaev 	flags = *flagsp;
12558516dd18SPoul-Henning Kamp 	obj = vp->v_object;
1256c8daea13SAlexander Kabaev 	if (vp->v_type == VREG) {
1257c8daea13SAlexander Kabaev 		/*
1258c8daea13SAlexander Kabaev 		 * Get the proper underlying object
1259c8daea13SAlexander Kabaev 		 */
12608516dd18SPoul-Henning Kamp 		if (obj == NULL) {
1261c8daea13SAlexander Kabaev 			error = EINVAL;
1262c8daea13SAlexander Kabaev 			goto done;
1263c8daea13SAlexander Kabaev 		}
1264e5f299ffSKonstantin Belousov 		if (obj->type == OBJT_VNODE && obj->handle != vp) {
1265c8daea13SAlexander Kabaev 			vput(vp);
1266c8daea13SAlexander Kabaev 			vp = (struct vnode *)obj->handle;
126784110e7eSKonstantin Belousov 			/*
126884110e7eSKonstantin Belousov 			 * Bypass filesystems obey the mpsafety of the
126953f5f8a0SKonstantin Belousov 			 * underlying fs.  Tmpfs never bypasses.
127084110e7eSKonstantin Belousov 			 */
127178022527SKonstantin Belousov 			error = vget(vp, LK_SHARED, td);
12725050aa86SKonstantin Belousov 			if (error != 0)
127384110e7eSKonstantin Belousov 				return (error);
127484110e7eSKonstantin Belousov 		}
127578022527SKonstantin Belousov 		if (writex) {
127684110e7eSKonstantin Belousov 			*writecounted = TRUE;
127784110e7eSKonstantin Belousov 			vnode_pager_update_writecount(obj, 0, objsize);
127884110e7eSKonstantin Belousov 		}
1279c8daea13SAlexander Kabaev 	} else {
1280c8daea13SAlexander Kabaev 		error = EINVAL;
1281c8daea13SAlexander Kabaev 		goto done;
1282c8daea13SAlexander Kabaev 	}
12830359a12eSAttilio Rao 	if ((error = VOP_GETATTR(vp, &va, cred)))
1284c8daea13SAlexander Kabaev 		goto done;
1285c92163dcSChristian S.J. Peron #ifdef MAC
12867077c426SJohn Baldwin 	/* This relies on VM_PROT_* matching PROT_*. */
12877077c426SJohn Baldwin 	error = mac_vnode_check_mmap(cred, vp, (int)prot, flags);
1288c92163dcSChristian S.J. Peron 	if (error != 0)
1289c92163dcSChristian S.J. Peron 		goto done;
1290c92163dcSChristian S.J. Peron #endif
1291c8daea13SAlexander Kabaev 	if ((flags & MAP_SHARED) != 0) {
1292c8daea13SAlexander Kabaev 		if ((va.va_flags & (SF_SNAPSHOT|IMMUTABLE|APPEND)) != 0) {
12937077c426SJohn Baldwin 			if (prot & VM_PROT_WRITE) {
1294c8daea13SAlexander Kabaev 				error = EPERM;
1295c8daea13SAlexander Kabaev 				goto done;
1296c8daea13SAlexander Kabaev 			}
1297c8daea13SAlexander Kabaev 			*maxprotp &= ~VM_PROT_WRITE;
1298c8daea13SAlexander Kabaev 		}
1299c8daea13SAlexander Kabaev 	}
1300c8daea13SAlexander Kabaev 	/*
1301c8daea13SAlexander Kabaev 	 * If it is a regular file without any references (links),
1302c8daea13SAlexander Kabaev 	 * we do not need to sync it.
1303c8daea13SAlexander Kabaev 	 * Adjust object size to be the size of the actual file.
1304c8daea13SAlexander Kabaev 	 */
1305c8daea13SAlexander Kabaev 	objsize = round_page(va.va_size);
1306c8daea13SAlexander Kabaev 	if (va.va_nlink == 0)
1307c8daea13SAlexander Kabaev 		flags |= MAP_NOSYNC;
13083d653db0SAlan Cox 	if (obj->type == OBJT_VNODE) {
1309e5f299ffSKonstantin Belousov 		obj = vm_pager_allocate(OBJT_VNODE, vp, objsize, prot, foff,
1310e5f299ffSKonstantin Belousov 		    cred);
1311c8daea13SAlexander Kabaev 		if (obj == NULL) {
131264345f0bSJohn Baldwin 			error = ENOMEM;
1313c8daea13SAlexander Kabaev 			goto done;
1314c8daea13SAlexander Kabaev 		}
13153d653db0SAlan Cox 	} else {
13163d653db0SAlan Cox 		KASSERT(obj->type == OBJT_DEFAULT || obj->type == OBJT_SWAP,
13173d653db0SAlan Cox 		    ("wrong object type"));
13183d653db0SAlan Cox 		VM_OBJECT_WLOCK(obj);
13193d653db0SAlan Cox 		vm_object_reference_locked(obj);
13203d653db0SAlan Cox #if VM_NRESERVLEVEL > 0
13213d653db0SAlan Cox 		vm_object_color(obj, 0);
13223d653db0SAlan Cox #endif
13233d653db0SAlan Cox 		VM_OBJECT_WUNLOCK(obj);
13243d653db0SAlan Cox 	}
1325c8daea13SAlexander Kabaev 	*objp = obj;
1326c8daea13SAlexander Kabaev 	*flagsp = flags;
132764345f0bSJohn Baldwin
13280359a12eSAttilio Rao 	vfs_mark_atime(vp, cred);
13291e309003SDiomidis Spinellis
1330c8daea13SAlexander Kabaev done:
1331bafa6cfcSKonstantin Belousov 	if (error != 0 && *writecounted) {
1332bafa6cfcSKonstantin Belousov 		*writecounted = FALSE;
1333bafa6cfcSKonstantin Belousov 		vnode_pager_update_writecount(obj, objsize, 0);
1334bafa6cfcSKonstantin Belousov 	}
1335c8daea13SAlexander Kabaev 	vput(vp);
1336c8daea13SAlexander Kabaev 	return (error);
1337c8daea13SAlexander Kabaev }
1338c8daea13SAlexander Kabaev
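/*
 * Editor's illustration (not part of the annotated source): the case
 * vm_mmap_vnode() handles.  A writable MAP_SHARED mapping of a regular
 * file is exactly what makes writex true above and bumps the vnode's
 * writecount; the file name here is hypothetical.
 *
 *	#include <sys/mman.h>
 *	#include <fcntl.h>
 *	#include <err.h>
 *
 *	int fd = open("/tmp/example", O_RDWR);
 *	if (fd == -1)
 *		err(1, "open");
 *	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
 *	    fd, 0);
 *	if (p == MAP_FAILED)
 *		err(1, "mmap");
 *	p[0] = 'x';	// the store is eventually written back to the file
 */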
1339c8daea13SAlexander Kabaev /*
134098df9218SJohn Baldwin  * vm_mmap_cdev()
134198df9218SJohn Baldwin  *
134298df9218SJohn Baldwin  * Helper function for vm_mmap.  Performs the sanity checks specific to
134398df9218SJohn Baldwin  * mmap operations on cdevs.
134498df9218SJohn Baldwin  */
134598df9218SJohn Baldwin int
13467077c426SJohn Baldwin vm_mmap_cdev(struct thread *td, vm_size_t objsize, vm_prot_t prot,
13477077c426SJohn Baldwin     vm_prot_t *maxprotp, int *flagsp, struct cdev *cdev, struct cdevsw *dsw,
13487077c426SJohn Baldwin     vm_ooffset_t *foff, vm_object_t *objp)
134998df9218SJohn Baldwin {
135098df9218SJohn Baldwin 	vm_object_t obj;
13517077c426SJohn Baldwin 	int error, flags;
135298df9218SJohn Baldwin
135398df9218SJohn Baldwin 	flags = *flagsp;
135498df9218SJohn Baldwin
135591a35e78SKonstantin Belousov 	if (dsw->d_flags & D_MMAP_ANON) {
13567077c426SJohn Baldwin 		*objp = NULL;
13577077c426SJohn Baldwin 		*foff = 0;
135898df9218SJohn Baldwin 		*maxprotp = VM_PROT_ALL;
135998df9218SJohn Baldwin 		*flagsp |= MAP_ANON;
136098df9218SJohn Baldwin 		return (0);
136198df9218SJohn Baldwin 	}
136298df9218SJohn Baldwin 	/*
136364345f0bSJohn Baldwin 	 * cdevs do not provide private mappings of any kind.
136498df9218SJohn Baldwin 	 */
136598df9218SJohn Baldwin 	if ((*maxprotp & VM_PROT_WRITE) == 0 &&
13667077c426SJohn Baldwin 	    (prot & VM_PROT_WRITE) != 0)
136798df9218SJohn Baldwin 		return (EACCES);
13687077c426SJohn Baldwin 	if (flags & (MAP_PRIVATE|MAP_COPY))
136998df9218SJohn Baldwin 		return (EINVAL);
137098df9218SJohn Baldwin 	/*
137198df9218SJohn Baldwin 	 * Force device mappings to be shared.
137298df9218SJohn Baldwin 	 */
137398df9218SJohn Baldwin 	flags |= MAP_SHARED;
137498df9218SJohn Baldwin #ifdef MAC_XXX
13757077c426SJohn Baldwin 	error = mac_cdev_check_mmap(td->td_ucred, cdev, (int)prot);
13767077c426SJohn Baldwin 	if (error != 0)
137798df9218SJohn Baldwin 		return (error);
137898df9218SJohn Baldwin #endif
137964345f0bSJohn Baldwin 	/*
138064345f0bSJohn Baldwin 	 * First, try d_mmap_single().  If that is not implemented
138164345f0bSJohn Baldwin 	 * (returns ENODEV), fall back to using the device pager.
138264345f0bSJohn Baldwin 	 * Note that d_mmap_single() must return a reference to the
138364345f0bSJohn Baldwin 	 * object (it needs to bump the reference count of the object
138464345f0bSJohn Baldwin 	 * it returns somehow).
138564345f0bSJohn Baldwin 	 *
138664345f0bSJohn Baldwin 	 * XXX assumes VM_PROT_* == PROT_*
138764345f0bSJohn Baldwin 	 */
138864345f0bSJohn Baldwin 	error = dsw->d_mmap_single(cdev, foff, objsize, objp, (int)prot);
138964345f0bSJohn Baldwin 	if (error != ENODEV)
139064345f0bSJohn Baldwin 		return (error);
13913364c323SKonstantin Belousov 	obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff,
13923364c323SKonstantin Belousov 	    td->td_ucred);
139398df9218SJohn Baldwin 	if (obj == NULL)
139498df9218SJohn Baldwin 		return (EINVAL);
139598df9218SJohn Baldwin 	*objp = obj;
139698df9218SJohn Baldwin 	*flagsp = flags;
139798df9218SJohn Baldwin 	return (0);
139898df9218SJohn Baldwin }
139998df9218SJohn Baldwin
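/*
 * Editor's sketch (not part of the annotated source): the rough shape
 * of a driver-provided d_mmap_single() callback, as consumed by
 * vm_mmap_cdev() above.  "mydev_mmap_single" and "mydev_lookup_object"
 * are hypothetical names.  Returning ENODEV triggers the fallback to
 * the device pager; any object returned must carry a reference.
 *
 *	static int
 *	mydev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset,
 *	    vm_size_t size, struct vm_object **object, int nprot)
 *	{
 *		vm_object_t obj;
 *
 *		obj = mydev_lookup_object(cdev, *offset, size, nprot);
 *		if (obj == NULL)
 *			return (ENODEV);	// fall back to the device pager
 *		vm_object_reference(obj);	// caller expects a referenced object
 *		*object = obj;
 *		return (0);
 *	}
 */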
140098df9218SJohn Baldwin /*
1401d2c60af8SMatthew Dillon  * vm_mmap()
1402d2c60af8SMatthew Dillon  *
14037077c426SJohn Baldwin  * Internal version of mmap used by exec, System V shared memory, and
14047077c426SJohn Baldwin  * various device drivers.  Handle is either a vnode pointer, a
14057077c426SJohn Baldwin  * character device, or NULL for MAP_ANON.
1406df8bae1dSRodney W. Grimes  */
1407df8bae1dSRodney W. Grimes int
1408b9dcd593SBruce Evans vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
1409b9dcd593SBruce Evans     vm_prot_t maxprot, int flags,
141098df9218SJohn Baldwin     objtype_t handle_type, void *handle,
1411b9dcd593SBruce Evans     vm_ooffset_t foff)
1412df8bae1dSRodney W. Grimes {
14137077c426SJohn Baldwin 	vm_object_t object;
1414b40ce416SJulian Elischer 	struct thread *td = curthread;
14157077c426SJohn Baldwin 	int error;
141684110e7eSKonstantin Belousov 	boolean_t writecounted;
1417df8bae1dSRodney W. Grimes
1418df8bae1dSRodney W. Grimes 	if (size == 0)
14197077c426SJohn Baldwin 		return (EINVAL);
1420df8bae1dSRodney W. Grimes
1421749474f2SPeter Wemm 	size = round_page(size);
1422010ba384SMark Johnston 	object = NULL;
14237077c426SJohn Baldwin 	writecounted = FALSE;
14247077c426SJohn Baldwin
14257077c426SJohn Baldwin 	/*
14267077c426SJohn Baldwin 	 * Lookup/allocate object.
14277077c426SJohn Baldwin 	 */
14287077c426SJohn Baldwin 	switch (handle_type) {
14297077c426SJohn Baldwin 	case OBJT_DEVICE: {
14307077c426SJohn Baldwin 		struct cdevsw *dsw;
14317077c426SJohn Baldwin 		struct cdev *cdev;
14327077c426SJohn Baldwin 		int ref;
14337077c426SJohn Baldwin
14347077c426SJohn Baldwin 		cdev = handle;
14357077c426SJohn Baldwin 		dsw = dev_refthread(cdev, &ref);
14367077c426SJohn Baldwin 		if (dsw == NULL)
14377077c426SJohn Baldwin 			return (ENXIO);
14387077c426SJohn Baldwin 		error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, cdev,
14397077c426SJohn Baldwin 		    dsw, &foff, &object);
14407077c426SJohn Baldwin 		dev_relthread(cdev, ref);
14417077c426SJohn Baldwin 		break;
14427077c426SJohn Baldwin 	}
14437077c426SJohn Baldwin 	case OBJT_VNODE:
14447077c426SJohn Baldwin 		error = vm_mmap_vnode(td, size, prot, &maxprot, &flags,
14457077c426SJohn Baldwin 		    handle, &foff, &object, &writecounted);
14467077c426SJohn Baldwin 		break;
14477077c426SJohn Baldwin 	case OBJT_DEFAULT:
14487077c426SJohn Baldwin 		if (handle == NULL) {
14497077c426SJohn Baldwin 			error = 0;
14507077c426SJohn Baldwin 			break;
14517077c426SJohn Baldwin 		}
14527077c426SJohn Baldwin 		/* FALLTHROUGH */
14537077c426SJohn Baldwin 	default:
14547077c426SJohn Baldwin 		error = EINVAL;
14557077c426SJohn Baldwin 		break;
14567077c426SJohn Baldwin 	}
14577077c426SJohn Baldwin 	if (error)
14587077c426SJohn Baldwin 		return (error);
14597077c426SJohn Baldwin
14607077c426SJohn Baldwin 	error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object,
14617077c426SJohn Baldwin 	    foff, writecounted, td);
14627077c426SJohn Baldwin 	if (error != 0 && object != NULL) {
14637077c426SJohn Baldwin 		/*
14647077c426SJohn Baldwin 		 * If this mapping was accounted for in the vnode's
14657077c426SJohn Baldwin 		 * writecount, then undo that now.
14667077c426SJohn Baldwin 		 */
14677077c426SJohn Baldwin 		if (writecounted)
14687077c426SJohn Baldwin 			vnode_pager_release_writecount(object, 0, size);
14697077c426SJohn Baldwin 		vm_object_deallocate(object);
14707077c426SJohn Baldwin 	}
14717077c426SJohn Baldwin 	return (error);
14727077c426SJohn Baldwin }
14737077c426SJohn Baldwin
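/*
 * Editor's illustration (not part of the annotated source): how an
 * in-kernel consumer might ask vm_mmap() above for an anonymous,
 * pageable region; "vs" is a hypothetical struct vmspace pointer.
 * OBJT_DEFAULT with a NULL handle selects the MAP_ANON path, so no
 * object is looked up and vm_mmap_object() creates the mapping.
 *
 *	vm_offset_t addr = 0;
 *	int error;
 *
 *	error = vm_mmap(&vs->vm_map, &addr, PAGE_SIZE,
 *	    VM_PROT_READ | VM_PROT_WRITE, VM_PROT_ALL, MAP_ANON,
 *	    OBJT_DEFAULT, NULL, 0);
 *	if (error != 0)
 *		return (error);
 */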
14747077c426SJohn Baldwin /*
14757077c426SJohn Baldwin  * Internal version of mmap that maps a specific VM object into a
14767077c426SJohn Baldwin  * map.  Called by mmap for MAP_ANON, vm_mmap, shm_mmap, and vn_mmap.
14777077c426SJohn Baldwin  */
14787077c426SJohn Baldwin int
14797077c426SJohn Baldwin vm_mmap_object(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
14807077c426SJohn Baldwin     vm_prot_t maxprot, int flags, vm_object_t object, vm_ooffset_t foff,
14817077c426SJohn Baldwin     boolean_t writecounted, struct thread *td)
14827077c426SJohn Baldwin {
14836a97a3f7SKonstantin Belousov 	boolean_t curmap, fitit;
14846a97a3f7SKonstantin Belousov 	vm_offset_t max_addr;
14857077c426SJohn Baldwin 	int docow, error, findspace, rv;
1486df8bae1dSRodney W. Grimes
14876a97a3f7SKonstantin Belousov 	curmap = map == &td->td_proc->p_vmspace->vm_map;
14886a97a3f7SKonstantin Belousov 	if (curmap) {
14892554f86aSMateusz Guzik 		RACCT_PROC_LOCK(td->td_proc);
14902554f86aSMateusz Guzik 		if (map->size + size > lim_cur(td, RLIMIT_VMEM)) {
14912554f86aSMateusz Guzik 			RACCT_PROC_UNLOCK(td->td_proc);
1492070f64feSMatthew Dillon 			return (ENOMEM);
1493070f64feSMatthew Dillon 		}
1494a6492969SAlan Cox 		if (racct_set(td->td_proc, RACCT_VMEM, map->size + size)) {
14952554f86aSMateusz Guzik 			RACCT_PROC_UNLOCK(td->td_proc);
14961ba5ad42SEdward Tomasz Napierala 			return (ENOMEM);
14971ba5ad42SEdward Tomasz Napierala 		}
14987e19eda4SAndrey Zonov 		if (!old_mlock && map->flags & MAP_WIREFUTURE) {
14993ac7d297SAndrey Zonov 			if (ptoa(pmap_wired_count(map->pmap)) + size >
15002554f86aSMateusz Guzik 			    lim_cur(td, RLIMIT_MEMLOCK)) {
15017e19eda4SAndrey Zonov 				racct_set_force(td->td_proc, RACCT_VMEM,
15027e19eda4SAndrey Zonov 				    map->size);
15032554f86aSMateusz Guzik 				RACCT_PROC_UNLOCK(td->td_proc);
15047e19eda4SAndrey Zonov 				return (ENOMEM);
15057e19eda4SAndrey Zonov 			}
15067e19eda4SAndrey Zonov 			error = racct_set(td->td_proc, RACCT_MEMLOCK,
15073ac7d297SAndrey Zonov 			    ptoa(pmap_wired_count(map->pmap)) + size);
15087e19eda4SAndrey Zonov 			if (error != 0) {
15097e19eda4SAndrey Zonov 				racct_set_force(td->td_proc, RACCT_VMEM,
15107e19eda4SAndrey Zonov 				    map->size);
15112554f86aSMateusz Guzik 				RACCT_PROC_UNLOCK(td->td_proc);
15127e19eda4SAndrey Zonov 				return (error);
15137e19eda4SAndrey Zonov 			}
15147e19eda4SAndrey Zonov 		}
15152554f86aSMateusz Guzik 		RACCT_PROC_UNLOCK(td->td_proc);
1516a6492969SAlan Cox 	}
1517070f64feSMatthew Dillon
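/*
 * Editor's note (not part of the annotated source): the block above
 * enforces, in order, the RLIMIT_VMEM resource limit, the RACCT_VMEM
 * container limit, and, when the map is marked MAP_WIREFUTURE, the
 * RLIMIT_MEMLOCK/RACCT_MEMLOCK wiring limits; each failure path after
 * the charge backs out RACCT_VMEM with racct_set_force() before
 * returning.
 */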
1518df8bae1dSRodney W. Grimes 	/*
1519bc9ad247SDavid Greenman 	 * We currently can only deal with page aligned file offsets.
15207077c426SJohn Baldwin 	 * The mmap() system call already enforces this by subtracting
15217077c426SJohn Baldwin 	 * the page offset from the file offset, but checking here
15227077c426SJohn Baldwin 	 * catches errors in device drivers (e.g. d_mmap_single()
15237077c426SJohn Baldwin 	 * callbacks) and other internal mapping requests (such as in
15247077c426SJohn Baldwin 	 * exec).
1525bc9ad247SDavid Greenman 	 */
1526bc9ad247SDavid Greenman 	if (foff & PAGE_MASK)
1527bc9ad247SDavid Greenman 		return (EINVAL);
1528bc9ad247SDavid Greenman
152906cb7259SDavid Greenman 	if ((flags & MAP_FIXED) == 0) {
153006cb7259SDavid Greenman 		fitit = TRUE;
153106cb7259SDavid Greenman 		*addr = round_page(*addr);
153206cb7259SDavid Greenman 	} else {
153306cb7259SDavid Greenman 		if (*addr != trunc_page(*addr))
153406cb7259SDavid Greenman 			return (EINVAL);
153506cb7259SDavid Greenman 		fitit = FALSE;
153606cb7259SDavid Greenman 	}
153784110e7eSKonstantin Belousov
15385f55e841SDavid Greenman 	if (flags & MAP_ANON) {
15397077c426SJohn Baldwin 		if (object != NULL || foff != 0)
15407077c426SJohn Baldwin 			return (EINVAL);
1541c8daea13SAlexander Kabaev 		docow = 0;
154274ffb9afSAlan Cox 	} else if (flags & MAP_PREFAULT_READ)
154374ffb9afSAlan Cox 		docow = MAP_PREFAULT;
154474ffb9afSAlan Cox 	else
15454738fa09SAlan Cox 		docow = MAP_PREFAULT_PARTIAL;
1546df8bae1dSRodney W. Grimes
15474f79d873SMatthew Dillon 	if ((flags & (MAP_ANON|MAP_SHARED)) == 0)
15484738fa09SAlan Cox 		docow |= MAP_COPY_ON_WRITE;
15494f79d873SMatthew Dillon 	if (flags & MAP_NOSYNC)
15504f79d873SMatthew Dillon 		docow |= MAP_DISABLE_SYNCER;
15519730a5daSPaul Saab 	if (flags & MAP_NOCORE)
15529730a5daSPaul Saab 		docow |= MAP_DISABLE_COREDUMP;
15538211bd45SKonstantin Belousov 	/* Shared memory is also shared with children. */
15548211bd45SKonstantin Belousov 	if (flags & MAP_SHARED)
15558211bd45SKonstantin Belousov 		docow |= MAP_INHERIT_SHARE;
155684110e7eSKonstantin Belousov 	if (writecounted)
155784110e7eSKonstantin Belousov 		docow |= MAP_VN_WRITECOUNT;
15584648ba0aSKonstantin Belousov 	if (flags & MAP_STACK) {
15594648ba0aSKonstantin Belousov 		if (object != NULL)
15604648ba0aSKonstantin Belousov 			return (EINVAL);
15614648ba0aSKonstantin Belousov 		docow |= MAP_STACK_GROWS_DOWN;
15624648ba0aSKonstantin Belousov 	}
156311c42bccSKonstantin Belousov 	if ((flags & MAP_EXCL) != 0)
156411c42bccSKonstantin Belousov 		docow |= MAP_CHECK_EXCL;
156519bd0d9cSKonstantin Belousov 	if ((flags & MAP_GUARD) != 0)
156619bd0d9cSKonstantin Belousov 		docow |= MAP_CREATE_GUARD;
15675850152dSJohn Dyson
15684648ba0aSKonstantin Belousov 	if (fitit) {
15695aa60b6fSJohn Baldwin 		if ((flags & MAP_ALIGNMENT_MASK) == MAP_ALIGNED_SUPER)
15705aa60b6fSJohn Baldwin 			findspace = VMFS_SUPER_SPACE;
15715aa60b6fSJohn Baldwin 		else if ((flags & MAP_ALIGNMENT_MASK) != 0)
15725aa60b6fSJohn Baldwin 			findspace = VMFS_ALIGNED_SPACE(flags >>
15735aa60b6fSJohn Baldwin 			    MAP_ALIGNMENT_SHIFT);
15742267af78SJulian Elischer 		else
15755aa60b6fSJohn Baldwin 			findspace = VMFS_OPTIMAL_SPACE;
15766a97a3f7SKonstantin Belousov 		max_addr = 0;
1577edb572a3SJohn Baldwin #ifdef MAP_32BIT
15786a97a3f7SKonstantin Belousov 		if ((flags & MAP_32BIT) != 0)
15796a97a3f7SKonstantin Belousov 			max_addr = MAP_32BIT_MAX_ADDR;
1580edb572a3SJohn Baldwin #endif
15816a97a3f7SKonstantin Belousov 		if (curmap) {
15826a97a3f7SKonstantin Belousov 			rv = vm_map_find_min(map, object, foff, addr, size,
15836a97a3f7SKonstantin Belousov 			    round_page((vm_offset_t)td->td_proc->p_vmspace->
15846a97a3f7SKonstantin Belousov 			    vm_daddr + lim_max(td, RLIMIT_DATA)), max_addr,
15856a97a3f7SKonstantin Belousov 			    findspace, prot, maxprot, docow);
15866a97a3f7SKonstantin Belousov 		} else {
15876a97a3f7SKonstantin Belousov 			rv = vm_map_find(map, object, foff, addr, size,
15886a97a3f7SKonstantin Belousov 			    max_addr, findspace, prot, maxprot, docow);
15896a97a3f7SKonstantin Belousov 		}
15904648ba0aSKonstantin Belousov 	} else {
1591b8ca4ef2SAlan Cox 		rv = vm_map_fixed(map, object, foff, *addr, size,
1592bd7e5f99SJohn Dyson 		    prot, maxprot, docow);
15934648ba0aSKonstantin Belousov 	}
1594bd7e5f99SJohn Dyson
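/*
 * Editor's illustration (not part of the annotated source): how the
 * alignment request above is encoded.  MAP_ALIGNED(n) stores log2 of
 * the desired alignment in the flag word, so
 *
 *	mmap(NULL, len, PROT_READ | PROT_WRITE,
 *	    MAP_PRIVATE | MAP_ANON | MAP_ALIGNED(21), -1, 0);
 *
 * requests a 2 MiB-aligned region; "flags >> MAP_ALIGNMENT_SHIFT"
 * recovers 21, which becomes VMFS_ALIGNED_SPACE(21) for vm_map_find().
 */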
1595f9230ad6SAlan Cox 	if (rv == KERN_SUCCESS) {
15967fb0c17eSDavid Greenman 		/*
1597f9230ad6SAlan Cox 		 * If the process has requested that all future mappings
1598f9230ad6SAlan Cox 		 * be wired, then heed this.
1599f9230ad6SAlan Cox 		 */
160054a3a114SMark Johnston 		if ((map->flags & MAP_WIREFUTURE) != 0) {
160154a3a114SMark Johnston 			vm_map_lock(map);
160254a3a114SMark Johnston 			if ((map->flags & MAP_WIREFUTURE) != 0)
16038cd6a80dSMark Johnston 				(void)vm_map_wire_locked(map, *addr,
160454a3a114SMark Johnston 				    *addr + size, VM_MAP_WIRE_USER |
160554a3a114SMark Johnston 				    ((flags & MAP_STACK) ? VM_MAP_WIRE_HOLESOK :
160654a3a114SMark Johnston 				    VM_MAP_WIRE_NOHOLES));
160754a3a114SMark Johnston 			vm_map_unlock(map);
16081472f4f4SKonstantin Belousov 		}
1609df8bae1dSRodney W. Grimes 	}
16102e32165cSKonstantin Belousov 	return (vm_mmap_to_errno(rv));
16112e32165cSKonstantin Belousov }
16122e32165cSKonstantin Belousov
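/*
 * Editor's illustration (not part of the annotated source): the
 * translation vm_mmap_object() performs for a typical request.  For a
 * private file mapping (MAP_PRIVATE, with no MAP_ANON, MAP_SHARED, or
 * address hint):
 *
 *	docow     = MAP_PREFAULT_PARTIAL | MAP_COPY_ON_WRITE;
 *	fitit     = TRUE;		// no MAP_FIXED; kernel picks the address
 *	findspace = VMFS_OPTIMAL_SPACE;	// no MAP_ALIGNED() request
 *
 * and vm_map_find_min() places the mapping at or above the data
 * segment reservation (vm_daddr + lim_max(RLIMIT_DATA)).
 */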
1613f9230ad6SAlan Cox /*
1614f9230ad6SAlan Cox  * Translate a Mach VM return code to zero on success or the appropriate errno
1615f9230ad6SAlan Cox  * on failure.
1616f9230ad6SAlan Cox  */
16172e32165cSKonstantin Belousov int
16182e32165cSKonstantin Belousov vm_mmap_to_errno(int rv)
16192e32165cSKonstantin Belousov {
16202e32165cSKonstantin Belousov
1621df8bae1dSRodney W. Grimes 	switch (rv) {
1622df8bae1dSRodney W. Grimes 	case KERN_SUCCESS:
1623df8bae1dSRodney W. Grimes 		return (0);
1624df8bae1dSRodney W. Grimes 	case KERN_INVALID_ADDRESS:
1625df8bae1dSRodney W. Grimes 	case KERN_NO_SPACE:
1626df8bae1dSRodney W. Grimes 		return (ENOMEM);
1627df8bae1dSRodney W. Grimes 	case KERN_PROTECTION_FAILURE:
1628df8bae1dSRodney W. Grimes 		return (EACCES);
1629df8bae1dSRodney W. Grimes 	default:
1630df8bae1dSRodney W. Grimes 		return (EINVAL);
1631df8bae1dSRodney W. Grimes 	}
1632df8bae1dSRodney W. Grimes }
1633
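/*
 * Editor's illustration (not part of the annotated source): the
 * translation above lets vm_map consumers return errno values
 * directly, e.g.:
 *
 *	rv = vm_map_remove(map, start, end);	// returns a KERN_* status
 *	return (vm_mmap_to_errno(rv));
 *
 * KERN_SUCCESS becomes 0, KERN_INVALID_ADDRESS and KERN_NO_SPACE
 * become ENOMEM, KERN_PROTECTION_FAILURE becomes EACCES, and anything
 * else becomes EINVAL.
 */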