160727d8bSWarner Losh /*- 251369649SPedro F. Giffuni * SPDX-License-Identifier: BSD-3-Clause 351369649SPedro F. Giffuni * 4df8bae1dSRodney W. Grimes * Copyright (c) 1988 University of Utah. 5df8bae1dSRodney W. Grimes * Copyright (c) 1991, 1993 6df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 7df8bae1dSRodney W. Grimes * 8df8bae1dSRodney W. Grimes * This code is derived from software contributed to Berkeley by 9df8bae1dSRodney W. Grimes * the Systems Programming Group of the University of Utah Computer 10df8bae1dSRodney W. Grimes * Science Department. 11df8bae1dSRodney W. Grimes * 12df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 13df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 14df8bae1dSRodney W. Grimes * are met: 15df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 16df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 17df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 18df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 19df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 20fbbd9655SWarner Losh * 3. Neither the name of the University nor the names of its contributors 21df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 22df8bae1dSRodney W. Grimes * without specific prior written permission. 23df8bae1dSRodney W. Grimes * 24df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34df8bae1dSRodney W. Grimes * SUCH DAMAGE. 35df8bae1dSRodney W. Grimes * 36df8bae1dSRodney W. Grimes * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$ 37df8bae1dSRodney W. Grimes * 38df8bae1dSRodney W. Grimes * @(#)vm_mmap.c 8.4 (Berkeley) 1/12/94 39df8bae1dSRodney W. Grimes */ 40df8bae1dSRodney W. Grimes 41df8bae1dSRodney W. Grimes /* 42df8bae1dSRodney W. Grimes * Mapped file (mmap) interface to VM 43df8bae1dSRodney W. Grimes */ 44df8bae1dSRodney W. Grimes 45874651b1SDavid E. O'Brien #include <sys/cdefs.h> 46874651b1SDavid E. O'Brien __FBSDID("$FreeBSD$"); 47874651b1SDavid E. O'Brien 4849874f6eSJoseph Koshy #include "opt_hwpmc_hooks.h" 493d653db0SAlan Cox #include "opt_vm.h" 50e9822d92SJoerg Wunsch 51df8bae1dSRodney W. Grimes #include <sys/param.h> 52df8bae1dSRodney W. Grimes #include <sys/systm.h> 534a144410SRobert Watson #include <sys/capsicum.h> 54a9d2f8d8SRobert Watson #include <sys/kernel.h> 55fb919e4dSMark Murray #include <sys/lock.h> 5623955314SAlfred Perlstein #include <sys/mutex.h> 57d2d3e875SBruce Evans #include <sys/sysproto.h> 585dc7e31aSKonstantin Belousov #include <sys/elf.h> 59df8bae1dSRodney W. Grimes #include <sys/filedesc.h> 60acd3428bSRobert Watson #include <sys/priv.h> 61df8bae1dSRodney W. Grimes #include <sys/proc.h> 6255648840SJohn Baldwin #include <sys/procctl.h> 631ba5ad42SEdward Tomasz Napierala #include <sys/racct.h> 64070f64feSMatthew Dillon #include <sys/resource.h> 65070f64feSMatthew Dillon #include <sys/resourcevar.h> 6689f6b863SAttilio Rao #include <sys/rwlock.h> 677e19eda4SAndrey Zonov #include <sys/sysctl.h> 68df8bae1dSRodney W. Grimes #include <sys/vnode.h> 693ac4d1efSBruce Evans #include <sys/fcntl.h> 70df8bae1dSRodney W. Grimes #include <sys/file.h> 71df8bae1dSRodney W. Grimes #include <sys/mman.h> 72b483c7f6SGuido van Rooij #include <sys/mount.h> 73df8bae1dSRodney W. Grimes #include <sys/conf.h> 744183b6b6SPeter Wemm #include <sys/stat.h> 7555648840SJohn Baldwin #include <sys/syscallsubr.h> 76497a8238SKonstantin Belousov #include <sys/sysent.h> 77efeaf95aSDavid Greenman #include <sys/vmmeter.h> 78a7f67facSKonstantin Belousov #if defined(__amd64__) || defined(__i386__) /* for i386_read_exec */ 79a7f67facSKonstantin Belousov #include <machine/md_var.h> 80a7f67facSKonstantin Belousov #endif 81df8bae1dSRodney W. Grimes 8251d1f690SRobert Watson #include <security/audit/audit.h> 83aed55708SRobert Watson #include <security/mac/mac_framework.h> 84aed55708SRobert Watson 85df8bae1dSRodney W. Grimes #include <vm/vm.h> 86efeaf95aSDavid Greenman #include <vm/vm_param.h> 87efeaf95aSDavid Greenman #include <vm/pmap.h> 88efeaf95aSDavid Greenman #include <vm/vm_map.h> 89efeaf95aSDavid Greenman #include <vm/vm_object.h> 901c7c3c6aSMatthew Dillon #include <vm/vm_page.h> 91df8bae1dSRodney W. Grimes #include <vm/vm_pager.h> 92b5e8ce9fSBruce Evans #include <vm/vm_pageout.h> 93efeaf95aSDavid Greenman #include <vm/vm_extern.h> 94867a482dSJohn Dyson #include <vm/vm_page.h> 9584110e7eSKonstantin Belousov #include <vm/vnode_pager.h> 96df8bae1dSRodney W. Grimes 9749874f6eSJoseph Koshy #ifdef HWPMC_HOOKS 9849874f6eSJoseph Koshy #include <sys/pmckern.h> 9949874f6eSJoseph Koshy #endif 10049874f6eSJoseph Koshy 1017e19eda4SAndrey Zonov int old_mlock = 0; 102af3b2549SHans Petter Selasky SYSCTL_INT(_vm, OID_AUTO, old_mlock, CTLFLAG_RWTUN, &old_mlock, 0, 1037e19eda4SAndrey Zonov "Do not apply RLIMIT_MEMLOCK on mlockall"); 1043fbc2e00SKonstantin Belousov static int mincore_mapped = 1; 1053fbc2e00SKonstantin Belousov SYSCTL_INT(_vm, OID_AUTO, mincore_mapped, CTLFLAG_RWTUN, &mincore_mapped, 0, 1063fbc2e00SKonstantin Belousov "mincore reports mappings, not residency"); 10774a1b66cSBrooks Davis static int imply_prot_max = 0; 10874a1b66cSBrooks Davis SYSCTL_INT(_vm, OID_AUTO, imply_prot_max, CTLFLAG_RWTUN, &imply_prot_max, 0, 1094d13f784SEd Maste "Imply maximum page protections in mmap() when none are specified"); 1107e19eda4SAndrey Zonov 111edb572a3SJohn Baldwin #ifdef MAP_32BIT 112edb572a3SJohn Baldwin #define MAP_32BIT_MAX_ADDR ((vm_offset_t)1 << 31) 113d2d3e875SBruce Evans #endif 1140d94caffSDavid Greenman 115847ab36bSMark Johnston _Static_assert(MAXPAGESIZES <= 4, "MINCORE_SUPER too narrow"); 116847ab36bSMark Johnston 117edb572a3SJohn Baldwin #ifndef _SYS_SYSPROTO_H_ 118edb572a3SJohn Baldwin struct sbrk_args { 119edb572a3SJohn Baldwin int incr; 120edb572a3SJohn Baldwin }; 121edb572a3SJohn Baldwin #endif 122edb572a3SJohn Baldwin 123df8bae1dSRodney W. Grimes int 12404e89ffbSKonstantin Belousov sys_sbrk(struct thread *td, struct sbrk_args *uap) 125df8bae1dSRodney W. Grimes { 126df8bae1dSRodney W. Grimes /* Not yet implemented */ 127df8bae1dSRodney W. Grimes return (EOPNOTSUPP); 128df8bae1dSRodney W. Grimes } 129df8bae1dSRodney W. Grimes 130d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 131df8bae1dSRodney W. Grimes struct sstk_args { 132df8bae1dSRodney W. Grimes int incr; 133df8bae1dSRodney W. Grimes }; 134d2d3e875SBruce Evans #endif 1350d94caffSDavid Greenman 136df8bae1dSRodney W. Grimes int 13704e89ffbSKonstantin Belousov sys_sstk(struct thread *td, struct sstk_args *uap) 138df8bae1dSRodney W. Grimes { 139df8bae1dSRodney W. Grimes /* Not yet implemented */ 140df8bae1dSRodney W. Grimes return (EOPNOTSUPP); 141df8bae1dSRodney W. Grimes } 142df8bae1dSRodney W. Grimes 1431930e303SPoul-Henning Kamp #if defined(COMPAT_43) 144df8bae1dSRodney W. Grimes int 145d48719bdSBrooks Davis ogetpagesize(struct thread *td, struct ogetpagesize_args *uap) 146df8bae1dSRodney W. Grimes { 14704e89ffbSKonstantin Belousov 148b40ce416SJulian Elischer td->td_retval[0] = PAGE_SIZE; 149df8bae1dSRodney W. Grimes return (0); 150df8bae1dSRodney W. Grimes } 1511930e303SPoul-Henning Kamp #endif /* COMPAT_43 */ 152df8bae1dSRodney W. Grimes 15354f42e4bSPeter Wemm /* 15454f42e4bSPeter Wemm * Memory Map (mmap) system call. Note that the file offset 15554f42e4bSPeter Wemm * and address are allowed to be NOT page aligned, though if 15654f42e4bSPeter Wemm * the MAP_FIXED flag it set, both must have the same remainder 15754f42e4bSPeter Wemm * modulo the PAGE_SIZE (POSIX 1003.1b). If the address is not 15854f42e4bSPeter Wemm * page-aligned, the actual mapping starts at trunc_page(addr) 15954f42e4bSPeter Wemm * and the return value is adjusted up by the page offset. 160b4309055SMatthew Dillon * 161b4309055SMatthew Dillon * Generally speaking, only character devices which are themselves 162b4309055SMatthew Dillon * memory-based, such as a video framebuffer, can be mmap'd. Otherwise 163b4309055SMatthew Dillon * there would be no cache coherency between a descriptor and a VM mapping 164b4309055SMatthew Dillon * both to the same character device. 16554f42e4bSPeter Wemm */ 166d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 167df8bae1dSRodney W. Grimes struct mmap_args { 168651bb817SAlexander Langer void *addr; 169df8bae1dSRodney W. Grimes size_t len; 170df8bae1dSRodney W. Grimes int prot; 171df8bae1dSRodney W. Grimes int flags; 172df8bae1dSRodney W. Grimes int fd; 173df8bae1dSRodney W. Grimes long pad; 174df8bae1dSRodney W. Grimes off_t pos; 175df8bae1dSRodney W. Grimes }; 176d2d3e875SBruce Evans #endif 177df8bae1dSRodney W. Grimes 178df8bae1dSRodney W. Grimes int 17969cdfcefSEdward Tomasz Napierala sys_mmap(struct thread *td, struct mmap_args *uap) 18069cdfcefSEdward Tomasz Napierala { 18169cdfcefSEdward Tomasz Napierala 182496ab053SKonstantin Belousov return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, uap->prot, 183496ab053SKonstantin Belousov uap->flags, uap->fd, uap->pos)); 18469cdfcefSEdward Tomasz Napierala } 18569cdfcefSEdward Tomasz Napierala 18669cdfcefSEdward Tomasz Napierala int 1875dc7e31aSKonstantin Belousov kern_mmap_maxprot(struct proc *p, int prot) 1885dc7e31aSKonstantin Belousov { 1895dc7e31aSKonstantin Belousov 1905dc7e31aSKonstantin Belousov if ((p->p_flag2 & P2_PROTMAX_DISABLE) != 0 || 1915dc7e31aSKonstantin Belousov (p->p_fctl0 & NT_FREEBSD_FCTL_PROTMAX_DISABLE) != 0) 1925dc7e31aSKonstantin Belousov return (_PROT_ALL); 1935dc7e31aSKonstantin Belousov if (((p->p_flag2 & P2_PROTMAX_ENABLE) != 0 || imply_prot_max) && 1945dc7e31aSKonstantin Belousov prot != PROT_NONE) 1955dc7e31aSKonstantin Belousov return (prot); 1965dc7e31aSKonstantin Belousov return (_PROT_ALL); 1975dc7e31aSKonstantin Belousov } 1985dc7e31aSKonstantin Belousov 1995dc7e31aSKonstantin Belousov int 20077555b84SDoug Moore kern_mmap(struct thread *td, uintptr_t addr0, size_t len, int prot, int flags, 201496ab053SKonstantin Belousov int fd, off_t pos) 202df8bae1dSRodney W. Grimes { 203d718de81SBrooks Davis struct mmap_req mr = { 204d718de81SBrooks Davis .mr_hint = addr0, 205d718de81SBrooks Davis .mr_len = len, 206d718de81SBrooks Davis .mr_prot = prot, 207d718de81SBrooks Davis .mr_flags = flags, 208d718de81SBrooks Davis .mr_fd = fd, 209d718de81SBrooks Davis .mr_pos = pos 210d718de81SBrooks Davis }; 21118348a23SKyle Evans 212d718de81SBrooks Davis return (kern_mmap_req(td, &mr)); 21318348a23SKyle Evans } 21418348a23SKyle Evans 21518348a23SKyle Evans int 216d718de81SBrooks Davis kern_mmap_req(struct thread *td, const struct mmap_req *mrp) 21718348a23SKyle Evans { 218496ab053SKonstantin Belousov struct vmspace *vms; 219c8daea13SAlexander Kabaev struct file *fp; 22037306951SKonstantin Belousov struct proc *p; 221d718de81SBrooks Davis off_t pos; 222d301b358SKonstantin Belousov vm_offset_t addr, orig_addr; 223d718de81SBrooks Davis vm_size_t len, pageoff, size; 2247077c426SJohn Baldwin vm_prot_t cap_maxprot; 225d718de81SBrooks Davis int align, error, fd, flags, max_prot, prot; 226a9d2f8d8SRobert Watson cap_rights_t rights; 227d718de81SBrooks Davis mmap_check_fp_fn check_fp_fn; 228d718de81SBrooks Davis 229d301b358SKonstantin Belousov orig_addr = addr = mrp->mr_hint; 230d718de81SBrooks Davis len = mrp->mr_len; 231d718de81SBrooks Davis prot = mrp->mr_prot; 232d718de81SBrooks Davis flags = mrp->mr_flags; 233d718de81SBrooks Davis fd = mrp->mr_fd; 234d718de81SBrooks Davis pos = mrp->mr_pos; 235d718de81SBrooks Davis check_fp_fn = mrp->mr_check_fp_fn; 236df8bae1dSRodney W. Grimes 23774a1b66cSBrooks Davis if ((prot & ~(_PROT_ALL | PROT_MAX(_PROT_ALL))) != 0) 23874a1b66cSBrooks Davis return (EINVAL); 23974a1b66cSBrooks Davis max_prot = PROT_MAX_EXTRACT(prot); 24074a1b66cSBrooks Davis prot = PROT_EXTRACT(prot); 24174a1b66cSBrooks Davis if (max_prot != 0 && (max_prot & prot) != prot) 242acb8858fSEd Maste return (ENOTSUP); 24337306951SKonstantin Belousov 24437306951SKonstantin Belousov p = td->td_proc; 24537306951SKonstantin Belousov 24674a1b66cSBrooks Davis /* 24774a1b66cSBrooks Davis * Always honor PROT_MAX if set. If not, default to all 24874a1b66cSBrooks Davis * permissions unless we're implying maximum permissions. 24974a1b66cSBrooks Davis */ 25074a1b66cSBrooks Davis if (max_prot == 0) 2515dc7e31aSKonstantin Belousov max_prot = kern_mmap_maxprot(p, prot); 25274a1b66cSBrooks Davis 25337306951SKonstantin Belousov vms = p->p_vmspace; 254426da3bcSAlfred Perlstein fp = NULL; 25569cdfcefSEdward Tomasz Napierala AUDIT_ARG_FD(fd); 25627bfa958SSimon L. B. Nielsen 2577707ccabSKonstantin Belousov /* 2585817298fSJohn Baldwin * Ignore old flags that used to be defined but did not do anything. 2595817298fSJohn Baldwin */ 2605817298fSJohn Baldwin flags &= ~(MAP_RESERVED0020 | MAP_RESERVED0040); 2615817298fSJohn Baldwin 2625817298fSJohn Baldwin /* 2637707ccabSKonstantin Belousov * Enforce the constraints. 2647707ccabSKonstantin Belousov * Mapping of length 0 is only allowed for old binaries. 2657707ccabSKonstantin Belousov * Anonymous mapping shall specify -1 as filedescriptor and 2667707ccabSKonstantin Belousov * zero position for new code. Be nice to ancient a.out 2677707ccabSKonstantin Belousov * binaries and correct pos for anonymous mapping, since old 2687707ccabSKonstantin Belousov * ld.so sometimes issues anonymous map requests with non-zero 2697707ccabSKonstantin Belousov * pos. 2707707ccabSKonstantin Belousov */ 2717707ccabSKonstantin Belousov if (!SV_CURPROC_FLAG(SV_AOUT)) { 27237306951SKonstantin Belousov if ((len == 0 && p->p_osrel >= P_OSREL_MAP_ANON) || 27369cdfcefSEdward Tomasz Napierala ((flags & MAP_ANON) != 0 && (fd != -1 || pos != 0))) 274df8bae1dSRodney W. Grimes return (EINVAL); 2757707ccabSKonstantin Belousov } else { 2767707ccabSKonstantin Belousov if ((flags & MAP_ANON) != 0) 2777707ccabSKonstantin Belousov pos = 0; 2787707ccabSKonstantin Belousov } 2799154ee6aSPeter Wemm 2802267af78SJulian Elischer if (flags & MAP_STACK) { 28169cdfcefSEdward Tomasz Napierala if ((fd != -1) || 2822267af78SJulian Elischer ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE))) 2832267af78SJulian Elischer return (EINVAL); 2842267af78SJulian Elischer flags |= MAP_ANON; 2852267af78SJulian Elischer pos = 0; 2862907af2aSJulian Elischer } 2875817298fSJohn Baldwin if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | MAP_HASSEMAPHORE | 2885817298fSJohn Baldwin MAP_STACK | MAP_NOSYNC | MAP_ANON | MAP_EXCL | MAP_NOCORE | 28919bd0d9cSKonstantin Belousov MAP_PREFAULT_READ | MAP_GUARD | 2905fd3f8b3SJohn Baldwin #ifdef MAP_32BIT 2915fd3f8b3SJohn Baldwin MAP_32BIT | 2925fd3f8b3SJohn Baldwin #endif 2935fd3f8b3SJohn Baldwin MAP_ALIGNMENT_MASK)) != 0) 2945fd3f8b3SJohn Baldwin return (EINVAL); 29511c42bccSKonstantin Belousov if ((flags & (MAP_EXCL | MAP_FIXED)) == MAP_EXCL) 29611c42bccSKonstantin Belousov return (EINVAL); 29710204535SKonstantin Belousov if ((flags & (MAP_SHARED | MAP_PRIVATE)) == (MAP_SHARED | MAP_PRIVATE)) 2985fd3f8b3SJohn Baldwin return (EINVAL); 2995fd3f8b3SJohn Baldwin if (prot != PROT_NONE && 3005fd3f8b3SJohn Baldwin (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) != 0) 3015fd3f8b3SJohn Baldwin return (EINVAL); 30219bd0d9cSKonstantin Belousov if ((flags & MAP_GUARD) != 0 && (prot != PROT_NONE || fd != -1 || 30360221a57SAlan Cox pos != 0 || (flags & ~(MAP_FIXED | MAP_GUARD | MAP_EXCL | 304633d3b1cSKonstantin Belousov #ifdef MAP_32BIT 305633d3b1cSKonstantin Belousov MAP_32BIT | 306633d3b1cSKonstantin Belousov #endif 307633d3b1cSKonstantin Belousov MAP_ALIGNMENT_MASK)) != 0)) 30819bd0d9cSKonstantin Belousov return (EINVAL); 3092907af2aSJulian Elischer 3109154ee6aSPeter Wemm /* 31154f42e4bSPeter Wemm * Align the file position to a page boundary, 31254f42e4bSPeter Wemm * and save its page offset component. 3139154ee6aSPeter Wemm */ 31454f42e4bSPeter Wemm pageoff = (pos & PAGE_MASK); 31554f42e4bSPeter Wemm pos -= pageoff; 31654f42e4bSPeter Wemm 31777555b84SDoug Moore /* Compute size from len by rounding (on both ends). */ 31877555b84SDoug Moore size = len + pageoff; /* low end... */ 31997220a27SDoug Moore size = round_page(size); /* hi end */ 32077555b84SDoug Moore /* Check for rounding up to zero. */ 321f8c8b2e8SDoug Moore if (len > size) 32277555b84SDoug Moore return (ENOMEM); 3239154ee6aSPeter Wemm 3245aa60b6fSJohn Baldwin /* Ensure alignment is at least a page and fits in a pointer. */ 3255aa60b6fSJohn Baldwin align = flags & MAP_ALIGNMENT_MASK; 3265aa60b6fSJohn Baldwin if (align != 0 && align != MAP_ALIGNED_SUPER && 3275aa60b6fSJohn Baldwin (align >> MAP_ALIGNMENT_SHIFT >= sizeof(void *) * NBBY || 3285aa60b6fSJohn Baldwin align >> MAP_ALIGNMENT_SHIFT < PAGE_SHIFT)) 3295aa60b6fSJohn Baldwin return (EINVAL); 3305aa60b6fSJohn Baldwin 331df8bae1dSRodney W. Grimes /* 3320d94caffSDavid Greenman * Check for illegal addresses. Watch out for address wrap... Note 3330d94caffSDavid Greenman * that VM_*_ADDRESS are not constants due to casts (argh). 334df8bae1dSRodney W. Grimes */ 335df8bae1dSRodney W. Grimes if (flags & MAP_FIXED) { 33654f42e4bSPeter Wemm /* 33754f42e4bSPeter Wemm * The specified address must have the same remainder 33854f42e4bSPeter Wemm * as the file offset taken modulo PAGE_SIZE, so it 33954f42e4bSPeter Wemm * should be aligned after adjustment by pageoff. 34054f42e4bSPeter Wemm */ 34154f42e4bSPeter Wemm addr -= pageoff; 34254f42e4bSPeter Wemm if (addr & PAGE_MASK) 34354f42e4bSPeter Wemm return (EINVAL); 34427bfa958SSimon L. B. Nielsen 34554f42e4bSPeter Wemm /* Address range must be all in user VM space. */ 3460f1e6ec5SMark Johnston if (!vm_map_range_valid(&vms->vm_map, addr, addr + size)) 347df8bae1dSRodney W. Grimes return (EINVAL); 348edb572a3SJohn Baldwin #ifdef MAP_32BIT 349edb572a3SJohn Baldwin if (flags & MAP_32BIT && addr + size > MAP_32BIT_MAX_ADDR) 350edb572a3SJohn Baldwin return (EINVAL); 351edb572a3SJohn Baldwin } else if (flags & MAP_32BIT) { 352edb572a3SJohn Baldwin /* 353edb572a3SJohn Baldwin * For MAP_32BIT, override the hint if it is too high and 354edb572a3SJohn Baldwin * do not bother moving the mapping past the heap (since 355edb572a3SJohn Baldwin * the heap is usually above 2GB). 356edb572a3SJohn Baldwin */ 357edb572a3SJohn Baldwin if (addr + size > MAP_32BIT_MAX_ADDR) 358edb572a3SJohn Baldwin addr = 0; 359edb572a3SJohn Baldwin #endif 36091d5354aSJohn Baldwin } else { 361df8bae1dSRodney W. Grimes /* 36254f42e4bSPeter Wemm * XXX for non-fixed mappings where no hint is provided or 36354f42e4bSPeter Wemm * the hint would fall in the potential heap space, 36454f42e4bSPeter Wemm * place it after the end of the largest possible heap. 365df8bae1dSRodney W. Grimes * 36654f42e4bSPeter Wemm * There should really be a pmap call to determine a reasonable 36754f42e4bSPeter Wemm * location. 368df8bae1dSRodney W. Grimes */ 36991d5354aSJohn Baldwin if (addr == 0 || 3701f6889a1SMatthew Dillon (addr >= round_page((vm_offset_t)vms->vm_taddr) && 371c460ac3aSPeter Wemm addr < round_page((vm_offset_t)vms->vm_daddr + 372cd336badSMateusz Guzik lim_max(td, RLIMIT_DATA)))) 373c460ac3aSPeter Wemm addr = round_page((vm_offset_t)vms->vm_daddr + 374cd336badSMateusz Guzik lim_max(td, RLIMIT_DATA)); 37591d5354aSJohn Baldwin } 37677555b84SDoug Moore if (len == 0) { 3777077c426SJohn Baldwin /* 3787077c426SJohn Baldwin * Return success without mapping anything for old 3797077c426SJohn Baldwin * binaries that request a page-aligned mapping of 3807077c426SJohn Baldwin * length 0. For modern binaries, this function 3817077c426SJohn Baldwin * returns an error earlier. 3827077c426SJohn Baldwin */ 3837077c426SJohn Baldwin error = 0; 38419bd0d9cSKonstantin Belousov } else if ((flags & MAP_GUARD) != 0) { 38519bd0d9cSKonstantin Belousov error = vm_mmap_object(&vms->vm_map, &addr, size, VM_PROT_NONE, 38619bd0d9cSKonstantin Belousov VM_PROT_NONE, flags, NULL, pos, FALSE, td); 38719bd0d9cSKonstantin Belousov } else if ((flags & MAP_ANON) != 0) { 388df8bae1dSRodney W. Grimes /* 389df8bae1dSRodney W. Grimes * Mapping blank space is trivial. 3907077c426SJohn Baldwin * 3917077c426SJohn Baldwin * This relies on VM_PROT_* matching PROT_*. 392df8bae1dSRodney W. Grimes */ 3937077c426SJohn Baldwin error = vm_mmap_object(&vms->vm_map, &addr, size, prot, 39474a1b66cSBrooks Davis max_prot, flags, NULL, pos, FALSE, td); 39530d4dd7eSAlexander Kabaev } else { 396df8bae1dSRodney W. Grimes /* 397a9d2f8d8SRobert Watson * Mapping file, get fp for validation and don't let the 398a9d2f8d8SRobert Watson * descriptor disappear on us if we block. Check capability 399a9d2f8d8SRobert Watson * rights, but also return the maximum rights to be combined 400a9d2f8d8SRobert Watson * with maxprot later. 401df8bae1dSRodney W. Grimes */ 4023379d2f9SMateusz Guzik cap_rights_init_one(&rights, CAP_MMAP); 403a9d2f8d8SRobert Watson if (prot & PROT_READ) 4043379d2f9SMateusz Guzik cap_rights_set_one(&rights, CAP_MMAP_R); 405a9d2f8d8SRobert Watson if ((flags & MAP_SHARED) != 0) { 406a9d2f8d8SRobert Watson if (prot & PROT_WRITE) 4073379d2f9SMateusz Guzik cap_rights_set_one(&rights, CAP_MMAP_W); 408a9d2f8d8SRobert Watson } 409a9d2f8d8SRobert Watson if (prot & PROT_EXEC) 4103379d2f9SMateusz Guzik cap_rights_set_one(&rights, CAP_MMAP_X); 41169cdfcefSEdward Tomasz Napierala error = fget_mmap(td, fd, &rights, &cap_maxprot, &fp); 4127008be5bSPawel Jakub Dawidek if (error != 0) 413426da3bcSAlfred Perlstein goto done; 41410204535SKonstantin Belousov if ((flags & (MAP_SHARED | MAP_PRIVATE)) == 0 && 41537306951SKonstantin Belousov p->p_osrel >= P_OSREL_MAP_FSTRICT) { 41610204535SKonstantin Belousov error = EINVAL; 41710204535SKonstantin Belousov goto done; 41810204535SKonstantin Belousov } 41918348a23SKyle Evans if (check_fp_fn != NULL) { 42018348a23SKyle Evans error = check_fp_fn(fp, prot, max_prot & cap_maxprot, 42118348a23SKyle Evans flags); 42218348a23SKyle Evans if (error != 0) 42318348a23SKyle Evans goto done; 42418348a23SKyle Evans } 425d301b358SKonstantin Belousov if (fp->f_ops == &shm_ops && shm_largepage(fp->f_data)) 426d301b358SKonstantin Belousov addr = orig_addr; 4275fd3f8b3SJohn Baldwin /* This relies on VM_PROT_* matching PROT_*. */ 4287077c426SJohn Baldwin error = fo_mmap(fp, &vms->vm_map, &addr, size, prot, 42974a1b66cSBrooks Davis max_prot & cap_maxprot, flags, pos, td); 43049874f6eSJoseph Koshy } 4317077c426SJohn Baldwin 432df8bae1dSRodney W. Grimes if (error == 0) 433b40ce416SJulian Elischer td->td_retval[0] = (register_t) (addr + pageoff); 434279d7226SMatthew Dillon done: 435279d7226SMatthew Dillon if (fp) 436b40ce416SJulian Elischer fdrop(fp, td); 437f6b5b182SJeff Roberson 438df8bae1dSRodney W. Grimes return (error); 439df8bae1dSRodney W. Grimes } 440df8bae1dSRodney W. Grimes 4410538aafcSKonstantin Belousov #if defined(COMPAT_FREEBSD6) 442c2815ad5SPeter Wemm int 443c2815ad5SPeter Wemm freebsd6_mmap(struct thread *td, struct freebsd6_mmap_args *uap) 444c2815ad5SPeter Wemm { 445c2815ad5SPeter Wemm 446496ab053SKonstantin Belousov return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, uap->prot, 447496ab053SKonstantin Belousov uap->flags, uap->fd, uap->pos)); 448c2815ad5SPeter Wemm } 4490538aafcSKonstantin Belousov #endif 450c2815ad5SPeter Wemm 45105f0fdd2SPoul-Henning Kamp #ifdef COMPAT_43 452d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 45305f0fdd2SPoul-Henning Kamp struct ommap_args { 45405f0fdd2SPoul-Henning Kamp caddr_t addr; 45505f0fdd2SPoul-Henning Kamp int len; 45605f0fdd2SPoul-Henning Kamp int prot; 45705f0fdd2SPoul-Henning Kamp int flags; 45805f0fdd2SPoul-Henning Kamp int fd; 45905f0fdd2SPoul-Henning Kamp long pos; 46005f0fdd2SPoul-Henning Kamp }; 461d2d3e875SBruce Evans #endif 46205f0fdd2SPoul-Henning Kamp int 46369cdfcefSEdward Tomasz Napierala ommap(struct thread *td, struct ommap_args *uap) 46405f0fdd2SPoul-Henning Kamp { 46505f0fdd2SPoul-Henning Kamp static const char cvtbsdprot[8] = { 46605f0fdd2SPoul-Henning Kamp 0, 46705f0fdd2SPoul-Henning Kamp PROT_EXEC, 46805f0fdd2SPoul-Henning Kamp PROT_WRITE, 46905f0fdd2SPoul-Henning Kamp PROT_EXEC | PROT_WRITE, 47005f0fdd2SPoul-Henning Kamp PROT_READ, 47105f0fdd2SPoul-Henning Kamp PROT_EXEC | PROT_READ, 47205f0fdd2SPoul-Henning Kamp PROT_WRITE | PROT_READ, 47305f0fdd2SPoul-Henning Kamp PROT_EXEC | PROT_WRITE | PROT_READ, 47405f0fdd2SPoul-Henning Kamp }; 47569cdfcefSEdward Tomasz Napierala int flags, prot; 4760d94caffSDavid Greenman 47705f0fdd2SPoul-Henning Kamp #define OMAP_ANON 0x0002 47805f0fdd2SPoul-Henning Kamp #define OMAP_COPY 0x0020 47905f0fdd2SPoul-Henning Kamp #define OMAP_SHARED 0x0010 48005f0fdd2SPoul-Henning Kamp #define OMAP_FIXED 0x0100 48105f0fdd2SPoul-Henning Kamp 48269cdfcefSEdward Tomasz Napierala prot = cvtbsdprot[uap->prot & 0x7]; 4835dddee2dSKonstantin Belousov #if (defined(COMPAT_FREEBSD32) && defined(__amd64__)) || defined(__i386__) 484ee4116b8SKonstantin Belousov if (i386_read_exec && SV_PROC_FLAG(td->td_proc, SV_ILP32) && 48569cdfcefSEdward Tomasz Napierala prot != 0) 48669cdfcefSEdward Tomasz Napierala prot |= PROT_EXEC; 487ee4116b8SKonstantin Belousov #endif 48869cdfcefSEdward Tomasz Napierala flags = 0; 48905f0fdd2SPoul-Henning Kamp if (uap->flags & OMAP_ANON) 49069cdfcefSEdward Tomasz Napierala flags |= MAP_ANON; 49105f0fdd2SPoul-Henning Kamp if (uap->flags & OMAP_COPY) 49269cdfcefSEdward Tomasz Napierala flags |= MAP_COPY; 49305f0fdd2SPoul-Henning Kamp if (uap->flags & OMAP_SHARED) 49469cdfcefSEdward Tomasz Napierala flags |= MAP_SHARED; 49505f0fdd2SPoul-Henning Kamp else 49669cdfcefSEdward Tomasz Napierala flags |= MAP_PRIVATE; 49705f0fdd2SPoul-Henning Kamp if (uap->flags & OMAP_FIXED) 49869cdfcefSEdward Tomasz Napierala flags |= MAP_FIXED; 499496ab053SKonstantin Belousov return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, prot, flags, 500496ab053SKonstantin Belousov uap->fd, uap->pos)); 50105f0fdd2SPoul-Henning Kamp } 50205f0fdd2SPoul-Henning Kamp #endif /* COMPAT_43 */ 50305f0fdd2SPoul-Henning Kamp 504d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 505df8bae1dSRodney W. Grimes struct msync_args { 506651bb817SAlexander Langer void *addr; 507c899450bSPeter Wemm size_t len; 508e6c6af11SDavid Greenman int flags; 509df8bae1dSRodney W. Grimes }; 510d2d3e875SBruce Evans #endif 511df8bae1dSRodney W. Grimes int 51269cdfcefSEdward Tomasz Napierala sys_msync(struct thread *td, struct msync_args *uap) 513df8bae1dSRodney W. Grimes { 51469cdfcefSEdward Tomasz Napierala 515496ab053SKonstantin Belousov return (kern_msync(td, (uintptr_t)uap->addr, uap->len, uap->flags)); 51669cdfcefSEdward Tomasz Napierala } 51769cdfcefSEdward Tomasz Napierala 51869cdfcefSEdward Tomasz Napierala int 519496ab053SKonstantin Belousov kern_msync(struct thread *td, uintptr_t addr0, size_t size, int flags) 52069cdfcefSEdward Tomasz Napierala { 521496ab053SKonstantin Belousov vm_offset_t addr; 52269cdfcefSEdward Tomasz Napierala vm_size_t pageoff; 523df8bae1dSRodney W. Grimes vm_map_t map; 524df8bae1dSRodney W. Grimes int rv; 525df8bae1dSRodney W. Grimes 526496ab053SKonstantin Belousov addr = addr0; 527dabee6feSPeter Wemm pageoff = (addr & PAGE_MASK); 528dabee6feSPeter Wemm addr -= pageoff; 529dabee6feSPeter Wemm size += pageoff; 530dabee6feSPeter Wemm size = (vm_size_t) round_page(size); 5319154ee6aSPeter Wemm if (addr + size < addr) 532dabee6feSPeter Wemm return (EINVAL); 533dabee6feSPeter Wemm 534dabee6feSPeter Wemm if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE)) 5351e62bc63SDavid Greenman return (EINVAL); 5361e62bc63SDavid Greenman 537b40ce416SJulian Elischer map = &td->td_proc->p_vmspace->vm_map; 5389154ee6aSPeter Wemm 539df8bae1dSRodney W. Grimes /* 540df8bae1dSRodney W. Grimes * Clean the pages and interpret the return value. 541df8bae1dSRodney W. Grimes */ 542950f8459SAlan Cox rv = vm_map_sync(map, addr, addr + size, (flags & MS_ASYNC) == 0, 543e6c6af11SDavid Greenman (flags & MS_INVALIDATE) != 0); 544df8bae1dSRodney W. Grimes switch (rv) { 545df8bae1dSRodney W. Grimes case KERN_SUCCESS: 546d2c60af8SMatthew Dillon return (0); 547df8bae1dSRodney W. Grimes case KERN_INVALID_ADDRESS: 548e103f5b1SPeter Holm return (ENOMEM); 549b7b7cd44SAlan Cox case KERN_INVALID_ARGUMENT: 550b7b7cd44SAlan Cox return (EBUSY); 551126d6082SKonstantin Belousov case KERN_FAILURE: 552126d6082SKonstantin Belousov return (EIO); 553df8bae1dSRodney W. Grimes default: 554df8bae1dSRodney W. Grimes return (EINVAL); 555df8bae1dSRodney W. Grimes } 556df8bae1dSRodney W. Grimes } 557df8bae1dSRodney W. Grimes 558d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 559df8bae1dSRodney W. Grimes struct munmap_args { 560651bb817SAlexander Langer void *addr; 5619154ee6aSPeter Wemm size_t len; 562df8bae1dSRodney W. Grimes }; 563d2d3e875SBruce Evans #endif 564df8bae1dSRodney W. Grimes int 56569cdfcefSEdward Tomasz Napierala sys_munmap(struct thread *td, struct munmap_args *uap) 56669cdfcefSEdward Tomasz Napierala { 56769cdfcefSEdward Tomasz Napierala 568496ab053SKonstantin Belousov return (kern_munmap(td, (uintptr_t)uap->addr, uap->len)); 56969cdfcefSEdward Tomasz Napierala } 57069cdfcefSEdward Tomasz Napierala 57169cdfcefSEdward Tomasz Napierala int 572496ab053SKonstantin Belousov kern_munmap(struct thread *td, uintptr_t addr0, size_t size) 573df8bae1dSRodney W. Grimes { 57449874f6eSJoseph Koshy #ifdef HWPMC_HOOKS 57549874f6eSJoseph Koshy struct pmckern_map_out pkm; 57649874f6eSJoseph Koshy vm_map_entry_t entry; 577736ff8c3SMateusz Guzik bool pmc_handled; 57849874f6eSJoseph Koshy #endif 5790f1e6ec5SMark Johnston vm_offset_t addr, end; 58069cdfcefSEdward Tomasz Napierala vm_size_t pageoff; 581df8bae1dSRodney W. Grimes vm_map_t map; 582e8f77c20SKonstantin Belousov int rv; 583df8bae1dSRodney W. Grimes 584d8834602SAlan Cox if (size == 0) 585d8834602SAlan Cox return (EINVAL); 586dabee6feSPeter Wemm 587496ab053SKonstantin Belousov addr = addr0; 588dabee6feSPeter Wemm pageoff = (addr & PAGE_MASK); 589dabee6feSPeter Wemm addr -= pageoff; 590dabee6feSPeter Wemm size += pageoff; 591dabee6feSPeter Wemm size = (vm_size_t) round_page(size); 5920f1e6ec5SMark Johnston end = addr + size; 5930f1e6ec5SMark Johnston map = &td->td_proc->p_vmspace->vm_map; 5940f1e6ec5SMark Johnston if (!vm_map_range_valid(map, addr, end)) 595df8bae1dSRodney W. Grimes return (EINVAL); 5969154ee6aSPeter Wemm 597d8834602SAlan Cox vm_map_lock(map); 59849874f6eSJoseph Koshy #ifdef HWPMC_HOOKS 599736ff8c3SMateusz Guzik pmc_handled = false; 600736ff8c3SMateusz Guzik if (PMC_HOOK_INSTALLED(PMC_FN_MUNMAP)) { 601736ff8c3SMateusz Guzik pmc_handled = true; 60249874f6eSJoseph Koshy /* 60349874f6eSJoseph Koshy * Inform hwpmc if the address range being unmapped contains 60449874f6eSJoseph Koshy * an executable region. 60549874f6eSJoseph Koshy */ 6060d419640SRyan Stone pkm.pm_address = (uintptr_t) NULL; 60749874f6eSJoseph Koshy if (vm_map_lookup_entry(map, addr, &entry)) { 6080f1e6ec5SMark Johnston for (; entry->start < end; 6097cdcf863SDoug Moore entry = vm_map_entry_succ(entry)) { 61049874f6eSJoseph Koshy if (vm_map_check_protection(map, entry->start, 61149874f6eSJoseph Koshy entry->end, VM_PROT_EXECUTE) == TRUE) { 61249874f6eSJoseph Koshy pkm.pm_address = (uintptr_t) addr; 61349874f6eSJoseph Koshy pkm.pm_size = (size_t) size; 61449874f6eSJoseph Koshy break; 61549874f6eSJoseph Koshy } 61649874f6eSJoseph Koshy } 61749874f6eSJoseph Koshy } 618736ff8c3SMateusz Guzik } 61949874f6eSJoseph Koshy #endif 620e8f77c20SKonstantin Belousov rv = vm_map_delete(map, addr, end); 6210d419640SRyan Stone 6220d419640SRyan Stone #ifdef HWPMC_HOOKS 623e8f77c20SKonstantin Belousov if (rv == KERN_SUCCESS && __predict_false(pmc_handled)) { 6240d419640SRyan Stone /* downgrade the lock to prevent a LOR with the pmc-sx lock */ 6250d419640SRyan Stone vm_map_lock_downgrade(map); 626d473d3a1SRyan Stone if (pkm.pm_address != (uintptr_t) NULL) 6270d419640SRyan Stone PMC_CALL_HOOK(td, PMC_FN_MUNMAP, (void *) &pkm); 6280d419640SRyan Stone vm_map_unlock_read(map); 629736ff8c3SMateusz Guzik } else 6300d419640SRyan Stone #endif 631736ff8c3SMateusz Guzik vm_map_unlock(map); 632736ff8c3SMateusz Guzik 633e8f77c20SKonstantin Belousov return (vm_mmap_to_errno(rv)); 634df8bae1dSRodney W. Grimes } 635df8bae1dSRodney W. Grimes 636d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 637df8bae1dSRodney W. Grimes struct mprotect_args { 638651bb817SAlexander Langer const void *addr; 6399154ee6aSPeter Wemm size_t len; 640df8bae1dSRodney W. Grimes int prot; 641df8bae1dSRodney W. Grimes }; 642d2d3e875SBruce Evans #endif 643df8bae1dSRodney W. Grimes int 64469cdfcefSEdward Tomasz Napierala sys_mprotect(struct thread *td, struct mprotect_args *uap) 645df8bae1dSRodney W. Grimes { 646df8bae1dSRodney W. Grimes 647496ab053SKonstantin Belousov return (kern_mprotect(td, (uintptr_t)uap->addr, uap->len, uap->prot)); 64869cdfcefSEdward Tomasz Napierala } 649df8bae1dSRodney W. Grimes 65069cdfcefSEdward Tomasz Napierala int 651496ab053SKonstantin Belousov kern_mprotect(struct thread *td, uintptr_t addr0, size_t size, int prot) 65269cdfcefSEdward Tomasz Napierala { 653496ab053SKonstantin Belousov vm_offset_t addr; 65469cdfcefSEdward Tomasz Napierala vm_size_t pageoff; 65574a1b66cSBrooks Davis int vm_error, max_prot; 656*0659df6fSKonstantin Belousov int flags; 65769cdfcefSEdward Tomasz Napierala 658496ab053SKonstantin Belousov addr = addr0; 65974a1b66cSBrooks Davis if ((prot & ~(_PROT_ALL | PROT_MAX(_PROT_ALL))) != 0) 66074a1b66cSBrooks Davis return (EINVAL); 66174a1b66cSBrooks Davis max_prot = PROT_MAX_EXTRACT(prot); 66274a1b66cSBrooks Davis prot = PROT_EXTRACT(prot); 663dabee6feSPeter Wemm pageoff = (addr & PAGE_MASK); 664dabee6feSPeter Wemm addr -= pageoff; 665dabee6feSPeter Wemm size += pageoff; 666dabee6feSPeter Wemm size = (vm_size_t) round_page(size); 6676e1d2cf6SKonstantin Belousov #ifdef COMPAT_FREEBSD32 6686e1d2cf6SKonstantin Belousov if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 6696e1d2cf6SKonstantin Belousov if (((addr + size) & 0xffffffff) < addr) 6706e1d2cf6SKonstantin Belousov return (EINVAL); 6716e1d2cf6SKonstantin Belousov } else 6726e1d2cf6SKonstantin Belousov #endif 6739154ee6aSPeter Wemm if (addr + size < addr) 674dabee6feSPeter Wemm return (EINVAL); 675dabee6feSPeter Wemm 676*0659df6fSKonstantin Belousov flags = VM_MAP_PROTECT_SET_PROT; 677*0659df6fSKonstantin Belousov if (max_prot != 0) 678*0659df6fSKonstantin Belousov flags |= VM_MAP_PROTECT_SET_MAXPROT; 67974a1b66cSBrooks Davis vm_error = vm_map_protect(&td->td_proc->p_vmspace->vm_map, 680*0659df6fSKonstantin Belousov addr, addr + size, prot, max_prot, flags); 68174a1b66cSBrooks Davis 68274a1b66cSBrooks Davis switch (vm_error) { 683df8bae1dSRodney W. Grimes case KERN_SUCCESS: 684df8bae1dSRodney W. Grimes return (0); 685df8bae1dSRodney W. Grimes case KERN_PROTECTION_FAILURE: 686df8bae1dSRodney W. Grimes return (EACCES); 6873364c323SKonstantin Belousov case KERN_RESOURCE_SHORTAGE: 6883364c323SKonstantin Belousov return (ENOMEM); 689*0659df6fSKonstantin Belousov case KERN_OUT_OF_BOUNDS: 690*0659df6fSKonstantin Belousov return (ENOTSUP); 691df8bae1dSRodney W. Grimes } 692df8bae1dSRodney W. Grimes return (EINVAL); 693df8bae1dSRodney W. Grimes } 694df8bae1dSRodney W. Grimes 695d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 696dabee6feSPeter Wemm struct minherit_args { 697651bb817SAlexander Langer void *addr; 6989154ee6aSPeter Wemm size_t len; 699dabee6feSPeter Wemm int inherit; 700dabee6feSPeter Wemm }; 701dabee6feSPeter Wemm #endif 702dabee6feSPeter Wemm int 70304e89ffbSKonstantin Belousov sys_minherit(struct thread *td, struct minherit_args *uap) 704dabee6feSPeter Wemm { 70552c81be1SEdward Tomasz Napierala 70652c81be1SEdward Tomasz Napierala return (kern_minherit(td, (uintptr_t)uap->addr, uap->len, 70752c81be1SEdward Tomasz Napierala uap->inherit)); 70852c81be1SEdward Tomasz Napierala } 70952c81be1SEdward Tomasz Napierala 71052c81be1SEdward Tomasz Napierala int 71152c81be1SEdward Tomasz Napierala kern_minherit(struct thread *td, uintptr_t addr0, size_t len, int inherit0) 71252c81be1SEdward Tomasz Napierala { 713dabee6feSPeter Wemm vm_offset_t addr; 714dabee6feSPeter Wemm vm_size_t size, pageoff; 71554d92145SMatthew Dillon vm_inherit_t inherit; 716dabee6feSPeter Wemm 71752c81be1SEdward Tomasz Napierala addr = (vm_offset_t)addr0; 71852c81be1SEdward Tomasz Napierala size = len; 71952c81be1SEdward Tomasz Napierala inherit = inherit0; 720dabee6feSPeter Wemm 721dabee6feSPeter Wemm pageoff = (addr & PAGE_MASK); 722dabee6feSPeter Wemm addr -= pageoff; 723dabee6feSPeter Wemm size += pageoff; 724dabee6feSPeter Wemm size = (vm_size_t) round_page(size); 7259154ee6aSPeter Wemm if (addr + size < addr) 726dabee6feSPeter Wemm return (EINVAL); 727dabee6feSPeter Wemm 728e0be79afSAlan Cox switch (vm_map_inherit(&td->td_proc->p_vmspace->vm_map, addr, 729e0be79afSAlan Cox addr + size, inherit)) { 730dabee6feSPeter Wemm case KERN_SUCCESS: 731dabee6feSPeter Wemm return (0); 732dabee6feSPeter Wemm case KERN_PROTECTION_FAILURE: 733dabee6feSPeter Wemm return (EACCES); 734dabee6feSPeter Wemm } 735dabee6feSPeter Wemm return (EINVAL); 736dabee6feSPeter Wemm } 737dabee6feSPeter Wemm 738dabee6feSPeter Wemm #ifndef _SYS_SYSPROTO_H_ 739df8bae1dSRodney W. Grimes struct madvise_args { 740651bb817SAlexander Langer void *addr; 7419154ee6aSPeter Wemm size_t len; 742df8bae1dSRodney W. Grimes int behav; 743df8bae1dSRodney W. Grimes }; 744d2d3e875SBruce Evans #endif 7450d94caffSDavid Greenman 746df8bae1dSRodney W. Grimes int 74704e89ffbSKonstantin Belousov sys_madvise(struct thread *td, struct madvise_args *uap) 748df8bae1dSRodney W. Grimes { 74969cdfcefSEdward Tomasz Napierala 750496ab053SKonstantin Belousov return (kern_madvise(td, (uintptr_t)uap->addr, uap->len, uap->behav)); 75169cdfcefSEdward Tomasz Napierala } 75269cdfcefSEdward Tomasz Napierala 75369cdfcefSEdward Tomasz Napierala int 754496ab053SKonstantin Belousov kern_madvise(struct thread *td, uintptr_t addr0, size_t len, int behav) 75569cdfcefSEdward Tomasz Napierala { 75605ba50f5SJake Burkholder vm_map_t map; 757496ab053SKonstantin Belousov vm_offset_t addr, end, start; 75855648840SJohn Baldwin int flags; 759b4309055SMatthew Dillon 760b4309055SMatthew Dillon /* 761f4cf2141SWes Peters * Check for our special case, advising the swap pager we are 762f4cf2141SWes Peters * "immortal." 763f4cf2141SWes Peters */ 76469cdfcefSEdward Tomasz Napierala if (behav == MADV_PROTECT) { 76555648840SJohn Baldwin flags = PPROT_SET; 76655648840SJohn Baldwin return (kern_procctl(td, P_PID, td->td_proc->p_pid, 76755648840SJohn Baldwin PROC_SPROTECT, &flags)); 76869297bf8SJohn Baldwin } 76955648840SJohn Baldwin 770f4cf2141SWes Peters /* 771867a482dSJohn Dyson * Check for illegal addresses. Watch out for address wrap... Note 772867a482dSJohn Dyson * that VM_*_ADDRESS are not constants due to casts (argh). 773867a482dSJohn Dyson */ 77405ba50f5SJake Burkholder map = &td->td_proc->p_vmspace->vm_map; 775496ab053SKonstantin Belousov addr = addr0; 7760f1e6ec5SMark Johnston if (!vm_map_range_valid(map, addr, addr + len)) 777867a482dSJohn Dyson return (EINVAL); 778867a482dSJohn Dyson 779867a482dSJohn Dyson /* 780867a482dSJohn Dyson * Since this routine is only advisory, we default to conservative 781867a482dSJohn Dyson * behavior. 782867a482dSJohn Dyson */ 78369cdfcefSEdward Tomasz Napierala start = trunc_page(addr); 78469cdfcefSEdward Tomasz Napierala end = round_page(addr + len); 785867a482dSJohn Dyson 7863e7cb27cSAlan Cox /* 7873e7cb27cSAlan Cox * vm_map_madvise() checks for illegal values of behav. 7883e7cb27cSAlan Cox */ 7893e7cb27cSAlan Cox return (vm_map_madvise(map, start, end, behav)); 790df8bae1dSRodney W. Grimes } 791df8bae1dSRodney W. Grimes 792d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 793df8bae1dSRodney W. Grimes struct mincore_args { 794651bb817SAlexander Langer const void *addr; 7959154ee6aSPeter Wemm size_t len; 796df8bae1dSRodney W. Grimes char *vec; 797df8bae1dSRodney W. Grimes }; 798d2d3e875SBruce Evans #endif 7990d94caffSDavid Greenman 800df8bae1dSRodney W. Grimes int 80104e89ffbSKonstantin Belousov sys_mincore(struct thread *td, struct mincore_args *uap) 802df8bae1dSRodney W. Grimes { 80346dc8e9dSDmitry Chagin 80446dc8e9dSDmitry Chagin return (kern_mincore(td, (uintptr_t)uap->addr, uap->len, uap->vec)); 80546dc8e9dSDmitry Chagin } 80646dc8e9dSDmitry Chagin 80746dc8e9dSDmitry Chagin int 80846dc8e9dSDmitry Chagin kern_mincore(struct thread *td, uintptr_t addr0, size_t len, char *vec) 80946dc8e9dSDmitry Chagin { 810867a482dSJohn Dyson pmap_t pmap; 811867a482dSJohn Dyson vm_map_t map; 81201cef4caSMark Johnston vm_map_entry_t current, entry; 813567e51e1SAlan Cox vm_object_t object; 81401cef4caSMark Johnston vm_offset_t addr, cend, end, first_addr; 81501cef4caSMark Johnston vm_paddr_t pa; 816567e51e1SAlan Cox vm_page_t m; 817567e51e1SAlan Cox vm_pindex_t pindex; 81801cef4caSMark Johnston int error, lastvecindex, mincoreinfo, vecindex; 819dd2622a8SAlan Cox unsigned int timestamp; 820df8bae1dSRodney W. Grimes 821867a482dSJohn Dyson /* 822867a482dSJohn Dyson * Make sure that the addresses presented are valid for user 823867a482dSJohn Dyson * mode. 824867a482dSJohn Dyson */ 82546dc8e9dSDmitry Chagin first_addr = addr = trunc_page(addr0); 826d0c9294bSMark Johnston end = round_page(addr0 + len); 82705ba50f5SJake Burkholder map = &td->td_proc->p_vmspace->vm_map; 82805ba50f5SJake Burkholder if (end > vm_map_max(map) || end < addr) 829455dd7d4SKonstantin Belousov return (ENOMEM); 83002c04a2fSJohn Dyson 831b40ce416SJulian Elischer pmap = vmspace_pmap(td->td_proc->p_vmspace); 832867a482dSJohn Dyson 833eff50fcdSAlan Cox vm_map_lock_read(map); 834dd2622a8SAlan Cox RestartScan: 835dd2622a8SAlan Cox timestamp = map->timestamp; 836867a482dSJohn Dyson 837455dd7d4SKonstantin Belousov if (!vm_map_lookup_entry(map, addr, &entry)) { 838455dd7d4SKonstantin Belousov vm_map_unlock_read(map); 839455dd7d4SKonstantin Belousov return (ENOMEM); 840455dd7d4SKonstantin Belousov } 841867a482dSJohn Dyson 842867a482dSJohn Dyson /* 843867a482dSJohn Dyson * Do this on a map entry basis so that if the pages are not 844867a482dSJohn Dyson * in the current processes address space, we can easily look 845867a482dSJohn Dyson * up the pages elsewhere. 846867a482dSJohn Dyson */ 847867a482dSJohn Dyson lastvecindex = -1; 8487cdcf863SDoug Moore while (entry->start < end) { 849867a482dSJohn Dyson /* 850455dd7d4SKonstantin Belousov * check for contiguity 851455dd7d4SKonstantin Belousov */ 8527cdcf863SDoug Moore current = entry; 8537cdcf863SDoug Moore entry = vm_map_entry_succ(current); 8547cdcf863SDoug Moore if (current->end < end && 8557cdcf863SDoug Moore entry->start > current->end) { 856455dd7d4SKonstantin Belousov vm_map_unlock_read(map); 857455dd7d4SKonstantin Belousov return (ENOMEM); 858455dd7d4SKonstantin Belousov } 859455dd7d4SKonstantin Belousov 860455dd7d4SKonstantin Belousov /* 861867a482dSJohn Dyson * ignore submaps (for now) or null objects 862867a482dSJohn Dyson */ 8639fdfe602SMatthew Dillon if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) || 864867a482dSJohn Dyson current->object.vm_object == NULL) 865867a482dSJohn Dyson continue; 866867a482dSJohn Dyson 867867a482dSJohn Dyson /* 868867a482dSJohn Dyson * limit this scan to the current map entry and the 869867a482dSJohn Dyson * limits for the mincore call 870867a482dSJohn Dyson */ 871867a482dSJohn Dyson if (addr < current->start) 872867a482dSJohn Dyson addr = current->start; 873867a482dSJohn Dyson cend = current->end; 874867a482dSJohn Dyson if (cend > end) 875867a482dSJohn Dyson cend = end; 876867a482dSJohn Dyson 87701cef4caSMark Johnston for (; addr < cend; addr += PAGE_SIZE) { 878867a482dSJohn Dyson /* 879867a482dSJohn Dyson * Check pmap first, it is likely faster, also 880867a482dSJohn Dyson * it can provide info as to whether we are the 881867a482dSJohn Dyson * one referencing or modifying the page. 882867a482dSJohn Dyson */ 883567e51e1SAlan Cox m = NULL; 88401cef4caSMark Johnston object = NULL; 88501cef4caSMark Johnston retry: 88601cef4caSMark Johnston pa = 0; 88701cef4caSMark Johnston mincoreinfo = pmap_mincore(pmap, addr, &pa); 8883fbc2e00SKonstantin Belousov if (mincore_mapped) { 8893fbc2e00SKonstantin Belousov /* 8903fbc2e00SKonstantin Belousov * We only care about this pmap's 8913fbc2e00SKonstantin Belousov * mapping of the page, if any. 8923fbc2e00SKonstantin Belousov */ 89301cef4caSMark Johnston ; 89401cef4caSMark Johnston } else if (pa != 0) { 895867a482dSJohn Dyson /* 896567e51e1SAlan Cox * The page is mapped by this process but not 897567e51e1SAlan Cox * both accessed and modified. It is also 898567e51e1SAlan Cox * managed. Acquire the object lock so that 89901cef4caSMark Johnston * other mappings might be examined. The page's 90001cef4caSMark Johnston * identity may change at any point before its 90101cef4caSMark Johnston * object lock is acquired, so re-validate if 90201cef4caSMark Johnston * necessary. 903867a482dSJohn Dyson */ 90401cef4caSMark Johnston m = PHYS_TO_VM_PAGE(pa); 90501cef4caSMark Johnston while (object == NULL || m->object != object) { 906567e51e1SAlan Cox if (object != NULL) 90789f6b863SAttilio Rao VM_OBJECT_WUNLOCK(object); 90823ed568cSMateusz Guzik object = atomic_load_ptr(&m->object); 90901cef4caSMark Johnston if (object == NULL) 910567e51e1SAlan Cox goto retry; 91101cef4caSMark Johnston VM_OBJECT_WLOCK(object); 912567e51e1SAlan Cox } 91301cef4caSMark Johnston if (pa != pmap_extract(pmap, addr)) 91401cef4caSMark Johnston goto retry; 9150012f373SJeff Roberson KASSERT(vm_page_all_valid(m), 916567e51e1SAlan Cox ("mincore: page %p is mapped but invalid", 917567e51e1SAlan Cox m)); 918567e51e1SAlan Cox } else if (mincoreinfo == 0) { 919567e51e1SAlan Cox /* 920567e51e1SAlan Cox * The page is not mapped by this process. If 921567e51e1SAlan Cox * the object implements managed pages, then 922567e51e1SAlan Cox * determine if the page is resident so that 923567e51e1SAlan Cox * the mappings might be examined. 924567e51e1SAlan Cox */ 925567e51e1SAlan Cox if (current->object.vm_object != object) { 926567e51e1SAlan Cox if (object != NULL) 92789f6b863SAttilio Rao VM_OBJECT_WUNLOCK(object); 928567e51e1SAlan Cox object = current->object.vm_object; 92989f6b863SAttilio Rao VM_OBJECT_WLOCK(object); 930567e51e1SAlan Cox } 931567e51e1SAlan Cox if (object->type == OBJT_DEFAULT || 932567e51e1SAlan Cox object->type == OBJT_SWAP || 933567e51e1SAlan Cox object->type == OBJT_VNODE) { 934567e51e1SAlan Cox pindex = OFF_TO_IDX(current->offset + 935567e51e1SAlan Cox (addr - current->start)); 936567e51e1SAlan Cox m = vm_page_lookup(object, pindex); 9370012f373SJeff Roberson if (m != NULL && vm_page_none_valid(m)) 938567e51e1SAlan Cox m = NULL; 939567e51e1SAlan Cox if (m != NULL) 940567e51e1SAlan Cox mincoreinfo = MINCORE_INCORE; 941567e51e1SAlan Cox } 942567e51e1SAlan Cox } 943567e51e1SAlan Cox if (m != NULL) { 94401cef4caSMark Johnston VM_OBJECT_ASSERT_WLOCKED(m->object); 94501cef4caSMark Johnston 94601cef4caSMark Johnston /* Examine other mappings of the page. */ 947567e51e1SAlan Cox if (m->dirty == 0 && pmap_is_modified(m)) 948567e51e1SAlan Cox vm_page_dirty(m); 949567e51e1SAlan Cox if (m->dirty != 0) 950867a482dSJohn Dyson mincoreinfo |= MINCORE_MODIFIED_OTHER; 95101cef4caSMark Johnston 952c46b90e9SAlan Cox /* 9533407fefeSKonstantin Belousov * The first test for PGA_REFERENCED is an 954c46b90e9SAlan Cox * optimization. The second test is 955c46b90e9SAlan Cox * required because a concurrent pmap 956c46b90e9SAlan Cox * operation could clear the last reference 9573407fefeSKonstantin Belousov * and set PGA_REFERENCED before the call to 958c46b90e9SAlan Cox * pmap_is_referenced(). 959c46b90e9SAlan Cox */ 9605cff1f4dSMark Johnston if ((m->a.flags & PGA_REFERENCED) != 0 || 961c46b90e9SAlan Cox pmap_is_referenced(m) || 9625cff1f4dSMark Johnston (m->a.flags & PGA_REFERENCED) != 0) 963867a482dSJohn Dyson mincoreinfo |= MINCORE_REFERENCED_OTHER; 9649b5a5d81SJohn Dyson } 965567e51e1SAlan Cox if (object != NULL) 96689f6b863SAttilio Rao VM_OBJECT_WUNLOCK(object); 967867a482dSJohn Dyson 968867a482dSJohn Dyson /* 969dd2622a8SAlan Cox * subyte may page fault. In case it needs to modify 970dd2622a8SAlan Cox * the map, we release the lock. 971dd2622a8SAlan Cox */ 972dd2622a8SAlan Cox vm_map_unlock_read(map); 973dd2622a8SAlan Cox 974dd2622a8SAlan Cox /* 975867a482dSJohn Dyson * calculate index into user supplied byte vector 976867a482dSJohn Dyson */ 977d1780e8dSKonstantin Belousov vecindex = atop(addr - first_addr); 978867a482dSJohn Dyson 979867a482dSJohn Dyson /* 980867a482dSJohn Dyson * If we have skipped map entries, we need to make sure that 981867a482dSJohn Dyson * the byte vector is zeroed for those skipped entries. 982867a482dSJohn Dyson */ 983867a482dSJohn Dyson while ((lastvecindex + 1) < vecindex) { 9846a87d217SJohn Baldwin ++lastvecindex; 985867a482dSJohn Dyson error = subyte(vec + lastvecindex, 0); 986867a482dSJohn Dyson if (error) { 987d2c60af8SMatthew Dillon error = EFAULT; 988d2c60af8SMatthew Dillon goto done2; 989867a482dSJohn Dyson } 990867a482dSJohn Dyson } 991867a482dSJohn Dyson 992867a482dSJohn Dyson /* 993867a482dSJohn Dyson * Pass the page information to the user 994867a482dSJohn Dyson */ 995867a482dSJohn Dyson error = subyte(vec + vecindex, mincoreinfo); 996867a482dSJohn Dyson if (error) { 997d2c60af8SMatthew Dillon error = EFAULT; 998d2c60af8SMatthew Dillon goto done2; 999867a482dSJohn Dyson } 1000dd2622a8SAlan Cox 1001dd2622a8SAlan Cox /* 1002dd2622a8SAlan Cox * If the map has changed, due to the subyte, the previous 1003dd2622a8SAlan Cox * output may be invalid. 1004dd2622a8SAlan Cox */ 1005dd2622a8SAlan Cox vm_map_lock_read(map); 1006dd2622a8SAlan Cox if (timestamp != map->timestamp) 1007dd2622a8SAlan Cox goto RestartScan; 1008dd2622a8SAlan Cox 1009867a482dSJohn Dyson lastvecindex = vecindex; 101002c04a2fSJohn Dyson } 1011867a482dSJohn Dyson } 1012867a482dSJohn Dyson 1013867a482dSJohn Dyson /* 1014dd2622a8SAlan Cox * subyte may page fault. In case it needs to modify 1015dd2622a8SAlan Cox * the map, we release the lock. 1016dd2622a8SAlan Cox */ 1017dd2622a8SAlan Cox vm_map_unlock_read(map); 1018dd2622a8SAlan Cox 1019dd2622a8SAlan Cox /* 1020867a482dSJohn Dyson * Zero the last entries in the byte vector. 1021867a482dSJohn Dyson */ 1022d1780e8dSKonstantin Belousov vecindex = atop(end - first_addr); 1023867a482dSJohn Dyson while ((lastvecindex + 1) < vecindex) { 10246a87d217SJohn Baldwin ++lastvecindex; 1025867a482dSJohn Dyson error = subyte(vec + lastvecindex, 0); 1026867a482dSJohn Dyson if (error) { 1027d2c60af8SMatthew Dillon error = EFAULT; 1028d2c60af8SMatthew Dillon goto done2; 1029867a482dSJohn Dyson } 1030867a482dSJohn Dyson } 1031867a482dSJohn Dyson 1032dd2622a8SAlan Cox /* 1033dd2622a8SAlan Cox * If the map has changed, due to the subyte, the previous 1034dd2622a8SAlan Cox * output may be invalid. 1035dd2622a8SAlan Cox */ 1036dd2622a8SAlan Cox vm_map_lock_read(map); 1037dd2622a8SAlan Cox if (timestamp != map->timestamp) 1038dd2622a8SAlan Cox goto RestartScan; 1039eff50fcdSAlan Cox vm_map_unlock_read(map); 1040d2c60af8SMatthew Dillon done2: 1041d2c60af8SMatthew Dillon return (error); 1042df8bae1dSRodney W. Grimes } 1043df8bae1dSRodney W. Grimes 1044d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 1045df8bae1dSRodney W. Grimes struct mlock_args { 1046651bb817SAlexander Langer const void *addr; 1047df8bae1dSRodney W. Grimes size_t len; 1048df8bae1dSRodney W. Grimes }; 1049d2d3e875SBruce Evans #endif 1050df8bae1dSRodney W. Grimes int 105104e89ffbSKonstantin Belousov sys_mlock(struct thread *td, struct mlock_args *uap) 1052df8bae1dSRodney W. Grimes { 1053995d7069SGleb Smirnoff 1054496ab053SKonstantin Belousov return (kern_mlock(td->td_proc, td->td_ucred, 1055496ab053SKonstantin Belousov __DECONST(uintptr_t, uap->addr), uap->len)); 1056995d7069SGleb Smirnoff } 1057995d7069SGleb Smirnoff 1058995d7069SGleb Smirnoff int 1059496ab053SKonstantin Belousov kern_mlock(struct proc *proc, struct ucred *cred, uintptr_t addr0, size_t len) 1060995d7069SGleb Smirnoff { 1061bb734798SDon Lewis vm_offset_t addr, end, last, start; 1062bb734798SDon Lewis vm_size_t npages, size; 10633ac7d297SAndrey Zonov vm_map_t map; 10641ba5ad42SEdward Tomasz Napierala unsigned long nsize; 1065bb734798SDon Lewis int error; 1066df8bae1dSRodney W. Grimes 1067cc426dd3SMateusz Guzik error = priv_check_cred(cred, PRIV_VM_MLOCK); 106847934cefSDon Lewis if (error) 106947934cefSDon Lewis return (error); 1070496ab053SKonstantin Belousov addr = addr0; 1071995d7069SGleb Smirnoff size = len; 1072bb734798SDon Lewis last = addr + size; 107316929939SDon Lewis start = trunc_page(addr); 1074bb734798SDon Lewis end = round_page(last); 1075bb734798SDon Lewis if (last < addr || end < addr) 1076df8bae1dSRodney W. Grimes return (EINVAL); 107716929939SDon Lewis npages = atop(end - start); 107854a3a114SMark Johnston if (npages > vm_page_max_user_wired) 107916929939SDon Lewis return (ENOMEM); 10803ac7d297SAndrey Zonov map = &proc->p_vmspace->vm_map; 108147934cefSDon Lewis PROC_LOCK(proc); 10823ac7d297SAndrey Zonov nsize = ptoa(npages + pmap_wired_count(map->pmap)); 1083f6f6d240SMateusz Guzik if (nsize > lim_cur_proc(proc, RLIMIT_MEMLOCK)) { 108447934cefSDon Lewis PROC_UNLOCK(proc); 10854a40e3d4SJohn Dyson return (ENOMEM); 108691d5354aSJohn Baldwin } 108747934cefSDon Lewis PROC_UNLOCK(proc); 1088afcc55f3SEdward Tomasz Napierala #ifdef RACCT 10894b5c9cf6SEdward Tomasz Napierala if (racct_enable) { 10901ba5ad42SEdward Tomasz Napierala PROC_LOCK(proc); 10911ba5ad42SEdward Tomasz Napierala error = racct_set(proc, RACCT_MEMLOCK, nsize); 10921ba5ad42SEdward Tomasz Napierala PROC_UNLOCK(proc); 10931ba5ad42SEdward Tomasz Napierala if (error != 0) 10941ba5ad42SEdward Tomasz Napierala return (ENOMEM); 10954b5c9cf6SEdward Tomasz Napierala } 1096afcc55f3SEdward Tomasz Napierala #endif 10973ac7d297SAndrey Zonov error = vm_map_wire(map, start, end, 109816929939SDon Lewis VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); 1099afcc55f3SEdward Tomasz Napierala #ifdef RACCT 11004b5c9cf6SEdward Tomasz Napierala if (racct_enable && error != KERN_SUCCESS) { 11011ba5ad42SEdward Tomasz Napierala PROC_LOCK(proc); 11021ba5ad42SEdward Tomasz Napierala racct_set(proc, RACCT_MEMLOCK, 11033ac7d297SAndrey Zonov ptoa(pmap_wired_count(map->pmap))); 11041ba5ad42SEdward Tomasz Napierala PROC_UNLOCK(proc); 11051ba5ad42SEdward Tomasz Napierala } 1106afcc55f3SEdward Tomasz Napierala #endif 1107d301b358SKonstantin Belousov switch (error) { 1108d301b358SKonstantin Belousov case KERN_SUCCESS: 1109d301b358SKonstantin Belousov return (0); 1110d301b358SKonstantin Belousov case KERN_INVALID_ARGUMENT: 1111d301b358SKonstantin Belousov return (EINVAL); 1112d301b358SKonstantin Belousov default: 1113d301b358SKonstantin Belousov return (ENOMEM); 1114d301b358SKonstantin Belousov } 1115df8bae1dSRodney W. Grimes } 1116df8bae1dSRodney W. Grimes 1117d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_ 11184a40e3d4SJohn Dyson struct mlockall_args { 11194a40e3d4SJohn Dyson int how; 11204a40e3d4SJohn Dyson }; 11214a40e3d4SJohn Dyson #endif 11224a40e3d4SJohn Dyson 11234a40e3d4SJohn Dyson int 112404e89ffbSKonstantin Belousov sys_mlockall(struct thread *td, struct mlockall_args *uap) 11254a40e3d4SJohn Dyson { 1126abd498aaSBruce M Simpson vm_map_t map; 1127abd498aaSBruce M Simpson int error; 1128abd498aaSBruce M Simpson 1129abd498aaSBruce M Simpson map = &td->td_proc->p_vmspace->vm_map; 11307e19eda4SAndrey Zonov error = priv_check(td, PRIV_VM_MLOCK); 11317e19eda4SAndrey Zonov if (error) 11327e19eda4SAndrey Zonov return (error); 1133abd498aaSBruce M Simpson 1134abd498aaSBruce M Simpson if ((uap->how == 0) || ((uap->how & ~(MCL_CURRENT|MCL_FUTURE)) != 0)) 1135abd498aaSBruce M Simpson return (EINVAL); 1136abd498aaSBruce M Simpson 1137abd498aaSBruce M Simpson /* 1138abd498aaSBruce M Simpson * If wiring all pages in the process would cause it to exceed 1139abd498aaSBruce M Simpson * a hard resource limit, return ENOMEM. 1140abd498aaSBruce M Simpson */ 11417e19eda4SAndrey Zonov if (!old_mlock && uap->how & MCL_CURRENT) { 11422554f86aSMateusz Guzik if (map->size > lim_cur(td, RLIMIT_MEMLOCK)) 1143abd498aaSBruce M Simpson return (ENOMEM); 114491d5354aSJohn Baldwin } 1145afcc55f3SEdward Tomasz Napierala #ifdef RACCT 11464b5c9cf6SEdward Tomasz Napierala if (racct_enable) { 11471ba5ad42SEdward Tomasz Napierala PROC_LOCK(td->td_proc); 11481ba5ad42SEdward Tomasz Napierala error = racct_set(td->td_proc, RACCT_MEMLOCK, map->size); 11491ba5ad42SEdward Tomasz Napierala PROC_UNLOCK(td->td_proc); 11501ba5ad42SEdward Tomasz Napierala if (error != 0) 11511ba5ad42SEdward Tomasz Napierala return (ENOMEM); 11524b5c9cf6SEdward Tomasz Napierala } 1153afcc55f3SEdward Tomasz Napierala #endif 1154abd498aaSBruce M Simpson 1155abd498aaSBruce M Simpson if (uap->how & MCL_FUTURE) { 1156abd498aaSBruce M Simpson vm_map_lock(map); 1157abd498aaSBruce M Simpson vm_map_modflags(map, MAP_WIREFUTURE, 0); 1158abd498aaSBruce M Simpson vm_map_unlock(map); 1159abd498aaSBruce M Simpson error = 0; 1160abd498aaSBruce M Simpson } 1161abd498aaSBruce M Simpson 1162abd498aaSBruce M Simpson if (uap->how & MCL_CURRENT) { 1163abd498aaSBruce M Simpson /* 1164abd498aaSBruce M Simpson * P1003.1-2001 mandates that all currently mapped pages 1165abd498aaSBruce M Simpson * will be memory resident and locked (wired) upon return 1166abd498aaSBruce M Simpson * from mlockall(). vm_map_wire() will wire pages, by 1167abd498aaSBruce M Simpson * calling vm_fault_wire() for each page in the region. 1168abd498aaSBruce M Simpson */ 1169abd498aaSBruce M Simpson error = vm_map_wire(map, vm_map_min(map), vm_map_max(map), 1170abd498aaSBruce M Simpson VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK); 117154a3a114SMark Johnston if (error == KERN_SUCCESS) 117254a3a114SMark Johnston error = 0; 117354a3a114SMark Johnston else if (error == KERN_RESOURCE_SHORTAGE) 117454a3a114SMark Johnston error = ENOMEM; 117554a3a114SMark Johnston else 117654a3a114SMark Johnston error = EAGAIN; 1177abd498aaSBruce M Simpson } 1178afcc55f3SEdward Tomasz Napierala #ifdef RACCT 11794b5c9cf6SEdward Tomasz Napierala if (racct_enable && error != KERN_SUCCESS) { 11801ba5ad42SEdward Tomasz Napierala PROC_LOCK(td->td_proc); 11811ba5ad42SEdward Tomasz Napierala racct_set(td->td_proc, RACCT_MEMLOCK, 11823ac7d297SAndrey Zonov ptoa(pmap_wired_count(map->pmap))); 11831ba5ad42SEdward Tomasz Napierala PROC_UNLOCK(td->td_proc); 11841ba5ad42SEdward Tomasz Napierala } 1185afcc55f3SEdward Tomasz Napierala #endif 1186abd498aaSBruce M Simpson 1187abd498aaSBruce M Simpson return (error); 11884a40e3d4SJohn Dyson } 11894a40e3d4SJohn Dyson 11904a40e3d4SJohn Dyson #ifndef _SYS_SYSPROTO_H_ 1191fa721254SAlfred Perlstein struct munlockall_args { 1192abd498aaSBruce M Simpson register_t dummy; 11934a40e3d4SJohn Dyson }; 11944a40e3d4SJohn Dyson #endif 11954a40e3d4SJohn Dyson 11964a40e3d4SJohn Dyson int 119704e89ffbSKonstantin Belousov sys_munlockall(struct thread *td, struct munlockall_args *uap) 11984a40e3d4SJohn Dyson { 1199abd498aaSBruce M Simpson vm_map_t map; 1200abd498aaSBruce M Simpson int error; 1201abd498aaSBruce M Simpson 1202abd498aaSBruce M Simpson map = &td->td_proc->p_vmspace->vm_map; 1203acd3428bSRobert Watson error = priv_check(td, PRIV_VM_MUNLOCK); 1204abd498aaSBruce M Simpson if (error) 1205abd498aaSBruce M Simpson return (error); 1206abd498aaSBruce M Simpson 1207abd498aaSBruce M Simpson /* Clear the MAP_WIREFUTURE flag from this vm_map. */ 1208abd498aaSBruce M Simpson vm_map_lock(map); 1209abd498aaSBruce M Simpson vm_map_modflags(map, 0, MAP_WIREFUTURE); 1210abd498aaSBruce M Simpson vm_map_unlock(map); 1211abd498aaSBruce M Simpson 1212abd498aaSBruce M Simpson /* Forcibly unwire all pages. */ 1213abd498aaSBruce M Simpson error = vm_map_unwire(map, vm_map_min(map), vm_map_max(map), 1214abd498aaSBruce M Simpson VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK); 1215afcc55f3SEdward Tomasz Napierala #ifdef RACCT 12164b5c9cf6SEdward Tomasz Napierala if (racct_enable && error == KERN_SUCCESS) { 12171ba5ad42SEdward Tomasz Napierala PROC_LOCK(td->td_proc); 12181ba5ad42SEdward Tomasz Napierala racct_set(td->td_proc, RACCT_MEMLOCK, 0); 12191ba5ad42SEdward Tomasz Napierala PROC_UNLOCK(td->td_proc); 12201ba5ad42SEdward Tomasz Napierala } 1221afcc55f3SEdward Tomasz Napierala #endif 1222abd498aaSBruce M Simpson 1223abd498aaSBruce M Simpson return (error); 12244a40e3d4SJohn Dyson } 12254a40e3d4SJohn Dyson 12264a40e3d4SJohn Dyson #ifndef _SYS_SYSPROTO_H_ 1227df8bae1dSRodney W. Grimes struct munlock_args { 1228651bb817SAlexander Langer const void *addr; 1229df8bae1dSRodney W. Grimes size_t len; 1230df8bae1dSRodney W. Grimes }; 1231d2d3e875SBruce Evans #endif 1232df8bae1dSRodney W. Grimes int 123369cdfcefSEdward Tomasz Napierala sys_munlock(struct thread *td, struct munlock_args *uap) 1234df8bae1dSRodney W. Grimes { 123569cdfcefSEdward Tomasz Napierala 1236496ab053SKonstantin Belousov return (kern_munlock(td, (uintptr_t)uap->addr, uap->len)); 123769cdfcefSEdward Tomasz Napierala } 123869cdfcefSEdward Tomasz Napierala 123969cdfcefSEdward Tomasz Napierala int 1240496ab053SKonstantin Belousov kern_munlock(struct thread *td, uintptr_t addr0, size_t size) 124169cdfcefSEdward Tomasz Napierala { 1242496ab053SKonstantin Belousov vm_offset_t addr, end, last, start; 1243fc2b1679SJeremie Le Hen #ifdef RACCT 1244c92b5069SJeremie Le Hen vm_map_t map; 1245fc2b1679SJeremie Le Hen #endif 1246df8bae1dSRodney W. Grimes int error; 1247df8bae1dSRodney W. Grimes 1248acd3428bSRobert Watson error = priv_check(td, PRIV_VM_MUNLOCK); 124947934cefSDon Lewis if (error) 125047934cefSDon Lewis return (error); 1251496ab053SKonstantin Belousov addr = addr0; 1252bb734798SDon Lewis last = addr + size; 125316929939SDon Lewis start = trunc_page(addr); 1254bb734798SDon Lewis end = round_page(last); 1255bb734798SDon Lewis if (last < addr || end < addr) 1256df8bae1dSRodney W. Grimes return (EINVAL); 125716929939SDon Lewis error = vm_map_unwire(&td->td_proc->p_vmspace->vm_map, start, end, 125816929939SDon Lewis VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); 1259afcc55f3SEdward Tomasz Napierala #ifdef RACCT 12604b5c9cf6SEdward Tomasz Napierala if (racct_enable && error == KERN_SUCCESS) { 12611ba5ad42SEdward Tomasz Napierala PROC_LOCK(td->td_proc); 1262c92b5069SJeremie Le Hen map = &td->td_proc->p_vmspace->vm_map; 1263c92b5069SJeremie Le Hen racct_set(td->td_proc, RACCT_MEMLOCK, 1264c92b5069SJeremie Le Hen ptoa(pmap_wired_count(map->pmap))); 12651ba5ad42SEdward Tomasz Napierala PROC_UNLOCK(td->td_proc); 12661ba5ad42SEdward Tomasz Napierala } 1267afcc55f3SEdward Tomasz Napierala #endif 1268df8bae1dSRodney W. Grimes return (error == KERN_SUCCESS ? 0 : ENOMEM); 1269df8bae1dSRodney W. Grimes } 1270df8bae1dSRodney W. Grimes 1271df8bae1dSRodney W. Grimes /* 1272c8daea13SAlexander Kabaev * vm_mmap_vnode() 1273c8daea13SAlexander Kabaev * 1274c8daea13SAlexander Kabaev * Helper function for vm_mmap. Perform sanity check specific for mmap 1275c8daea13SAlexander Kabaev * operations on vnodes. 1276c8daea13SAlexander Kabaev */ 1277c8daea13SAlexander Kabaev int 1278c8daea13SAlexander Kabaev vm_mmap_vnode(struct thread *td, vm_size_t objsize, 1279c8daea13SAlexander Kabaev vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp, 128084110e7eSKonstantin Belousov struct vnode *vp, vm_ooffset_t *foffp, vm_object_t *objp, 128184110e7eSKonstantin Belousov boolean_t *writecounted) 1282c8daea13SAlexander Kabaev { 1283c8daea13SAlexander Kabaev struct vattr va; 1284c8daea13SAlexander Kabaev vm_object_t obj; 1285bd0e1bebSMark Johnston vm_ooffset_t foff; 12860359a12eSAttilio Rao struct ucred *cred; 128778022527SKonstantin Belousov int error, flags; 128878022527SKonstantin Belousov bool writex; 1289c8daea13SAlexander Kabaev 12900359a12eSAttilio Rao cred = td->td_ucred; 129178022527SKonstantin Belousov writex = (*maxprotp & VM_PROT_WRITE) != 0 && 129278022527SKonstantin Belousov (*flagsp & MAP_SHARED) != 0; 1293a92a971bSMateusz Guzik if ((error = vget(vp, LK_SHARED)) != 0) 1294c8daea13SAlexander Kabaev return (error); 12950df42647SRobert Watson AUDIT_ARG_VNODE1(vp); 129664345f0bSJohn Baldwin foff = *foffp; 1297c8daea13SAlexander Kabaev flags = *flagsp; 12988516dd18SPoul-Henning Kamp obj = vp->v_object; 1299c8daea13SAlexander Kabaev if (vp->v_type == VREG) { 1300c8daea13SAlexander Kabaev /* 1301c8daea13SAlexander Kabaev * Get the proper underlying object 1302c8daea13SAlexander Kabaev */ 13038516dd18SPoul-Henning Kamp if (obj == NULL) { 1304c8daea13SAlexander Kabaev error = EINVAL; 1305c8daea13SAlexander Kabaev goto done; 1306c8daea13SAlexander Kabaev } 1307e5f299ffSKonstantin Belousov if (obj->type == OBJT_VNODE && obj->handle != vp) { 1308c8daea13SAlexander Kabaev vput(vp); 1309c8daea13SAlexander Kabaev vp = (struct vnode *)obj->handle; 131084110e7eSKonstantin Belousov /* 131184110e7eSKonstantin Belousov * Bypass filesystems obey the mpsafety of the 131253f5f8a0SKonstantin Belousov * underlying fs. Tmpfs never bypasses. 131384110e7eSKonstantin Belousov */ 1314a92a971bSMateusz Guzik error = vget(vp, LK_SHARED); 13155050aa86SKonstantin Belousov if (error != 0) 131684110e7eSKonstantin Belousov return (error); 131784110e7eSKonstantin Belousov } 131878022527SKonstantin Belousov if (writex) { 131984110e7eSKonstantin Belousov *writecounted = TRUE; 1320fe7bcbafSKyle Evans vm_pager_update_writecount(obj, 0, objsize); 132184110e7eSKonstantin Belousov } 1322c8daea13SAlexander Kabaev } else { 1323c8daea13SAlexander Kabaev error = EINVAL; 1324c8daea13SAlexander Kabaev goto done; 1325c8daea13SAlexander Kabaev } 13260359a12eSAttilio Rao if ((error = VOP_GETATTR(vp, &va, cred))) 1327c8daea13SAlexander Kabaev goto done; 1328c92163dcSChristian S.J. Peron #ifdef MAC 13297077c426SJohn Baldwin /* This relies on VM_PROT_* matching PROT_*. */ 13307077c426SJohn Baldwin error = mac_vnode_check_mmap(cred, vp, (int)prot, flags); 1331c92163dcSChristian S.J. Peron if (error != 0) 1332c92163dcSChristian S.J. Peron goto done; 1333c92163dcSChristian S.J. Peron #endif 1334c8daea13SAlexander Kabaev if ((flags & MAP_SHARED) != 0) { 1335c8daea13SAlexander Kabaev if ((va.va_flags & (SF_SNAPSHOT|IMMUTABLE|APPEND)) != 0) { 13367077c426SJohn Baldwin if (prot & VM_PROT_WRITE) { 1337c8daea13SAlexander Kabaev error = EPERM; 1338c8daea13SAlexander Kabaev goto done; 1339c8daea13SAlexander Kabaev } 1340c8daea13SAlexander Kabaev *maxprotp &= ~VM_PROT_WRITE; 1341c8daea13SAlexander Kabaev } 1342c8daea13SAlexander Kabaev } 1343c8daea13SAlexander Kabaev /* 1344c8daea13SAlexander Kabaev * If it is a regular file without any references 1345c8daea13SAlexander Kabaev * we do not need to sync it. 1346c8daea13SAlexander Kabaev * Adjust object size to be the size of actual file. 1347c8daea13SAlexander Kabaev */ 1348c8daea13SAlexander Kabaev objsize = round_page(va.va_size); 1349c8daea13SAlexander Kabaev if (va.va_nlink == 0) 1350c8daea13SAlexander Kabaev flags |= MAP_NOSYNC; 13513d653db0SAlan Cox if (obj->type == OBJT_VNODE) { 1352e5f299ffSKonstantin Belousov obj = vm_pager_allocate(OBJT_VNODE, vp, objsize, prot, foff, 1353e5f299ffSKonstantin Belousov cred); 1354c8daea13SAlexander Kabaev if (obj == NULL) { 135564345f0bSJohn Baldwin error = ENOMEM; 1356c8daea13SAlexander Kabaev goto done; 1357c8daea13SAlexander Kabaev } 13583d653db0SAlan Cox } else { 13593d653db0SAlan Cox KASSERT(obj->type == OBJT_DEFAULT || obj->type == OBJT_SWAP, 13603d653db0SAlan Cox ("wrong object type")); 1361f2410510SJeff Roberson vm_object_reference(obj); 13623d653db0SAlan Cox #if VM_NRESERVLEVEL > 0 1363f2410510SJeff Roberson if ((obj->flags & OBJ_COLORED) == 0) { 1364f2410510SJeff Roberson VM_OBJECT_WLOCK(obj); 13653d653db0SAlan Cox vm_object_color(obj, 0); 13663d653db0SAlan Cox VM_OBJECT_WUNLOCK(obj); 13673d653db0SAlan Cox } 1368f2410510SJeff Roberson #endif 1369f2410510SJeff Roberson } 1370c8daea13SAlexander Kabaev *objp = obj; 1371c8daea13SAlexander Kabaev *flagsp = flags; 137264345f0bSJohn Baldwin 1373643656cfSMateusz Guzik VOP_MMAPPED(vp); 13741e309003SDiomidis Spinellis 1375c8daea13SAlexander Kabaev done: 1376bafa6cfcSKonstantin Belousov if (error != 0 && *writecounted) { 1377bafa6cfcSKonstantin Belousov *writecounted = FALSE; 1378fe7bcbafSKyle Evans vm_pager_update_writecount(obj, objsize, 0); 1379bafa6cfcSKonstantin Belousov } 1380c8daea13SAlexander Kabaev vput(vp); 1381c8daea13SAlexander Kabaev return (error); 1382c8daea13SAlexander Kabaev } 1383c8daea13SAlexander Kabaev 1384c8daea13SAlexander Kabaev /* 138598df9218SJohn Baldwin * vm_mmap_cdev() 138698df9218SJohn Baldwin * 138798df9218SJohn Baldwin * Helper function for vm_mmap. Perform sanity check specific for mmap 138898df9218SJohn Baldwin * operations on cdevs. 138998df9218SJohn Baldwin */ 139098df9218SJohn Baldwin int 13917077c426SJohn Baldwin vm_mmap_cdev(struct thread *td, vm_size_t objsize, vm_prot_t prot, 13927077c426SJohn Baldwin vm_prot_t *maxprotp, int *flagsp, struct cdev *cdev, struct cdevsw *dsw, 13937077c426SJohn Baldwin vm_ooffset_t *foff, vm_object_t *objp) 139498df9218SJohn Baldwin { 139598df9218SJohn Baldwin vm_object_t obj; 13967077c426SJohn Baldwin int error, flags; 139798df9218SJohn Baldwin 139898df9218SJohn Baldwin flags = *flagsp; 139998df9218SJohn Baldwin 140091a35e78SKonstantin Belousov if (dsw->d_flags & D_MMAP_ANON) { 14017077c426SJohn Baldwin *objp = NULL; 14027077c426SJohn Baldwin *foff = 0; 140398df9218SJohn Baldwin *maxprotp = VM_PROT_ALL; 140498df9218SJohn Baldwin *flagsp |= MAP_ANON; 140598df9218SJohn Baldwin return (0); 140698df9218SJohn Baldwin } 140798df9218SJohn Baldwin /* 140864345f0bSJohn Baldwin * cdevs do not provide private mappings of any kind. 140998df9218SJohn Baldwin */ 141098df9218SJohn Baldwin if ((*maxprotp & VM_PROT_WRITE) == 0 && 14117077c426SJohn Baldwin (prot & VM_PROT_WRITE) != 0) 141298df9218SJohn Baldwin return (EACCES); 14137077c426SJohn Baldwin if (flags & (MAP_PRIVATE|MAP_COPY)) 141498df9218SJohn Baldwin return (EINVAL); 141598df9218SJohn Baldwin /* 141698df9218SJohn Baldwin * Force device mappings to be shared. 141798df9218SJohn Baldwin */ 141898df9218SJohn Baldwin flags |= MAP_SHARED; 141998df9218SJohn Baldwin #ifdef MAC_XXX 14207077c426SJohn Baldwin error = mac_cdev_check_mmap(td->td_ucred, cdev, (int)prot); 14217077c426SJohn Baldwin if (error != 0) 142298df9218SJohn Baldwin return (error); 142398df9218SJohn Baldwin #endif 142464345f0bSJohn Baldwin /* 142564345f0bSJohn Baldwin * First, try d_mmap_single(). If that is not implemented 142664345f0bSJohn Baldwin * (returns ENODEV), fall back to using the device pager. 142764345f0bSJohn Baldwin * Note that d_mmap_single() must return a reference to the 142864345f0bSJohn Baldwin * object (it needs to bump the reference count of the object 142964345f0bSJohn Baldwin * it returns somehow). 143064345f0bSJohn Baldwin * 143164345f0bSJohn Baldwin * XXX assumes VM_PROT_* == PROT_* 143264345f0bSJohn Baldwin */ 143364345f0bSJohn Baldwin error = dsw->d_mmap_single(cdev, foff, objsize, objp, (int)prot); 143464345f0bSJohn Baldwin if (error != ENODEV) 143564345f0bSJohn Baldwin return (error); 14363364c323SKonstantin Belousov obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff, 14373364c323SKonstantin Belousov td->td_ucred); 143898df9218SJohn Baldwin if (obj == NULL) 143998df9218SJohn Baldwin return (EINVAL); 144098df9218SJohn Baldwin *objp = obj; 144198df9218SJohn Baldwin *flagsp = flags; 144298df9218SJohn Baldwin return (0); 144398df9218SJohn Baldwin } 144498df9218SJohn Baldwin 144598df9218SJohn Baldwin /* 1446d2c60af8SMatthew Dillon * vm_mmap() 1447d2c60af8SMatthew Dillon * 14487077c426SJohn Baldwin * Internal version of mmap used by exec, sys5 shared memory, and 14497077c426SJohn Baldwin * various device drivers. Handle is either a vnode pointer, a 14507077c426SJohn Baldwin * character device, or NULL for MAP_ANON. 1451df8bae1dSRodney W. Grimes */ 1452df8bae1dSRodney W. Grimes int 1453b9dcd593SBruce Evans vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, 1454b9dcd593SBruce Evans vm_prot_t maxprot, int flags, 145598df9218SJohn Baldwin objtype_t handle_type, void *handle, 1456b9dcd593SBruce Evans vm_ooffset_t foff) 1457df8bae1dSRodney W. Grimes { 14587077c426SJohn Baldwin vm_object_t object; 1459b40ce416SJulian Elischer struct thread *td = curthread; 14607077c426SJohn Baldwin int error; 146184110e7eSKonstantin Belousov boolean_t writecounted; 1462df8bae1dSRodney W. Grimes 1463df8bae1dSRodney W. Grimes if (size == 0) 14647077c426SJohn Baldwin return (EINVAL); 1465df8bae1dSRodney W. Grimes 1466749474f2SPeter Wemm size = round_page(size); 1467010ba384SMark Johnston object = NULL; 14687077c426SJohn Baldwin writecounted = FALSE; 14697077c426SJohn Baldwin 14707077c426SJohn Baldwin /* 14717077c426SJohn Baldwin * Lookup/allocate object. 14727077c426SJohn Baldwin */ 14737077c426SJohn Baldwin switch (handle_type) { 14747077c426SJohn Baldwin case OBJT_DEVICE: { 14757077c426SJohn Baldwin struct cdevsw *dsw; 14767077c426SJohn Baldwin struct cdev *cdev; 14777077c426SJohn Baldwin int ref; 14787077c426SJohn Baldwin 14797077c426SJohn Baldwin cdev = handle; 14807077c426SJohn Baldwin dsw = dev_refthread(cdev, &ref); 14817077c426SJohn Baldwin if (dsw == NULL) 14827077c426SJohn Baldwin return (ENXIO); 14837077c426SJohn Baldwin error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, cdev, 14847077c426SJohn Baldwin dsw, &foff, &object); 14857077c426SJohn Baldwin dev_relthread(cdev, ref); 14867077c426SJohn Baldwin break; 14877077c426SJohn Baldwin } 14887077c426SJohn Baldwin case OBJT_VNODE: 14897077c426SJohn Baldwin error = vm_mmap_vnode(td, size, prot, &maxprot, &flags, 14907077c426SJohn Baldwin handle, &foff, &object, &writecounted); 14917077c426SJohn Baldwin break; 14927077c426SJohn Baldwin case OBJT_DEFAULT: 14937077c426SJohn Baldwin if (handle == NULL) { 14947077c426SJohn Baldwin error = 0; 14957077c426SJohn Baldwin break; 14967077c426SJohn Baldwin } 14977077c426SJohn Baldwin /* FALLTHROUGH */ 14987077c426SJohn Baldwin default: 14997077c426SJohn Baldwin error = EINVAL; 15007077c426SJohn Baldwin break; 15017077c426SJohn Baldwin } 15027077c426SJohn Baldwin if (error) 15037077c426SJohn Baldwin return (error); 15047077c426SJohn Baldwin 15057077c426SJohn Baldwin error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object, 15067077c426SJohn Baldwin foff, writecounted, td); 15077077c426SJohn Baldwin if (error != 0 && object != NULL) { 15087077c426SJohn Baldwin /* 15097077c426SJohn Baldwin * If this mapping was accounted for in the vnode's 15107077c426SJohn Baldwin * writecount, then undo that now. 15117077c426SJohn Baldwin */ 15127077c426SJohn Baldwin if (writecounted) 1513fe7bcbafSKyle Evans vm_pager_release_writecount(object, 0, size); 15147077c426SJohn Baldwin vm_object_deallocate(object); 15157077c426SJohn Baldwin } 15167077c426SJohn Baldwin return (error); 15177077c426SJohn Baldwin } 15187077c426SJohn Baldwin 15197077c426SJohn Baldwin int 152067a659d2SKonstantin Belousov kern_mmap_racct_check(struct thread *td, vm_map_t map, vm_size_t size) 15217077c426SJohn Baldwin { 152267a659d2SKonstantin Belousov int error; 1523df8bae1dSRodney W. Grimes 15242554f86aSMateusz Guzik RACCT_PROC_LOCK(td->td_proc); 15252554f86aSMateusz Guzik if (map->size + size > lim_cur(td, RLIMIT_VMEM)) { 15262554f86aSMateusz Guzik RACCT_PROC_UNLOCK(td->td_proc); 1527070f64feSMatthew Dillon return (ENOMEM); 1528070f64feSMatthew Dillon } 1529a6492969SAlan Cox if (racct_set(td->td_proc, RACCT_VMEM, map->size + size)) { 15302554f86aSMateusz Guzik RACCT_PROC_UNLOCK(td->td_proc); 15311ba5ad42SEdward Tomasz Napierala return (ENOMEM); 15321ba5ad42SEdward Tomasz Napierala } 15337e19eda4SAndrey Zonov if (!old_mlock && map->flags & MAP_WIREFUTURE) { 15343ac7d297SAndrey Zonov if (ptoa(pmap_wired_count(map->pmap)) + size > 15352554f86aSMateusz Guzik lim_cur(td, RLIMIT_MEMLOCK)) { 153667a659d2SKonstantin Belousov racct_set_force(td->td_proc, RACCT_VMEM, map->size); 15372554f86aSMateusz Guzik RACCT_PROC_UNLOCK(td->td_proc); 15387e19eda4SAndrey Zonov return (ENOMEM); 15397e19eda4SAndrey Zonov } 15407e19eda4SAndrey Zonov error = racct_set(td->td_proc, RACCT_MEMLOCK, 15413ac7d297SAndrey Zonov ptoa(pmap_wired_count(map->pmap)) + size); 15427e19eda4SAndrey Zonov if (error != 0) { 154367a659d2SKonstantin Belousov racct_set_force(td->td_proc, RACCT_VMEM, map->size); 15442554f86aSMateusz Guzik RACCT_PROC_UNLOCK(td->td_proc); 15457e19eda4SAndrey Zonov return (error); 15467e19eda4SAndrey Zonov } 15477e19eda4SAndrey Zonov } 15482554f86aSMateusz Guzik RACCT_PROC_UNLOCK(td->td_proc); 154967a659d2SKonstantin Belousov return (0); 155067a659d2SKonstantin Belousov } 155167a659d2SKonstantin Belousov 155267a659d2SKonstantin Belousov /* 155367a659d2SKonstantin Belousov * Internal version of mmap that maps a specific VM object into an 155467a659d2SKonstantin Belousov * map. Called by mmap for MAP_ANON, vm_mmap, shm_mmap, and vn_mmap. 155567a659d2SKonstantin Belousov */ 155667a659d2SKonstantin Belousov int 155767a659d2SKonstantin Belousov vm_mmap_object(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, 155867a659d2SKonstantin Belousov vm_prot_t maxprot, int flags, vm_object_t object, vm_ooffset_t foff, 155967a659d2SKonstantin Belousov boolean_t writecounted, struct thread *td) 156067a659d2SKonstantin Belousov { 156167a659d2SKonstantin Belousov vm_offset_t max_addr; 156267a659d2SKonstantin Belousov int docow, error, findspace, rv; 156367a659d2SKonstantin Belousov bool curmap, fitit; 156467a659d2SKonstantin Belousov 156567a659d2SKonstantin Belousov curmap = map == &td->td_proc->p_vmspace->vm_map; 156667a659d2SKonstantin Belousov if (curmap) { 156767a659d2SKonstantin Belousov error = kern_mmap_racct_check(td, map, size); 156867a659d2SKonstantin Belousov if (error != 0) 156967a659d2SKonstantin Belousov return (error); 1570a6492969SAlan Cox } 1571070f64feSMatthew Dillon 1572df8bae1dSRodney W. Grimes /* 1573bc9ad247SDavid Greenman * We currently can only deal with page aligned file offsets. 15747077c426SJohn Baldwin * The mmap() system call already enforces this by subtracting 15757077c426SJohn Baldwin * the page offset from the file offset, but checking here 15767077c426SJohn Baldwin * catches errors in device drivers (e.g. d_single_mmap() 15777077c426SJohn Baldwin * callbacks) and other internal mapping requests (such as in 15787077c426SJohn Baldwin * exec). 1579bc9ad247SDavid Greenman */ 1580bc9ad247SDavid Greenman if (foff & PAGE_MASK) 1581bc9ad247SDavid Greenman return (EINVAL); 1582bc9ad247SDavid Greenman 158306cb7259SDavid Greenman if ((flags & MAP_FIXED) == 0) { 158406cb7259SDavid Greenman fitit = TRUE; 158506cb7259SDavid Greenman *addr = round_page(*addr); 158606cb7259SDavid Greenman } else { 158706cb7259SDavid Greenman if (*addr != trunc_page(*addr)) 158806cb7259SDavid Greenman return (EINVAL); 158906cb7259SDavid Greenman fitit = FALSE; 159006cb7259SDavid Greenman } 159184110e7eSKonstantin Belousov 15925f55e841SDavid Greenman if (flags & MAP_ANON) { 15937077c426SJohn Baldwin if (object != NULL || foff != 0) 15947077c426SJohn Baldwin return (EINVAL); 1595c8daea13SAlexander Kabaev docow = 0; 159674ffb9afSAlan Cox } else if (flags & MAP_PREFAULT_READ) 159774ffb9afSAlan Cox docow = MAP_PREFAULT; 159874ffb9afSAlan Cox else 15994738fa09SAlan Cox docow = MAP_PREFAULT_PARTIAL; 1600df8bae1dSRodney W. Grimes 16014f79d873SMatthew Dillon if ((flags & (MAP_ANON|MAP_SHARED)) == 0) 16024738fa09SAlan Cox docow |= MAP_COPY_ON_WRITE; 16034f79d873SMatthew Dillon if (flags & MAP_NOSYNC) 16044f79d873SMatthew Dillon docow |= MAP_DISABLE_SYNCER; 16059730a5daSPaul Saab if (flags & MAP_NOCORE) 16069730a5daSPaul Saab docow |= MAP_DISABLE_COREDUMP; 16078211bd45SKonstantin Belousov /* Shared memory is also shared with children. */ 16088211bd45SKonstantin Belousov if (flags & MAP_SHARED) 16098211bd45SKonstantin Belousov docow |= MAP_INHERIT_SHARE; 161084110e7eSKonstantin Belousov if (writecounted) 1611fe7bcbafSKyle Evans docow |= MAP_WRITECOUNT; 16124648ba0aSKonstantin Belousov if (flags & MAP_STACK) { 16134648ba0aSKonstantin Belousov if (object != NULL) 16144648ba0aSKonstantin Belousov return (EINVAL); 16154648ba0aSKonstantin Belousov docow |= MAP_STACK_GROWS_DOWN; 16164648ba0aSKonstantin Belousov } 161711c42bccSKonstantin Belousov if ((flags & MAP_EXCL) != 0) 161811c42bccSKonstantin Belousov docow |= MAP_CHECK_EXCL; 161919bd0d9cSKonstantin Belousov if ((flags & MAP_GUARD) != 0) 162019bd0d9cSKonstantin Belousov docow |= MAP_CREATE_GUARD; 16215850152dSJohn Dyson 16224648ba0aSKonstantin Belousov if (fitit) { 16235aa60b6fSJohn Baldwin if ((flags & MAP_ALIGNMENT_MASK) == MAP_ALIGNED_SUPER) 16245aa60b6fSJohn Baldwin findspace = VMFS_SUPER_SPACE; 16255aa60b6fSJohn Baldwin else if ((flags & MAP_ALIGNMENT_MASK) != 0) 16265aa60b6fSJohn Baldwin findspace = VMFS_ALIGNED_SPACE(flags >> 16275aa60b6fSJohn Baldwin MAP_ALIGNMENT_SHIFT); 16282267af78SJulian Elischer else 16295aa60b6fSJohn Baldwin findspace = VMFS_OPTIMAL_SPACE; 16306a97a3f7SKonstantin Belousov max_addr = 0; 1631edb572a3SJohn Baldwin #ifdef MAP_32BIT 16326a97a3f7SKonstantin Belousov if ((flags & MAP_32BIT) != 0) 16336a97a3f7SKonstantin Belousov max_addr = MAP_32BIT_MAX_ADDR; 1634edb572a3SJohn Baldwin #endif 16356a97a3f7SKonstantin Belousov if (curmap) { 16366a97a3f7SKonstantin Belousov rv = vm_map_find_min(map, object, foff, addr, size, 16376a97a3f7SKonstantin Belousov round_page((vm_offset_t)td->td_proc->p_vmspace-> 16386a97a3f7SKonstantin Belousov vm_daddr + lim_max(td, RLIMIT_DATA)), max_addr, 16396a97a3f7SKonstantin Belousov findspace, prot, maxprot, docow); 16406a97a3f7SKonstantin Belousov } else { 16416a97a3f7SKonstantin Belousov rv = vm_map_find(map, object, foff, addr, size, 16426a97a3f7SKonstantin Belousov max_addr, findspace, prot, maxprot, docow); 16436a97a3f7SKonstantin Belousov } 16444648ba0aSKonstantin Belousov } else { 1645b8ca4ef2SAlan Cox rv = vm_map_fixed(map, object, foff, *addr, size, 1646bd7e5f99SJohn Dyson prot, maxprot, docow); 16474648ba0aSKonstantin Belousov } 1648bd7e5f99SJohn Dyson 1649f9230ad6SAlan Cox if (rv == KERN_SUCCESS) { 16507fb0c17eSDavid Greenman /* 1651f9230ad6SAlan Cox * If the process has requested that all future mappings 1652f9230ad6SAlan Cox * be wired, then heed this. 1653f9230ad6SAlan Cox */ 165454a3a114SMark Johnston if ((map->flags & MAP_WIREFUTURE) != 0) { 165554a3a114SMark Johnston vm_map_lock(map); 165654a3a114SMark Johnston if ((map->flags & MAP_WIREFUTURE) != 0) 16578cd6a80dSMark Johnston (void)vm_map_wire_locked(map, *addr, 165854a3a114SMark Johnston *addr + size, VM_MAP_WIRE_USER | 165954a3a114SMark Johnston ((flags & MAP_STACK) ? VM_MAP_WIRE_HOLESOK : 166054a3a114SMark Johnston VM_MAP_WIRE_NOHOLES)); 166154a3a114SMark Johnston vm_map_unlock(map); 16621472f4f4SKonstantin Belousov } 1663df8bae1dSRodney W. Grimes } 16642e32165cSKonstantin Belousov return (vm_mmap_to_errno(rv)); 16652e32165cSKonstantin Belousov } 16662e32165cSKonstantin Belousov 1667f9230ad6SAlan Cox /* 1668f9230ad6SAlan Cox * Translate a Mach VM return code to zero on success or the appropriate errno 1669f9230ad6SAlan Cox * on failure. 1670f9230ad6SAlan Cox */ 16712e32165cSKonstantin Belousov int 16722e32165cSKonstantin Belousov vm_mmap_to_errno(int rv) 16732e32165cSKonstantin Belousov { 16742e32165cSKonstantin Belousov 1675df8bae1dSRodney W. Grimes switch (rv) { 1676df8bae1dSRodney W. Grimes case KERN_SUCCESS: 1677df8bae1dSRodney W. Grimes return (0); 1678df8bae1dSRodney W. Grimes case KERN_INVALID_ADDRESS: 1679df8bae1dSRodney W. Grimes case KERN_NO_SPACE: 1680df8bae1dSRodney W. Grimes return (ENOMEM); 1681df8bae1dSRodney W. Grimes case KERN_PROTECTION_FAILURE: 1682df8bae1dSRodney W. Grimes return (EACCES); 1683df8bae1dSRodney W. Grimes default: 1684df8bae1dSRodney W. Grimes return (EINVAL); 1685df8bae1dSRodney W. Grimes } 1686df8bae1dSRodney W. Grimes } 1687