/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_hwpmc_hooks.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/capsicum.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/elf.h>
#include <sys/filedesc.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/procctl.h>
#include <sys/racct.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/vmmeter.h>
#if defined(__amd64__) || defined(__i386__) /* for i386_read_exec */
#include <machine/md_var.h>
#endif

#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vnode_pager.h>

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

int old_mlock = 0;
SYSCTL_INT(_vm, OID_AUTO, old_mlock, CTLFLAG_RWTUN, &old_mlock, 0,
    "Do not apply RLIMIT_MEMLOCK on mlockall");
static int mincore_mapped = 1;
SYSCTL_INT(_vm, OID_AUTO, mincore_mapped, CTLFLAG_RWTUN, &mincore_mapped, 0,
    "mincore reports mappings, not residency");
static int imply_prot_max = 0;
SYSCTL_INT(_vm, OID_AUTO, imply_prot_max, CTLFLAG_RWTUN, &imply_prot_max, 0,
    "Imply maximum page permissions in mmap() when none are specified");

#ifdef MAP_32BIT
#define	MAP_32BIT_MAX_ADDR	((vm_offset_t)1 << 31)
#endif

#ifndef _SYS_SYSPROTO_H_
struct sbrk_args {
	int incr;
};
#endif

int
sys_sbrk(struct thread *td, struct sbrk_args *uap)
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#ifndef _SYS_SYSPROTO_H_
struct sstk_args {
	int incr;
};
#endif

int
sys_sstk(struct thread *td, struct sstk_args *uap)
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43)
int
ogetpagesize(struct thread *td, struct ogetpagesize_args *uap)
{

	td->td_retval[0] = PAGE_SIZE;
	return (0);
}
#endif /* COMPAT_43 */

/*
 * Memory Map (mmap) system call.  Note that the file offset
 * and address are allowed to be NOT page aligned, though if
 * the MAP_FIXED flag is set, both must have the same remainder
 * modulo the PAGE_SIZE (POSIX 1003.1b).  If the address is not
 * page-aligned, the actual mapping starts at trunc_page(addr)
 * and the return value is adjusted up by the page offset.
 *
 * Generally speaking, only character devices which are themselves
 * memory-based, such as a video framebuffer, can be mmap'd.  Otherwise
 * there would be no cache coherency between a descriptor and a VM mapping
 * both to the same character device.
 */
#ifndef _SYS_SYSPROTO_H_
struct mmap_args {
	void *addr;
	size_t len;
	int prot;
	int flags;
	int fd;
	long pad;
	off_t pos;
};
#endif

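/*
 * Illustrative userspace sketch (not part of this file; "fd" and "len"
 * below are hypothetical).  Because kern_mmap() truncates the file
 * position to a page boundary and adds the page offset back to the
 * return value, a request at an unaligned offset yields, on success,
 * a pointer carrying the same offset within its page:
 *
 *	char *p = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0x1234);
 *	// on success: ((uintptr_t)p & (getpagesize() - 1)) == 0x234
 *
 * With MAP_FIXED, the hint address must carry that same page offset,
 * or kern_mmap() returns EINVAL.
 */
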
int
sys_mmap(struct thread *td, struct mmap_args *uap)
{

	return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, uap->prot,
	    uap->flags, uap->fd, uap->pos));
}

int
kern_mmap_maxprot(struct proc *p, int prot)
{

	if ((p->p_flag2 & P2_PROTMAX_DISABLE) != 0 ||
	    (p->p_fctl0 & NT_FREEBSD_FCTL_PROTMAX_DISABLE) != 0)
		return (_PROT_ALL);
	if (((p->p_flag2 & P2_PROTMAX_ENABLE) != 0 || imply_prot_max) &&
	    prot != PROT_NONE)
		return (prot);
	return (_PROT_ALL);
}

int
kern_mmap(struct thread *td, uintptr_t addr0, size_t len, int prot, int flags,
    int fd, off_t pos)
{
	struct vmspace *vms;
	struct file *fp;
	struct proc *p;
	vm_offset_t addr;
	vm_size_t pageoff, size;
	vm_prot_t cap_maxprot;
	int align, error, max_prot;
	cap_rights_t rights;

	if ((prot & ~(_PROT_ALL | PROT_MAX(_PROT_ALL))) != 0)
		return (EINVAL);
	max_prot = PROT_MAX_EXTRACT(prot);
	prot = PROT_EXTRACT(prot);
	if (max_prot != 0 && (max_prot & prot) != prot)
		return (EINVAL);

	p = td->td_proc;

	/*
	 * Always honor PROT_MAX if set.  If not, default to all
	 * permissions unless we're implying maximum permissions.
	 */
	if (max_prot == 0)
		max_prot = kern_mmap_maxprot(p, prot);

	vms = p->p_vmspace;
	fp = NULL;
	AUDIT_ARG_FD(fd);
	addr = addr0;

	/*
	 * Ignore old flags that used to be defined but did not do anything.
	 */
	flags &= ~(MAP_RESERVED0020 | MAP_RESERVED0040);

	/*
	 * Enforce the constraints.
	 * Mapping of length 0 is only allowed for old binaries.
	 * Anonymous mapping shall specify -1 as the file descriptor and
	 * zero position for new code.
	 * Be nice to ancient a.out
	 * binaries and correct pos for anonymous mapping, since old
	 * ld.so sometimes issues anonymous map requests with non-zero
	 * pos.
	 */
	if (!SV_CURPROC_FLAG(SV_AOUT)) {
		if ((len == 0 && p->p_osrel >= P_OSREL_MAP_ANON) ||
		    ((flags & MAP_ANON) != 0 && (fd != -1 || pos != 0)))
			return (EINVAL);
	} else {
		if ((flags & MAP_ANON) != 0)
			pos = 0;
	}

	if (flags & MAP_STACK) {
		if ((fd != -1) ||
		    ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE)))
			return (EINVAL);
		flags |= MAP_ANON;
		pos = 0;
	}
	if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | MAP_HASSEMAPHORE |
	    MAP_STACK | MAP_NOSYNC | MAP_ANON | MAP_EXCL | MAP_NOCORE |
	    MAP_PREFAULT_READ | MAP_GUARD |
#ifdef MAP_32BIT
	    MAP_32BIT |
#endif
	    MAP_ALIGNMENT_MASK)) != 0)
		return (EINVAL);
	if ((flags & (MAP_EXCL | MAP_FIXED)) == MAP_EXCL)
		return (EINVAL);
	if ((flags & (MAP_SHARED | MAP_PRIVATE)) == (MAP_SHARED | MAP_PRIVATE))
		return (EINVAL);
	if (prot != PROT_NONE &&
	    (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) != 0)
		return (EINVAL);
	if ((flags & MAP_GUARD) != 0 && (prot != PROT_NONE || fd != -1 ||
	    pos != 0 || (flags & ~(MAP_FIXED | MAP_GUARD | MAP_EXCL |
#ifdef MAP_32BIT
	    MAP_32BIT |
#endif
	    MAP_ALIGNMENT_MASK)) != 0))
		return (EINVAL);

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;

	/* Compute size from len by rounding (on both ends). */
	size = len + pageoff;			/* low end... */
	size = round_page(size);		/* hi end */
	/* Check for rounding up to zero. */
	if (len > size)
		return (ENOMEM);

	/* Ensure alignment is at least a page and fits in a pointer. */
	align = flags & MAP_ALIGNMENT_MASK;
	if (align != 0 && align != MAP_ALIGNED_SUPER &&
	    (align >> MAP_ALIGNMENT_SHIFT >= sizeof(void *) * NBBY ||
	    align >> MAP_ALIGNMENT_SHIFT < PAGE_SHIFT))
		return (EINVAL);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...  Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		/* Address range must be all in user VM space. */
		if (addr < vm_map_min(&vms->vm_map) ||
		    addr + size > vm_map_max(&vms->vm_map))
			return (EINVAL);
		if (addr + size < addr)
			return (EINVAL);
#ifdef MAP_32BIT
		if (flags & MAP_32BIT && addr + size > MAP_32BIT_MAX_ADDR)
			return (EINVAL);
	} else if (flags & MAP_32BIT) {
		/*
		 * For MAP_32BIT, override the hint if it is too high and
		 * do not bother moving the mapping past the heap (since
		 * the heap is usually above 2GB).
		 */
		if (addr + size > MAP_32BIT_MAX_ADDR)
			addr = 0;
#endif
	} else {
		/*
		 * XXX for non-fixed mappings where no hint is provided or
		 * the hint would fall in the potential heap space,
		 * place it after the end of the largest possible heap.
		 *
		 * There should really be a pmap call to determine a reasonable
		 * location.
		 */
		if (addr == 0 ||
		    (addr >= round_page((vm_offset_t)vms->vm_taddr) &&
		    addr < round_page((vm_offset_t)vms->vm_daddr +
		    lim_max(td, RLIMIT_DATA))))
			addr = round_page((vm_offset_t)vms->vm_daddr +
			    lim_max(td, RLIMIT_DATA));
	}
	if (len == 0) {
		/*
		 * Return success without mapping anything for old
		 * binaries that request a page-aligned mapping of
		 * length 0.  For modern binaries, this function
		 * returns an error earlier.
		 */
		error = 0;
	} else if ((flags & MAP_GUARD) != 0) {
		error = vm_mmap_object(&vms->vm_map, &addr, size, VM_PROT_NONE,
		    VM_PROT_NONE, flags, NULL, pos, FALSE, td);
	} else if ((flags & MAP_ANON) != 0) {
		/*
		 * Mapping blank space is trivial.
		 *
		 * This relies on VM_PROT_* matching PROT_*.
		 */
		error = vm_mmap_object(&vms->vm_map, &addr, size, prot,
		    max_prot, flags, NULL, pos, FALSE, td);
	} else {
		/*
		 * Mapping file, get fp for validation and don't let the
		 * descriptor disappear on us if we block.  Check capability
		 * rights, but also return the maximum rights to be combined
		 * with maxprot later.
		 */
		cap_rights_init(&rights, CAP_MMAP);
		if (prot & PROT_READ)
			cap_rights_set(&rights, CAP_MMAP_R);
		if ((flags & MAP_SHARED) != 0) {
			if (prot & PROT_WRITE)
				cap_rights_set(&rights, CAP_MMAP_W);
		}
		if (prot & PROT_EXEC)
			cap_rights_set(&rights, CAP_MMAP_X);
		error = fget_mmap(td, fd, &rights, &cap_maxprot, &fp);
		if (error != 0)
			goto done;
		if ((flags & (MAP_SHARED | MAP_PRIVATE)) == 0 &&
		    p->p_osrel >= P_OSREL_MAP_FSTRICT) {
			error = EINVAL;
			goto done;
		}

		/* This relies on VM_PROT_* matching PROT_*. */
		error = fo_mmap(fp, &vms->vm_map, &addr, size, prot,
		    max_prot & cap_maxprot, flags, pos, td);
	}

	if (error == 0)
		td->td_retval[0] = (register_t) (addr + pageoff);
done:
	if (fp)
		fdrop(fp, td);

	return (error);
}
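
/*
 * Illustrative userspace sketch (not part of this file): PROT_MAX()
 * encodes a ceiling for later mprotect(2) upgrades.  A private
 * anonymous mapping that starts read-only but may be raised to
 * read/write, and never to executable, could be requested as:
 *
 *	void *p = mmap(NULL, len,
 *	    PROT_READ | PROT_MAX(PROT_READ | PROT_WRITE),
 *	    MAP_PRIVATE | MAP_ANON, -1, 0);
 *
 * A later mprotect(p, len, PROT_READ | PROT_WRITE) stays within the
 * ceiling and can succeed, while adding PROT_EXEC fails.  When no
 * PROT_MAX is given, kern_mmap_maxprot() above picks the default
 * ceiling from the P2_PROTMAX_* process flags and the
 * vm.imply_prot_max sysctl.
 */
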
#if defined(COMPAT_FREEBSD6)
int
freebsd6_mmap(struct thread *td, struct freebsd6_mmap_args *uap)
{

	return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, uap->prot,
	    uap->flags, uap->fd, uap->pos));
}
#endif

#ifdef COMPAT_43
#ifndef _SYS_SYSPROTO_H_
struct ommap_args {
	caddr_t addr;
	int len;
	int prot;
	int flags;
	int fd;
	long pos;
};
#endif
int
ommap(struct thread *td, struct ommap_args *uap)
{
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC | PROT_WRITE,
		PROT_READ,
		PROT_EXEC | PROT_READ,
		PROT_WRITE | PROT_READ,
		PROT_EXEC | PROT_WRITE | PROT_READ,
	};
	int flags, prot;

#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100

	prot = cvtbsdprot[uap->prot & 0x7];
#if (defined(COMPAT_FREEBSD32) && defined(__amd64__)) || defined(__i386__)
	if (i386_read_exec && SV_PROC_FLAG(td->td_proc, SV_ILP32) &&
	    prot != 0)
		prot |= PROT_EXEC;
#endif
	flags = 0;
	if (uap->flags & OMAP_ANON)
		flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		flags |= MAP_SHARED;
	else
		flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		flags |= MAP_FIXED;
	return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, prot, flags,
	    uap->fd, uap->pos));
}
#endif /* COMPAT_43 */

#ifndef _SYS_SYSPROTO_H_
struct msync_args {
	void *addr;
	size_t len;
	int flags;
};
#endif
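
/*
 * Illustrative userspace sketch (not part of this file): MS_ASYNC and
 * MS_INVALIDATE are rejected in combination by kern_msync() below, so
 * a typical synchronous flush of a shared file mapping is simply:
 *
 *	if (msync(p, len, MS_SYNC) == -1)
 *		err(1, "msync");
 *
 * The switch in kern_msync() maps KERN_INVALID_ADDRESS to ENOMEM
 * (part of the range is unmapped), KERN_INVALID_ARGUMENT to EBUSY,
 * and KERN_FAILURE to EIO.
 */
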
int
sys_msync(struct thread *td, struct msync_args *uap)
{

	return (kern_msync(td, (uintptr_t)uap->addr, uap->len, uap->flags));
}

int
kern_msync(struct thread *td, uintptr_t addr0, size_t size, int flags)
{
	vm_offset_t addr;
	vm_size_t pageoff;
	vm_map_t map;
	int rv;

	addr = addr0;
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	map = &td->td_proc->p_vmspace->vm_map;

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_sync(map, addr, addr + size, (flags & MS_ASYNC) == 0,
	    (flags & MS_INVALIDATE) != 0);
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
		return (ENOMEM);
	case KERN_INVALID_ARGUMENT:
		return (EBUSY);
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
}

#ifndef _SYS_SYSPROTO_H_
struct munmap_args {
	void *addr;
	size_t len;
};
#endif
int
sys_munmap(struct thread *td, struct munmap_args *uap)
{

	return (kern_munmap(td, (uintptr_t)uap->addr, uap->len));
}

int
kern_munmap(struct thread *td, uintptr_t addr0, size_t size)
{
#ifdef HWPMC_HOOKS
	struct pmckern_map_out pkm;
	vm_map_entry_t entry;
	bool pmc_handled;
#endif
	vm_offset_t addr;
	vm_size_t pageoff;
	vm_map_t map;

	if (size == 0)
		return (EINVAL);

	addr = addr0;
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	if (addr < vm_map_min(map) || addr + size > vm_map_max(map))
		return (EINVAL);
	vm_map_lock(map);
#ifdef HWPMC_HOOKS
	pmc_handled = false;
	if (PMC_HOOK_INSTALLED(PMC_FN_MUNMAP)) {
		pmc_handled = true;
		/*
		 * Inform hwpmc if the address range being unmapped contains
		 * an executable region.
		 */
		pkm.pm_address = (uintptr_t) NULL;
		if (vm_map_lookup_entry(map, addr, &entry)) {
			for (; entry->start < addr + size;
			    entry = vm_map_entry_succ(entry)) {
				if (vm_map_check_protection(map, entry->start,
				    entry->end, VM_PROT_EXECUTE) == TRUE) {
					pkm.pm_address = (uintptr_t) addr;
					pkm.pm_size = (size_t) size;
					break;
				}
			}
		}
	}
#endif
	vm_map_delete(map, addr, addr + size);

#ifdef HWPMC_HOOKS
	if (__predict_false(pmc_handled)) {
		/* downgrade the lock to prevent a LOR with the pmc-sx lock */
		vm_map_lock_downgrade(map);
		if (pkm.pm_address != (uintptr_t) NULL)
			PMC_CALL_HOOK(td, PMC_FN_MUNMAP, (void *) &pkm);
		vm_map_unlock_read(map);
	} else
#endif
		vm_map_unlock(map);

	/* vm_map_delete returns nothing but KERN_SUCCESS anyway */
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mprotect_args {
	const void *addr;
	size_t len;
	int prot;
};
#endif
int
sys_mprotect(struct thread *td, struct mprotect_args *uap)
{

	return (kern_mprotect(td, (uintptr_t)uap->addr, uap->len, uap->prot));
}

int
kern_mprotect(struct thread *td, uintptr_t addr0, size_t size, int prot)
{
	vm_offset_t addr;
	vm_size_t pageoff;
	int vm_error, max_prot;

	addr = addr0;
	if ((prot & ~(_PROT_ALL | PROT_MAX(_PROT_ALL))) != 0)
		return (EINVAL);
	max_prot = PROT_MAX_EXTRACT(prot);
	prot = PROT_EXTRACT(prot);
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
#ifdef COMPAT_FREEBSD32
	if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
		if (((addr + size) & 0xffffffff) < addr)
			return (EINVAL);
	} else
#endif
	if (addr + size < addr)
		return (EINVAL);

	vm_error = KERN_SUCCESS;
	if (max_prot != 0) {
		if ((max_prot & prot) != prot)
			return (EINVAL);
		vm_error = vm_map_protect(&td->td_proc->p_vmspace->vm_map,
		    addr, addr + size, max_prot, TRUE);
	}
	if (vm_error == KERN_SUCCESS)
		vm_error = vm_map_protect(&td->td_proc->p_vmspace->vm_map,
		    addr, addr + size, prot, FALSE);

	switch (vm_error) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	case KERN_RESOURCE_SHORTAGE:
		return (ENOMEM);
	}
	return (EINVAL);
}
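
/*
 * Illustrative userspace sketch (not part of this file):
 * kern_mprotect() above applies a PROT_MAX() ceiling first and the
 * current protection second, so both can change in one call:
 *
 *	mprotect(p, len, PROT_READ | PROT_MAX(PROT_READ | PROT_WRITE));
 *
 * lowers the maximum protection to read/write and the current
 * protection to read-only.  Requesting current bits outside the new
 * ceiling fails with EINVAL before the map is touched.
 */
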
#ifndef _SYS_SYSPROTO_H_
struct minherit_args {
	void *addr;
	size_t len;
	int inherit;
};
#endif
int
sys_minherit(struct thread *td, struct minherit_args *uap)
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_inherit_t inherit;

	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	inherit = uap->inherit;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_inherit(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct madvise_args {
	void *addr;
	size_t len;
	int behav;
};
#endif

int
sys_madvise(struct thread *td, struct madvise_args *uap)
{

	return (kern_madvise(td, (uintptr_t)uap->addr, uap->len, uap->behav));
}

int
kern_madvise(struct thread *td, uintptr_t addr0, size_t len, int behav)
{
	vm_map_t map;
	vm_offset_t addr, end, start;
	int flags;

	/*
	 * Check for our special case, advising the swap pager we are
	 * "immortal."
	 */
	if (behav == MADV_PROTECT) {
		flags = PPROT_SET;
		return (kern_procctl(td, P_PID, td->td_proc->p_pid,
		    PROC_SPROTECT, &flags));
	}

	/*
	 * Check for illegal addresses.  Watch out for address wrap...  Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	addr = addr0;
	if (addr < vm_map_min(map) || addr + len > vm_map_max(map))
		return (EINVAL);
	if ((addr + len) < addr)
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	start = trunc_page(addr);
	end = round_page(addr + len);

	/*
	 * vm_map_madvise() checks for illegal values of behav.
	 */
	return (vm_map_madvise(map, start, end, behav));
}

#ifndef _SYS_SYSPROTO_H_
struct mincore_args {
	const void *addr;
	size_t len;
	char *vec;
};
#endif

int
sys_mincore(struct thread *td, struct mincore_args *uap)
{

	return (kern_mincore(td, (uintptr_t)uap->addr, uap->len, uap->vec));
}

int
kern_mincore(struct thread *td, uintptr_t addr0, size_t len, char *vec)
{
	pmap_t pmap;
	vm_map_t map;
	vm_map_entry_t current, entry;
	vm_object_t object;
	vm_offset_t addr, cend, end, first_addr;
	vm_paddr_t pa;
	vm_page_t m;
	vm_pindex_t pindex;
	int error, lastvecindex, mincoreinfo, vecindex;
	unsigned int timestamp;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page(addr0);
	end = round_page(addr0 + len);
	map = &td->td_proc->p_vmspace->vm_map;
	if (end > vm_map_max(map) || end < addr)
		return (ENOMEM);

	pmap = vmspace_pmap(td->td_proc->p_vmspace);

	vm_map_lock_read(map);
RestartScan:
	timestamp = map->timestamp;

	if (!vm_map_lookup_entry(map, addr, &entry)) {
		vm_map_unlock_read(map);
		return (ENOMEM);
	}

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current process's address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	while (entry->start < end) {

		/*
		 * check for contiguity
		 */
		current = entry;
		entry = vm_map_entry_succ(current);
		if (current->end < end &&
		    entry->start > current->end) {
			vm_map_unlock_read(map);
			return (ENOMEM);
		}

		/*
		 * ignore submaps (for now) or null objects
		 */
		if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) ||
		    current->object.vm_object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		for (; addr < cend; addr += PAGE_SIZE) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 */
			m = NULL;
			object = NULL;
retry:
			pa = 0;
			mincoreinfo = pmap_mincore(pmap, addr, &pa);
			if (mincore_mapped) {
				/*
				 * We only care about this pmap's
				 * mapping of the page, if any.
				 */
				;
			} else if (pa != 0) {
				/*
				 * The page is mapped by this process but not
				 * both accessed and modified.  It is also
				 * managed.  Acquire the object lock so that
				 * other mappings might be examined.  The page's
				 * identity may change at any point before its
				 * object lock is acquired, so re-validate if
				 * necessary.
				 */
				m = PHYS_TO_VM_PAGE(pa);
				while (object == NULL || m->object != object) {
					if (object != NULL)
						VM_OBJECT_WUNLOCK(object);
					object = (vm_object_t)atomic_load_ptr(
					    &m->object);
					if (object == NULL)
						goto retry;
					VM_OBJECT_WLOCK(object);
				}
				if (pa != pmap_extract(pmap, addr))
					goto retry;
				KASSERT(vm_page_all_valid(m),
				    ("mincore: page %p is mapped but invalid",
				    m));
			} else if (mincoreinfo == 0) {
				/*
				 * The page is not mapped by this process.  If
				 * the object implements managed pages, then
				 * determine if the page is resident so that
				 * the mappings might be examined.
				 */
				if (current->object.vm_object != object) {
					if (object != NULL)
						VM_OBJECT_WUNLOCK(object);
					object = current->object.vm_object;
					VM_OBJECT_WLOCK(object);
				}
				if (object->type == OBJT_DEFAULT ||
				    object->type == OBJT_SWAP ||
				    object->type == OBJT_VNODE) {
					pindex = OFF_TO_IDX(current->offset +
					    (addr - current->start));
					m = vm_page_lookup(object, pindex);
					if (m != NULL && vm_page_none_valid(m))
						m = NULL;
					if (m != NULL)
						mincoreinfo = MINCORE_INCORE;
				}
			}
			if (m != NULL) {
				VM_OBJECT_ASSERT_WLOCKED(m->object);

				/* Examine other mappings of the page. */
				if (m->dirty == 0 && pmap_is_modified(m))
					vm_page_dirty(m);
				if (m->dirty != 0)
					mincoreinfo |= MINCORE_MODIFIED_OTHER;

				/*
				 * The first test for PGA_REFERENCED is an
				 * optimization.  The second test is
				 * required because a concurrent pmap
				 * operation could clear the last reference
				 * and set PGA_REFERENCED before the call to
				 * pmap_is_referenced().
				 */
				if ((m->aflags & PGA_REFERENCED) != 0 ||
				    pmap_is_referenced(m) ||
				    (m->aflags & PGA_REFERENCED) != 0)
					mincoreinfo |= MINCORE_REFERENCED_OTHER;
			}
			if (object != NULL)
				VM_OBJECT_WUNLOCK(object);

			/*
			 * subyte may page fault.  In case it needs to modify
			 * the map, we release the lock.
			 */
			vm_map_unlock_read(map);

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = atop(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make sure that
			 * the byte vector is zeroed for those skipped entries.
			 */
			while ((lastvecindex + 1) < vecindex) {
				++lastvecindex;
				error = subyte(vec + lastvecindex, 0);
				if (error) {
					error = EFAULT;
					goto done2;
				}
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte(vec + vecindex, mincoreinfo);
			if (error) {
				error = EFAULT;
				goto done2;
			}

			/*
			 * If the map has changed, due to the subyte, the previous
			 * output may be invalid.
			 */
			vm_map_lock_read(map);
			if (timestamp != map->timestamp)
				goto RestartScan;

			lastvecindex = vecindex;
		}
	}

	/*
	 * subyte may page fault.  In case it needs to modify
	 * the map, we release the lock.
	 */
	vm_map_unlock_read(map);

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = atop(end - first_addr);
	while ((lastvecindex + 1) < vecindex) {
		++lastvecindex;
		error = subyte(vec + lastvecindex, 0);
		if (error) {
			error = EFAULT;
			goto done2;
		}
	}

	/*
	 * If the map has changed, due to the subyte, the previous
	 * output may be invalid.
	 */
	vm_map_lock_read(map);
	if (timestamp != map->timestamp)
		goto RestartScan;
	vm_map_unlock_read(map);
done2:
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct mlock_args {
	const void *addr;
	size_t len;
};
#endif
int
sys_mlock(struct thread *td, struct mlock_args *uap)
{

	return (kern_mlock(td->td_proc, td->td_ucred,
	    __DECONST(uintptr_t, uap->addr), uap->len));
}

int
kern_mlock(struct proc *proc, struct ucred *cred, uintptr_t addr0, size_t len)
{
	vm_offset_t addr, end, last, start;
	vm_size_t npages, size;
	vm_map_t map;
	unsigned long nsize;
	int error;

	error = priv_check_cred(cred, PRIV_VM_MLOCK);
	if (error)
		return (error);
	addr = addr0;
	size = len;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	npages = atop(end - start);
	if (npages > vm_page_max_user_wired)
		return (ENOMEM);
	map = &proc->p_vmspace->vm_map;
	PROC_LOCK(proc);
	nsize = ptoa(npages + pmap_wired_count(map->pmap));
	if (nsize > lim_cur_proc(proc, RLIMIT_MEMLOCK)) {
		PROC_UNLOCK(proc);
		return (ENOMEM);
	}
	PROC_UNLOCK(proc);
#ifdef RACCT
	if (racct_enable) {
		PROC_LOCK(proc);
		error = racct_set(proc, RACCT_MEMLOCK, nsize);
		PROC_UNLOCK(proc);
		if (error != 0)
			return (ENOMEM);
	}
#endif
	error = vm_map_wire(map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (racct_enable && error != KERN_SUCCESS) {
		PROC_LOCK(proc);
		racct_set(proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}
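
/*
 * Illustrative userspace sketch (not part of this file; "buf" and
 * "buflen" are hypothetical): wiring a buffer with mlock(2).
 * kern_mlock() above rejects the request with ENOMEM when it would
 * exceed the system-wide user wiring limit (vm_page_max_user_wired,
 * exposed as a vm sysctl) or the process's RLIMIT_MEMLOCK:
 *
 *	if (mlock(buf, buflen) == -1 && errno == ENOMEM)
 *		warn("mlock limit exceeded");
 */
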
#ifndef _SYS_SYSPROTO_H_
struct mlockall_args {
	int how;
};
#endif

int
sys_mlockall(struct thread *td, struct mlockall_args *uap)
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = priv_check(td, PRIV_VM_MLOCK);
	if (error)
		return (error);

	if ((uap->how == 0) || ((uap->how & ~(MCL_CURRENT|MCL_FUTURE)) != 0))
		return (EINVAL);

	/*
	 * If wiring all pages in the process would cause it to exceed
	 * a hard resource limit, return ENOMEM.
	 */
	if (!old_mlock && uap->how & MCL_CURRENT) {
		if (map->size > lim_cur(td, RLIMIT_MEMLOCK))
			return (ENOMEM);
	}
#ifdef RACCT
	if (racct_enable) {
		PROC_LOCK(td->td_proc);
		error = racct_set(td->td_proc, RACCT_MEMLOCK, map->size);
		PROC_UNLOCK(td->td_proc);
		if (error != 0)
			return (ENOMEM);
	}
#endif

	if (uap->how & MCL_FUTURE) {
		vm_map_lock(map);
		vm_map_modflags(map, MAP_WIREFUTURE, 0);
		vm_map_unlock(map);
		error = 0;
	}

	if (uap->how & MCL_CURRENT) {
		/*
		 * P1003.1-2001 mandates that all currently mapped pages
		 * will be memory resident and locked (wired) upon return
		 * from mlockall().  vm_map_wire() will wire pages, by
		 * calling vm_fault_wire() for each page in the region.
		 */
		error = vm_map_wire(map, vm_map_min(map), vm_map_max(map),
		    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
		if (error == KERN_SUCCESS)
			error = 0;
		else if (error == KERN_RESOURCE_SHORTAGE)
			error = ENOMEM;
		else
			error = EAGAIN;
	}
#ifdef RACCT
	if (racct_enable && error != KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct munlockall_args {
	register_t dummy;
};
#endif

int
sys_munlockall(struct thread *td, struct munlockall_args *uap)
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);

	/* Clear the MAP_WIREFUTURE flag from this vm_map. */
	vm_map_lock(map);
	vm_map_modflags(map, 0, MAP_WIREFUTURE);
	vm_map_unlock(map);

	/* Forcibly unwire all pages. */
	error = vm_map_unwire(map, vm_map_min(map), vm_map_max(map),
	    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
#ifdef RACCT
	if (racct_enable && error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK, 0);
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct munlock_args {
	const void *addr;
	size_t len;
};
#endif
int
sys_munlock(struct thread *td, struct munlock_args *uap)
11934a40e3d4SJohn Dyson #ifndef _SYS_SYSPROTO_H_
1194df8bae1dSRodney W. Grimes struct munlock_args {
1195651bb817SAlexander Langer 	const void *addr;
1196df8bae1dSRodney W. Grimes 	size_t len;
1197df8bae1dSRodney W. Grimes };
1198d2d3e875SBruce Evans #endif
1199df8bae1dSRodney W. Grimes int
120069cdfcefSEdward Tomasz Napierala sys_munlock(struct thread *td, struct munlock_args *uap)
1201df8bae1dSRodney W. Grimes {
120269cdfcefSEdward Tomasz Napierala
1203496ab053SKonstantin Belousov 	return (kern_munlock(td, (uintptr_t)uap->addr, uap->len));
120469cdfcefSEdward Tomasz Napierala }
120569cdfcefSEdward Tomasz Napierala
120669cdfcefSEdward Tomasz Napierala int
1207496ab053SKonstantin Belousov kern_munlock(struct thread *td, uintptr_t addr0, size_t size)
120869cdfcefSEdward Tomasz Napierala {
1209496ab053SKonstantin Belousov 	vm_offset_t addr, end, last, start;
1210fc2b1679SJeremie Le Hen #ifdef RACCT
1211c92b5069SJeremie Le Hen 	vm_map_t map;
1212fc2b1679SJeremie Le Hen #endif
1213df8bae1dSRodney W. Grimes 	int error;
1214df8bae1dSRodney W. Grimes
1215acd3428bSRobert Watson 	error = priv_check(td, PRIV_VM_MUNLOCK);
121647934cefSDon Lewis 	if (error)
121747934cefSDon Lewis 		return (error);
1218496ab053SKonstantin Belousov 	addr = addr0;
1219bb734798SDon Lewis 	last = addr + size;
122016929939SDon Lewis 	start = trunc_page(addr);
1221bb734798SDon Lewis 	end = round_page(last);
1222bb734798SDon Lewis 	if (last < addr || end < addr)
1223df8bae1dSRodney W. Grimes 		return (EINVAL);
122416929939SDon Lewis 	error = vm_map_unwire(&td->td_proc->p_vmspace->vm_map, start, end,
122516929939SDon Lewis 	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
1226afcc55f3SEdward Tomasz Napierala #ifdef RACCT
12274b5c9cf6SEdward Tomasz Napierala 	if (racct_enable && error == KERN_SUCCESS) {
12281ba5ad42SEdward Tomasz Napierala 		PROC_LOCK(td->td_proc);
1229c92b5069SJeremie Le Hen 		map = &td->td_proc->p_vmspace->vm_map;
1230c92b5069SJeremie Le Hen 		racct_set(td->td_proc, RACCT_MEMLOCK,
1231c92b5069SJeremie Le Hen 		    ptoa(pmap_wired_count(map->pmap)));
12321ba5ad42SEdward Tomasz Napierala 		PROC_UNLOCK(td->td_proc);
12331ba5ad42SEdward Tomasz Napierala 	}
1234afcc55f3SEdward Tomasz Napierala #endif
1235df8bae1dSRodney W. Grimes 	return (error == KERN_SUCCESS ? 0 : ENOMEM);
1236df8bae1dSRodney W. Grimes }
1237df8bae1dSRodney W. Grimes
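/*
 * Editor's example (a sketch, not part of the original; assumes
 * <sys/mman.h>, <unistd.h>, and <err.h>): the trunc/round logic above
 * means the region is unlocked in whole pages, so unlocking any byte
 * of a page unwires that entire page:
 *
 *	size_t pg = (size_t)getpagesize();
 *	char *p = mmap(NULL, pg, PROT_READ | PROT_WRITE,
 *	    MAP_ANON | MAP_PRIVATE, -1, 0);
 *	if (p == MAP_FAILED || mlock(p, pg) == -1)
 *		err(1, "setup");
 *	if (munlock(p + 10, 1) == -1)	(unwires the whole page)
 *		err(1, "munlock");
 */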
1238df8bae1dSRodney W. Grimes /*
1239c8daea13SAlexander Kabaev  * vm_mmap_vnode()
1240c8daea13SAlexander Kabaev  *
1241c8daea13SAlexander Kabaev  * Helper function for vm_mmap.  Perform sanity checks specific to mmap
1242c8daea13SAlexander Kabaev  * operations on vnodes.
1243c8daea13SAlexander Kabaev  */
1244c8daea13SAlexander Kabaev int
1245c8daea13SAlexander Kabaev vm_mmap_vnode(struct thread *td, vm_size_t objsize,
1246c8daea13SAlexander Kabaev     vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
124784110e7eSKonstantin Belousov     struct vnode *vp, vm_ooffset_t *foffp, vm_object_t *objp,
124884110e7eSKonstantin Belousov     boolean_t *writecounted)
1249c8daea13SAlexander Kabaev {
1250c8daea13SAlexander Kabaev 	struct vattr va;
1251c8daea13SAlexander Kabaev 	vm_object_t obj;
1252bd0e1bebSMark Johnston 	vm_ooffset_t foff;
12530359a12eSAttilio Rao 	struct ucred *cred;
125478022527SKonstantin Belousov 	int error, flags;
125578022527SKonstantin Belousov 	bool writex;
1256c8daea13SAlexander Kabaev
12570359a12eSAttilio Rao 	cred = td->td_ucred;
125878022527SKonstantin Belousov 	writex = (*maxprotp & VM_PROT_WRITE) != 0 &&
125978022527SKonstantin Belousov 	    (*flagsp & MAP_SHARED) != 0;
126078022527SKonstantin Belousov 	if ((error = vget(vp, LK_SHARED, td)) != 0)
1261c8daea13SAlexander Kabaev 		return (error);
12620df42647SRobert Watson 	AUDIT_ARG_VNODE1(vp);
126364345f0bSJohn Baldwin 	foff = *foffp;
1264c8daea13SAlexander Kabaev 	flags = *flagsp;
12658516dd18SPoul-Henning Kamp 	obj = vp->v_object;
1266c8daea13SAlexander Kabaev 	if (vp->v_type == VREG) {
1267c8daea13SAlexander Kabaev 		/*
1268c8daea13SAlexander Kabaev 		 * Get the proper underlying object.
1269c8daea13SAlexander Kabaev 		 */
12708516dd18SPoul-Henning Kamp 		if (obj == NULL) {
1271c8daea13SAlexander Kabaev 			error = EINVAL;
1272c8daea13SAlexander Kabaev 			goto done;
1273c8daea13SAlexander Kabaev 		}
1274e5f299ffSKonstantin Belousov 		if (obj->type == OBJT_VNODE && obj->handle != vp) {
1275c8daea13SAlexander Kabaev 			vput(vp);
1276c8daea13SAlexander Kabaev 			vp = (struct vnode *)obj->handle;
127784110e7eSKonstantin Belousov 			/*
127784110e7eSKonstantin Belousov 			 * Bypass filesystems obey the mpsafety of the
127953f5f8a0SKonstantin Belousov 			 * underlying fs.  Tmpfs never bypasses.
128084110e7eSKonstantin Belousov 			 */
128178022527SKonstantin Belousov 			error = vget(vp, LK_SHARED, td);
12825050aa86SKonstantin Belousov 			if (error != 0)
128384110e7eSKonstantin Belousov 				return (error);
128484110e7eSKonstantin Belousov 		}
128578022527SKonstantin Belousov 		if (writex) {
128684110e7eSKonstantin Belousov 			*writecounted = TRUE;
1287fe7bcbafSKyle Evans 			vm_pager_update_writecount(obj, 0, objsize);
128884110e7eSKonstantin Belousov 		}
1289c8daea13SAlexander Kabaev 	} else {
1290c8daea13SAlexander Kabaev 		error = EINVAL;
1291c8daea13SAlexander Kabaev 		goto done;
1292c8daea13SAlexander Kabaev 	}
12930359a12eSAttilio Rao 	if ((error = VOP_GETATTR(vp, &va, cred)))
1294c8daea13SAlexander Kabaev 		goto done;
1295c92163dcSChristian S.J. Peron #ifdef MAC
12967077c426SJohn Baldwin 	/* This relies on VM_PROT_* matching PROT_*. */
12977077c426SJohn Baldwin 	error = mac_vnode_check_mmap(cred, vp, (int)prot, flags);
1298c92163dcSChristian S.J. Peron 	if (error != 0)
1299c92163dcSChristian S.J. Peron 		goto done;
1300c92163dcSChristian S.J. Peron #endif
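/*
 * A writable MAP_SHARED mapping would allow modification of an
 * append-only or immutable file (or a snapshot) without going through
 * write(2): refuse write access up front, and otherwise drop it from
 * the maximum protection so it cannot be re-acquired via mprotect(2).
 */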
1301c8daea13SAlexander Kabaev 	if ((flags & MAP_SHARED) != 0) {
1302c8daea13SAlexander Kabaev 		if ((va.va_flags & (SF_SNAPSHOT|IMMUTABLE|APPEND)) != 0) {
13037077c426SJohn Baldwin 			if (prot & VM_PROT_WRITE) {
1304c8daea13SAlexander Kabaev 				error = EPERM;
1305c8daea13SAlexander Kabaev 				goto done;
1306c8daea13SAlexander Kabaev 			}
1307c8daea13SAlexander Kabaev 			*maxprotp &= ~VM_PROT_WRITE;
1308c8daea13SAlexander Kabaev 		}
1309c8daea13SAlexander Kabaev 	}
1310c8daea13SAlexander Kabaev 	/*
1311c8daea13SAlexander Kabaev 	 * If it is a regular file without any links, there is no need
1312c8daea13SAlexander Kabaev 	 * to sync it.
1313c8daea13SAlexander Kabaev 	 * Adjust object size to be the size of the actual file.
1314c8daea13SAlexander Kabaev 	 */
1315c8daea13SAlexander Kabaev 	objsize = round_page(va.va_size);
1316c8daea13SAlexander Kabaev 	if (va.va_nlink == 0)
1317c8daea13SAlexander Kabaev 		flags |= MAP_NOSYNC;
13183d653db0SAlan Cox 	if (obj->type == OBJT_VNODE) {
1319e5f299ffSKonstantin Belousov 		obj = vm_pager_allocate(OBJT_VNODE, vp, objsize, prot, foff,
1320e5f299ffSKonstantin Belousov 		    cred);
1321c8daea13SAlexander Kabaev 		if (obj == NULL) {
132264345f0bSJohn Baldwin 			error = ENOMEM;
1323c8daea13SAlexander Kabaev 			goto done;
1324c8daea13SAlexander Kabaev 		}
13253d653db0SAlan Cox 	} else {
13263d653db0SAlan Cox 		KASSERT(obj->type == OBJT_DEFAULT || obj->type == OBJT_SWAP,
13273d653db0SAlan Cox 		    ("wrong object type"));
1328*f2410510SJeff Roberson 		vm_object_reference(obj);
13293d653db0SAlan Cox #if VM_NRESERVLEVEL > 0
1330*f2410510SJeff Roberson 		if ((obj->flags & OBJ_COLORED) == 0) {
1331*f2410510SJeff Roberson 			VM_OBJECT_WLOCK(obj);
13323d653db0SAlan Cox 			vm_object_color(obj, 0);
13333d653db0SAlan Cox 			VM_OBJECT_WUNLOCK(obj);
13343d653db0SAlan Cox 		}
1335*f2410510SJeff Roberson #endif
1336*f2410510SJeff Roberson 	}
1337c8daea13SAlexander Kabaev 	*objp = obj;
1338c8daea13SAlexander Kabaev 	*flagsp = flags;
133964345f0bSJohn Baldwin
13400359a12eSAttilio Rao 	vfs_mark_atime(vp, cred);
13411e309003SDiomidis Spinellis
1342c8daea13SAlexander Kabaev done:
1343bafa6cfcSKonstantin Belousov 	if (error != 0 && *writecounted) {
1344bafa6cfcSKonstantin Belousov 		*writecounted = FALSE;
1345fe7bcbafSKyle Evans 		vm_pager_update_writecount(obj, objsize, 0);
1346bafa6cfcSKonstantin Belousov 	}
1347c8daea13SAlexander Kabaev 	vput(vp);
1348c8daea13SAlexander Kabaev 	return (error);
1349c8daea13SAlexander Kabaev }
1350c8daea13SAlexander Kabaev
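/*
 * Editor's note (a sketch, not part of the original): the write count
 * taken by this function is how the rest of the kernel learns that a
 * shared, writable file mapping exists; for instance, executing such a
 * file is expected to fail with ETXTBSY while the mapping lives:
 *
 *	int fd = open(path, O_RDWR);	(path: some regular file)
 *	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *	    MAP_SHARED, fd, 0);
 *	(execve() of the same file should now fail with ETXTBSY)
 */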
1351c8daea13SAlexander Kabaev /*
135298df9218SJohn Baldwin  * vm_mmap_cdev()
135398df9218SJohn Baldwin  *
135498df9218SJohn Baldwin  * Helper function for vm_mmap.  Perform sanity checks specific to mmap
135598df9218SJohn Baldwin  * operations on cdevs.
135698df9218SJohn Baldwin  */
135798df9218SJohn Baldwin int
13587077c426SJohn Baldwin vm_mmap_cdev(struct thread *td, vm_size_t objsize, vm_prot_t prot,
13597077c426SJohn Baldwin     vm_prot_t *maxprotp, int *flagsp, struct cdev *cdev, struct cdevsw *dsw,
13607077c426SJohn Baldwin     vm_ooffset_t *foff, vm_object_t *objp)
136198df9218SJohn Baldwin {
136298df9218SJohn Baldwin 	vm_object_t obj;
13637077c426SJohn Baldwin 	int error, flags;
136498df9218SJohn Baldwin
136598df9218SJohn Baldwin 	flags = *flagsp;
136698df9218SJohn Baldwin
136791a35e78SKonstantin Belousov 	if (dsw->d_flags & D_MMAP_ANON) {
13687077c426SJohn Baldwin 		*objp = NULL;
13697077c426SJohn Baldwin 		*foff = 0;
137098df9218SJohn Baldwin 		*maxprotp = VM_PROT_ALL;
137198df9218SJohn Baldwin 		*flagsp |= MAP_ANON;
137298df9218SJohn Baldwin 		return (0);
137398df9218SJohn Baldwin 	}
137498df9218SJohn Baldwin 	/*
137564345f0bSJohn Baldwin 	 * cdevs do not provide private mappings of any kind.
137698df9218SJohn Baldwin 	 */
137798df9218SJohn Baldwin 	if ((*maxprotp & VM_PROT_WRITE) == 0 &&
13787077c426SJohn Baldwin 	    (prot & VM_PROT_WRITE) != 0)
137998df9218SJohn Baldwin 		return (EACCES);
13807077c426SJohn Baldwin 	if (flags & (MAP_PRIVATE|MAP_COPY))
138198df9218SJohn Baldwin 		return (EINVAL);
138298df9218SJohn Baldwin 	/*
138398df9218SJohn Baldwin 	 * Force device mappings to be shared.
138498df9218SJohn Baldwin 	 */
138598df9218SJohn Baldwin 	flags |= MAP_SHARED;
138698df9218SJohn Baldwin #ifdef MAC_XXX
13877077c426SJohn Baldwin 	error = mac_cdev_check_mmap(td->td_ucred, cdev, (int)prot);
13887077c426SJohn Baldwin 	if (error != 0)
138998df9218SJohn Baldwin 		return (error);
139098df9218SJohn Baldwin #endif
139164345f0bSJohn Baldwin 	/*
139264345f0bSJohn Baldwin 	 * First, try d_mmap_single().  If that is not implemented
139364345f0bSJohn Baldwin 	 * (returns ENODEV), fall back to using the device pager.
139464345f0bSJohn Baldwin 	 * Note that d_mmap_single() must return a reference to the
139564345f0bSJohn Baldwin 	 * object (it needs to bump the reference count of the object
139664345f0bSJohn Baldwin 	 * it returns somehow).
139764345f0bSJohn Baldwin 	 *
139864345f0bSJohn Baldwin 	 * XXX	assumes VM_PROT_* == PROT_*
139964345f0bSJohn Baldwin 	 */
140064345f0bSJohn Baldwin 	error = dsw->d_mmap_single(cdev, foff, objsize, objp, (int)prot);
140164345f0bSJohn Baldwin 	if (error != ENODEV)
140264345f0bSJohn Baldwin 		return (error);
14033364c323SKonstantin Belousov 	obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff,
14043364c323SKonstantin Belousov 	    td->td_ucred);
140598df9218SJohn Baldwin 	if (obj == NULL)
140698df9218SJohn Baldwin 		return (EINVAL);
140798df9218SJohn Baldwin 	*objp = obj;
140898df9218SJohn Baldwin 	*flagsp = flags;
140998df9218SJohn Baldwin 	return (0);
141098df9218SJohn Baldwin }
141198df9218SJohn Baldwin
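/*
 * Editor's example (a sketch with hypothetical names): a driver
 * cooperating with the protocol above implements d_mmap_single() so
 * that it either returns a referenced VM object or ENODEV to request
 * the device-pager fallback:
 *
 *	static int
 *	mydev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset,
 *	    vm_size_t size, vm_object_t *object, int nprot)
 *	{
 *		struct mydev_softc *sc = cdev->si_drv1;
 *
 *		if (*offset + size > sc->sc_size)
 *			return (EINVAL);
 *		vm_object_reference(sc->sc_obj);	(the reference
 *			the contract above requires)
 *		*object = sc->sc_obj;
 *		return (0);
 *	}
 */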
141298df9218SJohn Baldwin /*
1413d2c60af8SMatthew Dillon  * vm_mmap()
1414d2c60af8SMatthew Dillon  *
14157077c426SJohn Baldwin  * Internal version of mmap used by exec, sys5 shared memory, and
14167077c426SJohn Baldwin  * various device drivers.  Handle is either a vnode pointer, a
14177077c426SJohn Baldwin  * character device, or NULL for MAP_ANON.
1418df8bae1dSRodney W. Grimes  */
1419df8bae1dSRodney W. Grimes int
1420b9dcd593SBruce Evans vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
1421b9dcd593SBruce Evans     vm_prot_t maxprot, int flags,
142298df9218SJohn Baldwin     objtype_t handle_type, void *handle,
1423b9dcd593SBruce Evans     vm_ooffset_t foff)
1424df8bae1dSRodney W. Grimes {
14257077c426SJohn Baldwin 	vm_object_t object;
1426b40ce416SJulian Elischer 	struct thread *td = curthread;
14277077c426SJohn Baldwin 	int error;
142884110e7eSKonstantin Belousov 	boolean_t writecounted;
1429df8bae1dSRodney W. Grimes
1430df8bae1dSRodney W. Grimes 	if (size == 0)
14317077c426SJohn Baldwin 		return (EINVAL);
1432df8bae1dSRodney W. Grimes
1433749474f2SPeter Wemm 	size = round_page(size);
1434010ba384SMark Johnston 	object = NULL;
14357077c426SJohn Baldwin 	writecounted = FALSE;
14367077c426SJohn Baldwin
14377077c426SJohn Baldwin 	/*
14387077c426SJohn Baldwin 	 * Lookup/allocate object.
14397077c426SJohn Baldwin 	 */
14407077c426SJohn Baldwin 	switch (handle_type) {
14417077c426SJohn Baldwin 	case OBJT_DEVICE: {
14427077c426SJohn Baldwin 		struct cdevsw *dsw;
14437077c426SJohn Baldwin 		struct cdev *cdev;
14447077c426SJohn Baldwin 		int ref;
14457077c426SJohn Baldwin
14467077c426SJohn Baldwin 		cdev = handle;
14477077c426SJohn Baldwin 		dsw = dev_refthread(cdev, &ref);
14487077c426SJohn Baldwin 		if (dsw == NULL)
14497077c426SJohn Baldwin 			return (ENXIO);
14507077c426SJohn Baldwin 		error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, cdev,
14517077c426SJohn Baldwin 		    dsw, &foff, &object);
14527077c426SJohn Baldwin 		dev_relthread(cdev, ref);
14537077c426SJohn Baldwin 		break;
14547077c426SJohn Baldwin 	}
14557077c426SJohn Baldwin 	case OBJT_VNODE:
14567077c426SJohn Baldwin 		error = vm_mmap_vnode(td, size, prot, &maxprot, &flags,
14577077c426SJohn Baldwin 		    handle, &foff, &object, &writecounted);
14587077c426SJohn Baldwin 		break;
14597077c426SJohn Baldwin 	case OBJT_DEFAULT:
14607077c426SJohn Baldwin 		if (handle == NULL) {
14617077c426SJohn Baldwin 			error = 0;
14627077c426SJohn Baldwin 			break;
14637077c426SJohn Baldwin 		}
14647077c426SJohn Baldwin 		/* FALLTHROUGH */
14657077c426SJohn Baldwin 	default:
14667077c426SJohn Baldwin 		error = EINVAL;
14677077c426SJohn Baldwin 		break;
14687077c426SJohn Baldwin 	}
14697077c426SJohn Baldwin 	if (error)
14707077c426SJohn Baldwin 		return (error);
14717077c426SJohn Baldwin
14727077c426SJohn Baldwin 	error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object,
14737077c426SJohn Baldwin 	    foff, writecounted, td);
14747077c426SJohn Baldwin 	if (error != 0 && object != NULL) {
14757077c426SJohn Baldwin 		/*
14767077c426SJohn Baldwin 		 * If this mapping was accounted for in the vnode's
14777077c426SJohn Baldwin 		 * writecount, then undo that now.
14787077c426SJohn Baldwin 		 */
14797077c426SJohn Baldwin 		if (writecounted)
1480fe7bcbafSKyle Evans 			vm_pager_release_writecount(object, 0, size);
14817077c426SJohn Baldwin 		vm_object_deallocate(object);
14827077c426SJohn Baldwin 	}
14837077c426SJohn Baldwin 	return (error);
14847077c426SJohn Baldwin }
14857077c426SJohn Baldwin
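/*
 * Editor's example (a sketch, not taken from a real caller): an
 * in-kernel consumer maps anonymous memory into a process by passing a
 * NULL handle of type OBJT_DEFAULT, which the switch above accepts as
 * MAP_ANON:
 *
 *	vm_offset_t va = 0;
 *	error = vm_mmap(&td->td_proc->p_vmspace->vm_map, &va, len,
 *	    VM_PROT_READ | VM_PROT_WRITE, VM_PROT_ALL,
 *	    MAP_ANON | MAP_SHARED, OBJT_DEFAULT, NULL, 0);
 */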
14867077c426SJohn Baldwin /*
14877077c426SJohn Baldwin  * Internal version of mmap that maps a specific VM object into a
14887077c426SJohn Baldwin  * map.  Called by mmap for MAP_ANON, vm_mmap, shm_mmap, and vn_mmap.
14897077c426SJohn Baldwin  */
14907077c426SJohn Baldwin int
14917077c426SJohn Baldwin vm_mmap_object(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
14927077c426SJohn Baldwin     vm_prot_t maxprot, int flags, vm_object_t object, vm_ooffset_t foff,
14937077c426SJohn Baldwin     boolean_t writecounted, struct thread *td)
14947077c426SJohn Baldwin {
14956a97a3f7SKonstantin Belousov 	boolean_t curmap, fitit;
14966a97a3f7SKonstantin Belousov 	vm_offset_t max_addr;
14977077c426SJohn Baldwin 	int docow, error, findspace, rv;
1498df8bae1dSRodney W. Grimes
14996a97a3f7SKonstantin Belousov 	curmap = map == &td->td_proc->p_vmspace->vm_map;
15006a97a3f7SKonstantin Belousov 	if (curmap) {
15012554f86aSMateusz Guzik 		RACCT_PROC_LOCK(td->td_proc);
15022554f86aSMateusz Guzik 		if (map->size + size > lim_cur(td, RLIMIT_VMEM)) {
15032554f86aSMateusz Guzik 			RACCT_PROC_UNLOCK(td->td_proc);
1504070f64feSMatthew Dillon 			return (ENOMEM);
1505070f64feSMatthew Dillon 		}
1506a6492969SAlan Cox 		if (racct_set(td->td_proc, RACCT_VMEM, map->size + size)) {
15072554f86aSMateusz Guzik 			RACCT_PROC_UNLOCK(td->td_proc);
15081ba5ad42SEdward Tomasz Napierala 			return (ENOMEM);
15091ba5ad42SEdward Tomasz Napierala 		}
15107e19eda4SAndrey Zonov 		if (!old_mlock && map->flags & MAP_WIREFUTURE) {
15113ac7d297SAndrey Zonov 			if (ptoa(pmap_wired_count(map->pmap)) + size >
15122554f86aSMateusz Guzik 			    lim_cur(td, RLIMIT_MEMLOCK)) {
15137e19eda4SAndrey Zonov 				racct_set_force(td->td_proc, RACCT_VMEM,
15147e19eda4SAndrey Zonov 				    map->size);
15152554f86aSMateusz Guzik 				RACCT_PROC_UNLOCK(td->td_proc);
15167e19eda4SAndrey Zonov 				return (ENOMEM);
15177e19eda4SAndrey Zonov 			}
15187e19eda4SAndrey Zonov 			error = racct_set(td->td_proc, RACCT_MEMLOCK,
15193ac7d297SAndrey Zonov 			    ptoa(pmap_wired_count(map->pmap)) + size);
15207e19eda4SAndrey Zonov 			if (error != 0) {
15217e19eda4SAndrey Zonov 				racct_set_force(td->td_proc, RACCT_VMEM,
15227e19eda4SAndrey Zonov 				    map->size);
15232554f86aSMateusz Guzik 				RACCT_PROC_UNLOCK(td->td_proc);
15247e19eda4SAndrey Zonov 				return (error);
15257e19eda4SAndrey Zonov 			}
15267e19eda4SAndrey Zonov 		}
15272554f86aSMateusz Guzik 		RACCT_PROC_UNLOCK(td->td_proc);
1528a6492969SAlan Cox 	}
1529070f64feSMatthew Dillon
1530df8bae1dSRodney W. Grimes 	/*
1531bc9ad247SDavid Greenman 	 * We currently can only deal with page aligned file offsets.
15327077c426SJohn Baldwin 	 * The mmap() system call already enforces this by subtracting
15337077c426SJohn Baldwin 	 * the page offset from the file offset, but checking here
15347077c426SJohn Baldwin 	 * catches errors in device drivers (e.g. d_mmap_single()
15357077c426SJohn Baldwin 	 * callbacks) and other internal mapping requests (such as in
15367077c426SJohn Baldwin 	 * exec).
1537bc9ad247SDavid Greenman 	 */
1538bc9ad247SDavid Greenman 	if (foff & PAGE_MASK)
1539bc9ad247SDavid Greenman 		return (EINVAL);
1540bc9ad247SDavid Greenman
154106cb7259SDavid Greenman 	if ((flags & MAP_FIXED) == 0) {
154206cb7259SDavid Greenman 		fitit = TRUE;
154306cb7259SDavid Greenman 		*addr = round_page(*addr);
154406cb7259SDavid Greenman 	} else {
154506cb7259SDavid Greenman 		if (*addr != trunc_page(*addr))
154606cb7259SDavid Greenman 			return (EINVAL);
154706cb7259SDavid Greenman 		fitit = FALSE;
154806cb7259SDavid Greenman 	}
154984110e7eSKonstantin Belousov
15505f55e841SDavid Greenman 	if (flags & MAP_ANON) {
15517077c426SJohn Baldwin 		if (object != NULL || foff != 0)
15527077c426SJohn Baldwin 			return (EINVAL);
1553c8daea13SAlexander Kabaev 		docow = 0;
155474ffb9afSAlan Cox 	} else if (flags & MAP_PREFAULT_READ)
155574ffb9afSAlan Cox 		docow = MAP_PREFAULT;
155674ffb9afSAlan Cox 	else
15574738fa09SAlan Cox 		docow = MAP_PREFAULT_PARTIAL;
1558df8bae1dSRodney W. Grimes
15594f79d873SMatthew Dillon 	if ((flags & (MAP_ANON|MAP_SHARED)) == 0)
15604738fa09SAlan Cox 		docow |= MAP_COPY_ON_WRITE;
15614f79d873SMatthew Dillon 	if (flags & MAP_NOSYNC)
15624f79d873SMatthew Dillon 		docow |= MAP_DISABLE_SYNCER;
15639730a5daSPaul Saab 	if (flags & MAP_NOCORE)
15649730a5daSPaul Saab 		docow |= MAP_DISABLE_COREDUMP;
15658211bd45SKonstantin Belousov 	/* Shared memory is also shared with children. */
15668211bd45SKonstantin Belousov 	if (flags & MAP_SHARED)
15678211bd45SKonstantin Belousov 		docow |= MAP_INHERIT_SHARE;
156884110e7eSKonstantin Belousov 	if (writecounted)
1569fe7bcbafSKyle Evans 		docow |= MAP_WRITECOUNT;
15704648ba0aSKonstantin Belousov 	if (flags & MAP_STACK) {
15714648ba0aSKonstantin Belousov 		if (object != NULL)
15724648ba0aSKonstantin Belousov 			return (EINVAL);
15734648ba0aSKonstantin Belousov 		docow |= MAP_STACK_GROWS_DOWN;
15744648ba0aSKonstantin Belousov 	}
157511c42bccSKonstantin Belousov 	if ((flags & MAP_EXCL) != 0)
157611c42bccSKonstantin Belousov 		docow |= MAP_CHECK_EXCL;
157719bd0d9cSKonstantin Belousov 	if ((flags & MAP_GUARD) != 0)
157819bd0d9cSKonstantin Belousov 		docow |= MAP_CREATE_GUARD;
15795850152dSJohn Dyson
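/*
 * Choose the placement policy: when fitit is set the kernel searches
 * for free space, honoring any MAP_ALIGNED() request and, under
 * MAP_32BIT, staying below MAP_32BIT_MAX_ADDR; otherwise the mapping
 * is installed at the exact page-aligned address validated above.
 */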
15804648ba0aSKonstantin Belousov 	if (fitit) {
15815aa60b6fSJohn Baldwin 		if ((flags & MAP_ALIGNMENT_MASK) == MAP_ALIGNED_SUPER)
15825aa60b6fSJohn Baldwin 			findspace = VMFS_SUPER_SPACE;
15835aa60b6fSJohn Baldwin 		else if ((flags & MAP_ALIGNMENT_MASK) != 0)
15845aa60b6fSJohn Baldwin 			findspace = VMFS_ALIGNED_SPACE(flags >>
15855aa60b6fSJohn Baldwin 			    MAP_ALIGNMENT_SHIFT);
15862267af78SJulian Elischer 		else
15875aa60b6fSJohn Baldwin 			findspace = VMFS_OPTIMAL_SPACE;
15886a97a3f7SKonstantin Belousov 		max_addr = 0;
1589edb572a3SJohn Baldwin #ifdef MAP_32BIT
15906a97a3f7SKonstantin Belousov 		if ((flags & MAP_32BIT) != 0)
15916a97a3f7SKonstantin Belousov 			max_addr = MAP_32BIT_MAX_ADDR;
1592edb572a3SJohn Baldwin #endif
15936a97a3f7SKonstantin Belousov 		if (curmap) {
15946a97a3f7SKonstantin Belousov 			rv = vm_map_find_min(map, object, foff, addr, size,
15956a97a3f7SKonstantin Belousov 			    round_page((vm_offset_t)td->td_proc->p_vmspace->
15966a97a3f7SKonstantin Belousov 			    vm_daddr + lim_max(td, RLIMIT_DATA)), max_addr,
15976a97a3f7SKonstantin Belousov 			    findspace, prot, maxprot, docow);
15986a97a3f7SKonstantin Belousov 		} else {
15996a97a3f7SKonstantin Belousov 			rv = vm_map_find(map, object, foff, addr, size,
16006a97a3f7SKonstantin Belousov 			    max_addr, findspace, prot, maxprot, docow);
16016a97a3f7SKonstantin Belousov 		}
16024648ba0aSKonstantin Belousov 	} else {
1603b8ca4ef2SAlan Cox 		rv = vm_map_fixed(map, object, foff, *addr, size,
1604bd7e5f99SJohn Dyson 		    prot, maxprot, docow);
16054648ba0aSKonstantin Belousov 	}
1606bd7e5f99SJohn Dyson
1607f9230ad6SAlan Cox 	if (rv == KERN_SUCCESS) {
16087fb0c17eSDavid Greenman 		/*
1609f9230ad6SAlan Cox 		 * If the process has requested that all future mappings
1610f9230ad6SAlan Cox 		 * be wired, then heed this.
1611f9230ad6SAlan Cox 		 */
161254a3a114SMark Johnston 		if ((map->flags & MAP_WIREFUTURE) != 0) {
161354a3a114SMark Johnston 			vm_map_lock(map);
161454a3a114SMark Johnston 			if ((map->flags & MAP_WIREFUTURE) != 0)
16158cd6a80dSMark Johnston 				(void)vm_map_wire_locked(map, *addr,
161654a3a114SMark Johnston 				    *addr + size, VM_MAP_WIRE_USER |
161754a3a114SMark Johnston 				    ((flags & MAP_STACK) ? VM_MAP_WIRE_HOLESOK :
161854a3a114SMark Johnston 				    VM_MAP_WIRE_NOHOLES));
161954a3a114SMark Johnston 			vm_map_unlock(map);
16201472f4f4SKonstantin Belousov 		}
1621df8bae1dSRodney W. Grimes 	}
16222e32165cSKonstantin Belousov 	return (vm_mmap_to_errno(rv));
16232e32165cSKonstantin Belousov }
16242e32165cSKonstantin Belousov
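/*
 * Editor's example (a sketch, not part of the original): the
 * MAP_WIREFUTURE handling above is what makes mappings created after
 * mlockall(MCL_FUTURE) come back already wired:
 *
 *	if (mlockall(MCL_FUTURE) == -1)
 *		err(1, "mlockall");
 *	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *	    MAP_ANON | MAP_PRIVATE, -1, 0);
 *	(the pages backing p are wired on return)
 */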
1625f9230ad6SAlan Cox /*
1626f9230ad6SAlan Cox  * Translate a Mach VM return code to zero on success or the appropriate
1627f9230ad6SAlan Cox  * errno on failure.
1628f9230ad6SAlan Cox  */
16292e32165cSKonstantin Belousov int
16302e32165cSKonstantin Belousov vm_mmap_to_errno(int rv)
16312e32165cSKonstantin Belousov {
16322e32165cSKonstantin Belousov
1633df8bae1dSRodney W. Grimes 	switch (rv) {
1634df8bae1dSRodney W. Grimes 	case KERN_SUCCESS:
1635df8bae1dSRodney W. Grimes 		return (0);
1636df8bae1dSRodney W. Grimes 	case KERN_INVALID_ADDRESS:
1637df8bae1dSRodney W. Grimes 	case KERN_NO_SPACE:
1638df8bae1dSRodney W. Grimes 		return (ENOMEM);
1639df8bae1dSRodney W. Grimes 	case KERN_PROTECTION_FAILURE:
1640df8bae1dSRodney W. Grimes 		return (EACCES);
1641df8bae1dSRodney W. Grimes 	default:
1642df8bae1dSRodney W. Grimes 		return (EINVAL);
1643df8bae1dSRodney W. Grimes 	}
1644df8bae1dSRodney W. Grimes }
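/*
 * Editor's note (sketch): kernel code that ends up holding a
 * Mach-style status from the vm_map layer can use this helper rather
 * than open-coding the translation, e.g.:
 *
 *	rv = vm_map_remove(map, start, end);
 *	return (vm_mmap_to_errno(rv));
 */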