/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_hwpmc_hooks.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/capsicum.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/procctl.h>
#include <sys/racct.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/vmmeter.h>
#if defined(__amd64__) || defined(__i386__) /* for i386_read_exec */
#include <machine/md_var.h>
#endif

#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vnode_pager.h>

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

int old_mlock = 0;
SYSCTL_INT(_vm, OID_AUTO, old_mlock, CTLFLAG_RWTUN, &old_mlock, 0,
    "Do not apply RLIMIT_MEMLOCK on mlockall");
static int mincore_mapped = 1;
SYSCTL_INT(_vm, OID_AUTO, mincore_mapped, CTLFLAG_RWTUN, &mincore_mapped, 0,
    "mincore reports mappings, not residency");
static int imply_prot_max = 0;
SYSCTL_INT(_vm, OID_AUTO, imply_prot_max, CTLFLAG_RWTUN, &imply_prot_max, 0,
    "Imply maximum page permissions in mmap() when none are specified");

#ifdef MAP_32BIT
#define	MAP_32BIT_MAX_ADDR	((vm_offset_t)1 << 31)
#endif

#ifndef _SYS_SYSPROTO_H_
struct sbrk_args {
	int incr;
};
#endif

int
sys_sbrk(struct thread *td, struct sbrk_args *uap)
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#ifndef _SYS_SYSPROTO_H_
struct sstk_args {
	int incr;
};
#endif

int
sys_sstk(struct thread *td, struct sstk_args *uap)
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43)
int
ogetpagesize(struct thread *td, struct ogetpagesize_args *uap)
{

	td->td_retval[0] = PAGE_SIZE;
	return (0);
}
#endif				/* COMPAT_43 */

/*
 * Memory Map (mmap) system call.  Note that the file offset
 * and address are allowed to be NOT page aligned, though if
 * the MAP_FIXED flag is set, both must have the same remainder
 * modulo the PAGE_SIZE (POSIX 1003.1b).  If the address is not
 * page-aligned, the actual mapping starts at trunc_page(addr)
 * and the return value is adjusted up by the page offset.
 *
 * Generally speaking, only character devices which are themselves
 * memory-based, such as a video framebuffer, can be mmap'd.  Otherwise
 * there would be no cache coherency between a descriptor and a VM mapping
 * both to the same character device.
 */
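
/*
 * For example (illustrative), with 4KB pages a call such as
 *
 *	mmap(NULL, 100, PROT_READ, MAP_PRIVATE, fd, 0x1234)
 *
 * maps the file starting at trunc_page(0x1234) = 0x1000 and returns
 * an address advanced by the page offset 0x234, so the caller sees
 * the data for offset 0x1234 at the returned pointer.
 */
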
#ifndef _SYS_SYSPROTO_H_
struct mmap_args {
	void *addr;
	size_t len;
	int prot;
	int flags;
	int fd;
	long pad;
	off_t pos;
};
#endif

int
sys_mmap(struct thread *td, struct mmap_args *uap)
{

	return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, uap->prot,
	    uap->flags, uap->fd, uap->pos));
}

int
kern_mmap(struct thread *td, uintptr_t addr0, size_t len, int prot, int flags,
    int fd, off_t pos)
{
	struct vmspace *vms;
	struct file *fp;
	vm_offset_t addr;
	vm_size_t pageoff, size;
	vm_prot_t cap_maxprot;
	int align, error, max_prot;
	cap_rights_t rights;

	if ((prot & ~(_PROT_ALL | PROT_MAX(_PROT_ALL))) != 0)
		return (EINVAL);
	max_prot = PROT_MAX_EXTRACT(prot);
	prot = PROT_EXTRACT(prot);
	if (max_prot != 0 && (max_prot & prot) != prot)
		return (EINVAL);
	/*
	 * Always honor PROT_MAX if set.  If not, default to all
	 * permissions unless we're implying maximum permissions.
	 *
	 * XXX: should be tunable per process and ABI.
	 */
	if (max_prot == 0)
		max_prot = (imply_prot_max && prot != PROT_NONE) ?
		    prot : _PROT_ALL;
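
	/*
	 * Example (userland, illustrative): a caller may request writable
	 * pages while capping the maximum protection, so that a later
	 * mprotect() cannot add PROT_EXEC:
	 *
	 *	p = mmap(NULL, len, PROT_READ | PROT_WRITE |
	 *	    PROT_MAX(PROT_READ | PROT_WRITE), MAP_ANON, -1, 0);
	 */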

	vms = td->td_proc->p_vmspace;
	fp = NULL;
	AUDIT_ARG_FD(fd);
	addr = addr0;

	/*
	 * Ignore old flags that used to be defined but did not do anything.
	 */
	flags &= ~(MAP_RESERVED0020 | MAP_RESERVED0040);

	/*
	 * Enforce the constraints.
	 * Mapping of length 0 is only allowed for old binaries.
	 * Anonymous mapping shall specify -1 as file descriptor and
	 * zero position for new code.  Be nice to ancient a.out
	 * binaries and correct pos for anonymous mapping, since old
	 * ld.so sometimes issues anonymous map requests with non-zero
	 * pos.
	 */
	if (!SV_CURPROC_FLAG(SV_AOUT)) {
		if ((len == 0 && curproc->p_osrel >= P_OSREL_MAP_ANON) ||
		    ((flags & MAP_ANON) != 0 && (fd != -1 || pos != 0)))
			return (EINVAL);
	} else {
		if ((flags & MAP_ANON) != 0)
			pos = 0;
	}

	if (flags & MAP_STACK) {
		if ((fd != -1) ||
		    ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE)))
			return (EINVAL);
		flags |= MAP_ANON;
		pos = 0;
	}
	if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | MAP_HASSEMAPHORE |
	    MAP_STACK | MAP_NOSYNC | MAP_ANON | MAP_EXCL | MAP_NOCORE |
	    MAP_PREFAULT_READ | MAP_GUARD |
#ifdef MAP_32BIT
	    MAP_32BIT |
#endif
	    MAP_ALIGNMENT_MASK)) != 0)
		return (EINVAL);
	if ((flags & (MAP_EXCL | MAP_FIXED)) == MAP_EXCL)
		return (EINVAL);
	if ((flags & (MAP_SHARED | MAP_PRIVATE)) == (MAP_SHARED | MAP_PRIVATE))
		return (EINVAL);
	if (prot != PROT_NONE &&
	    (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) != 0)
		return (EINVAL);
	if ((flags & MAP_GUARD) != 0 && (prot != PROT_NONE || fd != -1 ||
	    pos != 0 || (flags & ~(MAP_FIXED | MAP_GUARD | MAP_EXCL |
#ifdef MAP_32BIT
	    MAP_32BIT |
#endif
	    MAP_ALIGNMENT_MASK)) != 0))
		return (EINVAL);

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;

	/* Compute size from len by rounding (on both ends). */
	size = len + pageoff;			/* low end... */
	size = round_page(size);		/* hi end */
	/* Check for rounding up to zero. */
	if (len > size)
		return (ENOMEM);
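
	/*
	 * For example, with 4KB pages, pos = 0x1234 and len = 0x100 give
	 * pageoff = 0x234, pos = 0x1000 and size = round_page(0x334) =
	 * 0x1000, a single page covering the requested range.  If
	 * len + pageoff overflows, the rounded size wraps below len and
	 * the request is rejected with ENOMEM above.
	 */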

	/* Ensure alignment is at least a page and fits in a pointer. */
	align = flags & MAP_ALIGNMENT_MASK;
	if (align != 0 && align != MAP_ALIGNED_SUPER &&
	    (align >> MAP_ALIGNMENT_SHIFT >= sizeof(void *) * NBBY ||
	    align >> MAP_ALIGNMENT_SHIFT < PAGE_SHIFT))
		return (EINVAL);
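
	/*
	 * For example, MAP_ALIGNED(21) requests 2MB (1 << 21) alignment;
	 * log2 values below PAGE_SHIFT or at or beyond the pointer width
	 * in bits are rejected, while MAP_ALIGNED_SUPER leaves the choice
	 * of a superpage-friendly alignment to the VM system.
	 */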

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		/* Address range must be all in user VM space. */
		if (addr < vm_map_min(&vms->vm_map) ||
		    addr + size > vm_map_max(&vms->vm_map))
			return (EINVAL);
		if (addr + size < addr)
			return (EINVAL);
#ifdef MAP_32BIT
		if (flags & MAP_32BIT && addr + size > MAP_32BIT_MAX_ADDR)
			return (EINVAL);
	} else if (flags & MAP_32BIT) {
		/*
		 * For MAP_32BIT, override the hint if it is too high and
		 * do not bother moving the mapping past the heap (since
		 * the heap is usually above 2GB).
		 */
		if (addr + size > MAP_32BIT_MAX_ADDR)
			addr = 0;
#endif
	} else {
		/*
		 * XXX for non-fixed mappings where no hint is provided or
		 * the hint would fall in the potential heap space,
		 * place it after the end of the largest possible heap.
		 *
		 * There should really be a pmap call to determine a reasonable
		 * location.
		 */
		if (addr == 0 ||
		    (addr >= round_page((vm_offset_t)vms->vm_taddr) &&
		    addr < round_page((vm_offset_t)vms->vm_daddr +
		    lim_max(td, RLIMIT_DATA))))
			addr = round_page((vm_offset_t)vms->vm_daddr +
			    lim_max(td, RLIMIT_DATA));
	}
	if (len == 0) {
		/*
		 * Return success without mapping anything for old
		 * binaries that request a page-aligned mapping of
		 * length 0.  For modern binaries, this function
		 * returns an error earlier.
		 */
		error = 0;
	} else if ((flags & MAP_GUARD) != 0) {
		error = vm_mmap_object(&vms->vm_map, &addr, size, VM_PROT_NONE,
		    VM_PROT_NONE, flags, NULL, pos, FALSE, td);
	} else if ((flags & MAP_ANON) != 0) {
		/*
		 * Mapping blank space is trivial.
		 *
		 * This relies on VM_PROT_* matching PROT_*.
		 */
		error = vm_mmap_object(&vms->vm_map, &addr, size, prot,
		    max_prot, flags, NULL, pos, FALSE, td);
	} else {
		/*
		 * Mapping file, get fp for validation and don't let the
		 * descriptor disappear on us if we block. Check capability
		 * rights, but also return the maximum rights to be combined
		 * with maxprot later.
		 */
		cap_rights_init(&rights, CAP_MMAP);
		if (prot & PROT_READ)
			cap_rights_set(&rights, CAP_MMAP_R);
		if ((flags & MAP_SHARED) != 0) {
			if (prot & PROT_WRITE)
				cap_rights_set(&rights, CAP_MMAP_W);
		}
		if (prot & PROT_EXEC)
			cap_rights_set(&rights, CAP_MMAP_X);
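
		/*
		 * For example, PROT_READ | PROT_WRITE with MAP_SHARED
		 * requires CAP_MMAP_R and CAP_MMAP_W on the descriptor,
		 * while a MAP_PRIVATE mapping needs no CAP_MMAP_W, since
		 * writes go to a private copy.
		 */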
		error = fget_mmap(td, fd, &rights, &cap_maxprot, &fp);
		if (error != 0)
			goto done;
		if ((flags & (MAP_SHARED | MAP_PRIVATE)) == 0 &&
		    td->td_proc->p_osrel >= P_OSREL_MAP_FSTRICT) {
			error = EINVAL;
			goto done;
		}

		/* This relies on VM_PROT_* matching PROT_*. */
		error = fo_mmap(fp, &vms->vm_map, &addr, size, prot,
		    max_prot & cap_maxprot, flags, pos, td);
	}

	if (error == 0)
		td->td_retval[0] = (register_t) (addr + pageoff);
done:
	if (fp)
		fdrop(fp, td);

	return (error);
}

#if defined(COMPAT_FREEBSD6)
int
freebsd6_mmap(struct thread *td, struct freebsd6_mmap_args *uap)
{

	return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, uap->prot,
	    uap->flags, uap->fd, uap->pos));
}
#endif

#ifdef COMPAT_43
#ifndef _SYS_SYSPROTO_H_
struct ommap_args {
	caddr_t addr;
	int len;
	int prot;
	int flags;
	int fd;
	long pos;
};
#endif
int
ommap(struct thread *td, struct ommap_args *uap)
{
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC | PROT_WRITE,
		PROT_READ,
		PROT_EXEC | PROT_READ,
		PROT_WRITE | PROT_READ,
		PROT_EXEC | PROT_WRITE | PROT_READ,
	};
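
	/*
	 * cvtbsdprot[] is indexed by the historic 4.3BSD protection bits
	 * (0x1 = execute, 0x2 = write, 0x4 = read) and yields the
	 * equivalent combination of modern PROT_* flags.
	 */
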
	int flags, prot;

#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100

	prot = cvtbsdprot[uap->prot & 0x7];
#if (defined(COMPAT_FREEBSD32) && defined(__amd64__)) || defined(__i386__)
	if (i386_read_exec && SV_PROC_FLAG(td->td_proc, SV_ILP32) &&
	    prot != 0)
		prot |= PROT_EXEC;
#endif
	flags = 0;
	if (uap->flags & OMAP_ANON)
		flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		flags |= MAP_SHARED;
	else
		flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		flags |= MAP_FIXED;
	return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, prot, flags,
	    uap->fd, uap->pos));
}
#endif				/* COMPAT_43 */

#ifndef _SYS_SYSPROTO_H_
struct msync_args {
	void *addr;
	size_t len;
	int flags;
};
#endif
int
sys_msync(struct thread *td, struct msync_args *uap)
{

	return (kern_msync(td, (uintptr_t)uap->addr, uap->len, uap->flags));
}

int
kern_msync(struct thread *td, uintptr_t addr0, size_t size, int flags)
{
	vm_offset_t addr;
	vm_size_t pageoff;
	vm_map_t map;
	int rv;

	addr = addr0;
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	map = &td->td_proc->p_vmspace->vm_map;

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_sync(map, addr, addr + size, (flags & MS_ASYNC) == 0,
	    (flags & MS_INVALIDATE) != 0);
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
		return (ENOMEM);
	case KERN_INVALID_ARGUMENT:
		return (EBUSY);
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
}

#ifndef _SYS_SYSPROTO_H_
struct munmap_args {
	void *addr;
	size_t len;
};
#endif
int
sys_munmap(struct thread *td, struct munmap_args *uap)
{

	return (kern_munmap(td, (uintptr_t)uap->addr, uap->len));
}

int
kern_munmap(struct thread *td, uintptr_t addr0, size_t size)
{
#ifdef HWPMC_HOOKS
	struct pmckern_map_out pkm;
	vm_map_entry_t entry;
	bool pmc_handled;
#endif
	vm_offset_t addr;
	vm_size_t pageoff;
	vm_map_t map;

	if (size == 0)
		return (EINVAL);

	addr = addr0;
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	if (addr < vm_map_min(map) || addr + size > vm_map_max(map))
		return (EINVAL);
	vm_map_lock(map);
#ifdef HWPMC_HOOKS
	pmc_handled = false;
	if (PMC_HOOK_INSTALLED(PMC_FN_MUNMAP)) {
		pmc_handled = true;
		/*
		 * Inform hwpmc if the address range being unmapped contains
		 * an executable region.
		 */
		pkm.pm_address = (uintptr_t) NULL;
		if (vm_map_lookup_entry(map, addr, &entry)) {
			for (; entry->start < addr + size;
			    entry = entry->next) {
				if (vm_map_check_protection(map, entry->start,
					entry->end, VM_PROT_EXECUTE) == TRUE) {
					pkm.pm_address = (uintptr_t) addr;
					pkm.pm_size = (size_t) size;
					break;
				}
			}
		}
	}
#endif
	vm_map_delete(map, addr, addr + size);

#ifdef HWPMC_HOOKS
	if (__predict_false(pmc_handled)) {
		/* downgrade the lock to prevent a LOR with the pmc-sx lock */
		vm_map_lock_downgrade(map);
		if (pkm.pm_address != (uintptr_t) NULL)
			PMC_CALL_HOOK(td, PMC_FN_MUNMAP, (void *) &pkm);
		vm_map_unlock_read(map);
	} else
#endif
		vm_map_unlock(map);

	/* vm_map_delete returns nothing but KERN_SUCCESS anyway */
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mprotect_args {
	const void *addr;
	size_t len;
	int prot;
};
#endif
int
sys_mprotect(struct thread *td, struct mprotect_args *uap)
{

	return (kern_mprotect(td, (uintptr_t)uap->addr, uap->len, uap->prot));
}

int
kern_mprotect(struct thread *td, uintptr_t addr0, size_t size, int prot)
{
	vm_offset_t addr;
	vm_size_t pageoff;
	int vm_error, max_prot;

	addr = addr0;
	if ((prot & ~(_PROT_ALL | PROT_MAX(_PROT_ALL))) != 0)
		return (EINVAL);
	max_prot = PROT_MAX_EXTRACT(prot);
	prot = PROT_EXTRACT(prot);
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
#ifdef COMPAT_FREEBSD32
	if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
		if (((addr + size) & 0xffffffff) < addr)
			return (EINVAL);
	} else
#endif
	if (addr + size < addr)
		return (EINVAL);

	vm_error = KERN_SUCCESS;
	if (max_prot != 0) {
		if ((max_prot & prot) != prot)
			return (EINVAL);
		vm_error = vm_map_protect(&td->td_proc->p_vmspace->vm_map,
		    addr, addr + size, max_prot, TRUE);
	}
	if (vm_error == KERN_SUCCESS)
		vm_error = vm_map_protect(&td->td_proc->p_vmspace->vm_map,
		    addr, addr + size, prot, FALSE);
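
	/*
	 * Example (userland, illustrative): mprotect() accepts PROT_MAX()
	 * as well, so a process can permanently drop the ability to
	 * restore execute permission on a region:
	 *
	 *	mprotect(p, len, PROT_READ | PROT_MAX(PROT_READ));
	 */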

	switch (vm_error) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	case KERN_RESOURCE_SHORTAGE:
		return (ENOMEM);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct minherit_args {
	void *addr;
	size_t len;
	int inherit;
};
#endif
int
sys_minherit(struct thread *td, struct minherit_args *uap)
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_inherit_t inherit;

	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	inherit = uap->inherit;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_inherit(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct madvise_args {
	void *addr;
	size_t len;
	int behav;
};
#endif

int
sys_madvise(struct thread *td, struct madvise_args *uap)
{

	return (kern_madvise(td, (uintptr_t)uap->addr, uap->len, uap->behav));
}

int
kern_madvise(struct thread *td, uintptr_t addr0, size_t len, int behav)
{
	vm_map_t map;
	vm_offset_t addr, end, start;
	int flags;

	/*
	 * Check for our special case, advising the swap pager we are
	 * "immortal."
	 */
	if (behav == MADV_PROTECT) {
		flags = PPROT_SET;
		return (kern_procctl(td, P_PID, td->td_proc->p_pid,
		    PROC_SPROTECT, &flags));
	}
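
	/*
	 * MADV_PROTECT is thus equivalent to (userland, illustrative):
	 *
	 *	int f = PPROT_SET;
	 *	procctl(P_PID, getpid(), PROC_SPROTECT, &f);
	 */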

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	addr = addr0;
	if (addr < vm_map_min(map) || addr + len > vm_map_max(map))
		return (EINVAL);
	if ((addr + len) < addr)
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	start = trunc_page(addr);
	end = round_page(addr + len);

	/*
	 * vm_map_madvise() checks for illegal values of behav.
	 */
	return (vm_map_madvise(map, start, end, behav));
}

#ifndef _SYS_SYSPROTO_H_
struct mincore_args {
	const void *addr;
	size_t len;
	char *vec;
};
#endif

int
sys_mincore(struct thread *td, struct mincore_args *uap)
{

	return (kern_mincore(td, (uintptr_t)uap->addr, uap->len, uap->vec));
}

int
kern_mincore(struct thread *td, uintptr_t addr0, size_t len, char *vec)
{
	vm_offset_t addr, first_addr;
	vm_offset_t end, cend;
	pmap_t pmap;
	vm_map_t map;
	int error = 0;
	int vecindex, lastvecindex;
	vm_map_entry_t current;
	vm_map_entry_t entry;
	vm_object_t object;
	vm_paddr_t locked_pa;
	vm_page_t m;
	vm_pindex_t pindex;
	int mincoreinfo;
	unsigned int timestamp;
	boolean_t locked;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page(addr0);
	end = addr + (vm_size_t)round_page(len);
	map = &td->td_proc->p_vmspace->vm_map;
	if (end > vm_map_max(map) || end < addr)
		return (ENOMEM);

	pmap = vmspace_pmap(td->td_proc->p_vmspace);

	vm_map_lock_read(map);
RestartScan:
	timestamp = map->timestamp;

	if (!vm_map_lookup_entry(map, addr, &entry)) {
		vm_map_unlock_read(map);
		return (ENOMEM);
	}

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current process's address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for (current = entry; current->start < end; current = current->next) {

		/*
		 * check for contiguity
		 */
		if (current->end < end && current->next->start > current->end) {
			vm_map_unlock_read(map);
			return (ENOMEM);
		}

		/*
		 * ignore submaps (for now) or null objects
		 */
		if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) ||
			current->object.vm_object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		/*
		 * scan this entry one page at a time
		 */
		while (addr < cend) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 */
			object = NULL;
			locked_pa = 0;
		retry:
			m = NULL;
			mincoreinfo = pmap_mincore(pmap, addr, &locked_pa);
			if (mincore_mapped) {
				/*
				 * We only care about this pmap's
				 * mapping of the page, if any.
				 */
				if (locked_pa != 0) {
					vm_page_unlock(PHYS_TO_VM_PAGE(
					    locked_pa));
				}
			} else if (locked_pa != 0) {
				/*
				 * The page is mapped by this process but not
				 * both accessed and modified.  It is also
				 * managed.  Acquire the object lock so that
				 * other mappings might be examined.
				 */
				m = PHYS_TO_VM_PAGE(locked_pa);
				if (m->object != object) {
					if (object != NULL)
						VM_OBJECT_WUNLOCK(object);
					object = m->object;
					locked = VM_OBJECT_TRYWLOCK(object);
					vm_page_unlock(m);
					if (!locked) {
						VM_OBJECT_WLOCK(object);
						vm_page_lock(m);
						goto retry;
					}
				} else
					vm_page_unlock(m);
				KASSERT(m->valid == VM_PAGE_BITS_ALL,
				    ("mincore: page %p is mapped but invalid",
				    m));
			} else if (mincoreinfo == 0) {
				/*
				 * The page is not mapped by this process.  If
				 * the object implements managed pages, then
				 * determine if the page is resident so that
				 * the mappings might be examined.
				 */
				if (current->object.vm_object != object) {
					if (object != NULL)
						VM_OBJECT_WUNLOCK(object);
					object = current->object.vm_object;
					VM_OBJECT_WLOCK(object);
				}
				if (object->type == OBJT_DEFAULT ||
				    object->type == OBJT_SWAP ||
				    object->type == OBJT_VNODE) {
					pindex = OFF_TO_IDX(current->offset +
					    (addr - current->start));
					m = vm_page_lookup(object, pindex);
					if (m != NULL && m->valid == 0)
						m = NULL;
					if (m != NULL)
						mincoreinfo = MINCORE_INCORE;
				}
			}
			if (m != NULL) {
				/* Examine other mappings to the page. */
				if (m->dirty == 0 && pmap_is_modified(m))
					vm_page_dirty(m);
				if (m->dirty != 0)
					mincoreinfo |= MINCORE_MODIFIED_OTHER;
				/*
				 * The first test for PGA_REFERENCED is an
				 * optimization.  The second test is
				 * required because a concurrent pmap
				 * operation could clear the last reference
				 * and set PGA_REFERENCED before the call to
				 * pmap_is_referenced().
				 */
				if ((m->aflags & PGA_REFERENCED) != 0 ||
				    pmap_is_referenced(m) ||
				    (m->aflags & PGA_REFERENCED) != 0)
					mincoreinfo |= MINCORE_REFERENCED_OTHER;
			}
			if (object != NULL)
				VM_OBJECT_WUNLOCK(object);

			/*
			 * subyte may page fault.  In case it needs to modify
			 * the map, we release the lock.
			 */
			vm_map_unlock_read(map);

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = atop(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make sure that
			 * the byte vector is zeroed for those skipped entries.
			 */
			while ((lastvecindex + 1) < vecindex) {
				++lastvecindex;
				error = subyte(vec + lastvecindex, 0);
				if (error) {
					error = EFAULT;
					goto done2;
				}
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte(vec + vecindex, mincoreinfo);
			if (error) {
				error = EFAULT;
				goto done2;
			}

			/*
			 * If the map has changed, due to the subyte, the previous
			 * output may be invalid.
			 */
			vm_map_lock_read(map);
			if (timestamp != map->timestamp)
				goto RestartScan;

			lastvecindex = vecindex;
			addr += PAGE_SIZE;
		}
	}

	/*
	 * subyte may page fault.  In case it needs to modify
	 * the map, we release the lock.
	 */
	vm_map_unlock_read(map);

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = atop(end - first_addr);
	while ((lastvecindex + 1) < vecindex) {
		++lastvecindex;
		error = subyte(vec + lastvecindex, 0);
		if (error) {
			error = EFAULT;
			goto done2;
		}
	}

	/*
	 * If the map has changed, due to the subyte, the previous
	 * output may be invalid.
	 */
	vm_map_lock_read(map);
	if (timestamp != map->timestamp)
		goto RestartScan;
	vm_map_unlock_read(map);
done2:
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct mlock_args {
	const void *addr;
	size_t len;
};
#endif
int
sys_mlock(struct thread *td, struct mlock_args *uap)
{

	return (kern_mlock(td->td_proc, td->td_ucred,
	    __DECONST(uintptr_t, uap->addr), uap->len));
}

int
kern_mlock(struct proc *proc, struct ucred *cred, uintptr_t addr0, size_t len)
{
	vm_offset_t addr, end, last, start;
	vm_size_t npages, size;
	vm_map_t map;
	unsigned long nsize;
	int error;

	error = priv_check_cred(cred, PRIV_VM_MLOCK);
	if (error)
		return (error);
	addr = addr0;
	size = len;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	npages = atop(end - start);
	if (npages > vm_page_max_user_wired)
		return (ENOMEM);
	map = &proc->p_vmspace->vm_map;
	PROC_LOCK(proc);
	nsize = ptoa(npages + pmap_wired_count(map->pmap));
	if (nsize > lim_cur_proc(proc, RLIMIT_MEMLOCK)) {
		PROC_UNLOCK(proc);
		return (ENOMEM);
	}
	PROC_UNLOCK(proc);
#ifdef RACCT
	if (racct_enable) {
		PROC_LOCK(proc);
		error = racct_set(proc, RACCT_MEMLOCK, nsize);
		PROC_UNLOCK(proc);
		if (error != 0)
			return (ENOMEM);
	}
#endif
	error = vm_map_wire(map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (racct_enable && error != KERN_SUCCESS) {
		PROC_LOCK(proc);
		racct_set(proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

1074d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_
10754a40e3d4SJohn Dyson struct mlockall_args {
10764a40e3d4SJohn Dyson 	int	how;
10774a40e3d4SJohn Dyson };
10784a40e3d4SJohn Dyson #endif
10794a40e3d4SJohn Dyson 
10804a40e3d4SJohn Dyson int
108104e89ffbSKonstantin Belousov sys_mlockall(struct thread *td, struct mlockall_args *uap)
10824a40e3d4SJohn Dyson {
1083abd498aaSBruce M Simpson 	vm_map_t map;
1084abd498aaSBruce M Simpson 	int error;
1085abd498aaSBruce M Simpson 
1086abd498aaSBruce M Simpson 	map = &td->td_proc->p_vmspace->vm_map;
10877e19eda4SAndrey Zonov 	error = priv_check(td, PRIV_VM_MLOCK);
10887e19eda4SAndrey Zonov 	if (error)
10897e19eda4SAndrey Zonov 		return (error);
1090abd498aaSBruce M Simpson 
1091abd498aaSBruce M Simpson 	if ((uap->how == 0) || ((uap->how & ~(MCL_CURRENT|MCL_FUTURE)) != 0))
1092abd498aaSBruce M Simpson 		return (EINVAL);
1093abd498aaSBruce M Simpson 
1094abd498aaSBruce M Simpson 	/*
1095abd498aaSBruce M Simpson 	 * If wiring all pages in the process would cause it to exceed
1096abd498aaSBruce M Simpson 	 * a hard resource limit, return ENOMEM.
1097abd498aaSBruce M Simpson 	 */
10987e19eda4SAndrey Zonov 	if (!old_mlock && uap->how & MCL_CURRENT) {
10992554f86aSMateusz Guzik 		if (map->size > lim_cur(td, RLIMIT_MEMLOCK))
1100abd498aaSBruce M Simpson 			return (ENOMEM);
110191d5354aSJohn Baldwin 	}
1102afcc55f3SEdward Tomasz Napierala #ifdef RACCT
11034b5c9cf6SEdward Tomasz Napierala 	if (racct_enable) {
11041ba5ad42SEdward Tomasz Napierala 		PROC_LOCK(td->td_proc);
11051ba5ad42SEdward Tomasz Napierala 		error = racct_set(td->td_proc, RACCT_MEMLOCK, map->size);
11061ba5ad42SEdward Tomasz Napierala 		PROC_UNLOCK(td->td_proc);
11071ba5ad42SEdward Tomasz Napierala 		if (error != 0)
11081ba5ad42SEdward Tomasz Napierala 			return (ENOMEM);
11094b5c9cf6SEdward Tomasz Napierala 	}
1110afcc55f3SEdward Tomasz Napierala #endif
1111abd498aaSBruce M Simpson 
1112abd498aaSBruce M Simpson 	if (uap->how & MCL_FUTURE) {
1113abd498aaSBruce M Simpson 		vm_map_lock(map);
1114abd498aaSBruce M Simpson 		vm_map_modflags(map, MAP_WIREFUTURE, 0);
1115abd498aaSBruce M Simpson 		vm_map_unlock(map);
1116abd498aaSBruce M Simpson 		error = 0;
1117abd498aaSBruce M Simpson 	}
1118abd498aaSBruce M Simpson 
1119abd498aaSBruce M Simpson 	if (uap->how & MCL_CURRENT) {
1120abd498aaSBruce M Simpson 		/*
1121abd498aaSBruce M Simpson 		 * P1003.1-2001 mandates that all currently mapped pages
1122abd498aaSBruce M Simpson 		 * will be memory resident and locked (wired) upon return
1123abd498aaSBruce M Simpson 	 * from mlockall(). vm_map_wire() will wire pages by
1124abd498aaSBruce M Simpson 		 * calling vm_fault_wire() for each page in the region.
1125abd498aaSBruce M Simpson 		 */
1126abd498aaSBruce M Simpson 		error = vm_map_wire(map, vm_map_min(map), vm_map_max(map),
1127abd498aaSBruce M Simpson 		    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
112854a3a114SMark Johnston 		if (error == KERN_SUCCESS)
112954a3a114SMark Johnston 			error = 0;
113054a3a114SMark Johnston 		else if (error == KERN_RESOURCE_SHORTAGE)
113154a3a114SMark Johnston 			error = ENOMEM;
113254a3a114SMark Johnston 		else
113354a3a114SMark Johnston 			error = EAGAIN;
1134abd498aaSBruce M Simpson 	}
1135afcc55f3SEdward Tomasz Napierala #ifdef RACCT
11364b5c9cf6SEdward Tomasz Napierala 	if (racct_enable && error != KERN_SUCCESS) {
11371ba5ad42SEdward Tomasz Napierala 		PROC_LOCK(td->td_proc);
11381ba5ad42SEdward Tomasz Napierala 		racct_set(td->td_proc, RACCT_MEMLOCK,
11393ac7d297SAndrey Zonov 		    ptoa(pmap_wired_count(map->pmap)));
11401ba5ad42SEdward Tomasz Napierala 		PROC_UNLOCK(td->td_proc);
11411ba5ad42SEdward Tomasz Napierala 	}
1142afcc55f3SEdward Tomasz Napierala #endif
1143abd498aaSBruce M Simpson 
1144abd498aaSBruce M Simpson 	return (error);
11454a40e3d4SJohn Dyson }
11464a40e3d4SJohn Dyson 
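/*
 * Illustrative sketch (userland, not compiled here): MCL_CURRENT takes
 * the vm_map_wire() path above, while MCL_FUTURE merely sets
 * MAP_WIREFUTURE so that later mappings are wired as they are created.
 *
 *	#include <sys/mman.h>
 *	#include <err.h>
 *
 *	int
 *	main(void)
 *	{
 *
 *		if (mlockall(MCL_CURRENT | MCL_FUTURE) == -1)
 *			err(1, "mlockall");
 *		return (0);
 *	}
 */
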
11474a40e3d4SJohn Dyson #ifndef _SYS_SYSPROTO_H_
1148fa721254SAlfred Perlstein struct munlockall_args {
1149abd498aaSBruce M Simpson 	register_t dummy;
11504a40e3d4SJohn Dyson };
11514a40e3d4SJohn Dyson #endif
11524a40e3d4SJohn Dyson 
11534a40e3d4SJohn Dyson int
115404e89ffbSKonstantin Belousov sys_munlockall(struct thread *td, struct munlockall_args *uap)
11554a40e3d4SJohn Dyson {
1156abd498aaSBruce M Simpson 	vm_map_t map;
1157abd498aaSBruce M Simpson 	int error;
1158abd498aaSBruce M Simpson 
1159abd498aaSBruce M Simpson 	map = &td->td_proc->p_vmspace->vm_map;
1160acd3428bSRobert Watson 	error = priv_check(td, PRIV_VM_MUNLOCK);
1161abd498aaSBruce M Simpson 	if (error)
1162abd498aaSBruce M Simpson 		return (error);
1163abd498aaSBruce M Simpson 
1164abd498aaSBruce M Simpson 	/* Clear the MAP_WIREFUTURE flag from this vm_map. */
1165abd498aaSBruce M Simpson 	vm_map_lock(map);
1166abd498aaSBruce M Simpson 	vm_map_modflags(map, 0, MAP_WIREFUTURE);
1167abd498aaSBruce M Simpson 	vm_map_unlock(map);
1168abd498aaSBruce M Simpson 
1169abd498aaSBruce M Simpson 	/* Forcibly unwire all pages. */
1170abd498aaSBruce M Simpson 	error = vm_map_unwire(map, vm_map_min(map), vm_map_max(map),
1171abd498aaSBruce M Simpson 	    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
1172afcc55f3SEdward Tomasz Napierala #ifdef RACCT
11734b5c9cf6SEdward Tomasz Napierala 	if (racct_enable && error == KERN_SUCCESS) {
11741ba5ad42SEdward Tomasz Napierala 		PROC_LOCK(td->td_proc);
11751ba5ad42SEdward Tomasz Napierala 		racct_set(td->td_proc, RACCT_MEMLOCK, 0);
11761ba5ad42SEdward Tomasz Napierala 		PROC_UNLOCK(td->td_proc);
11771ba5ad42SEdward Tomasz Napierala 	}
1178afcc55f3SEdward Tomasz Napierala #endif
1179abd498aaSBruce M Simpson 
1180abd498aaSBruce M Simpson 	return (error);
11814a40e3d4SJohn Dyson }
11824a40e3d4SJohn Dyson 
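/*
 * Illustrative sketch (userland, not compiled here): munlockall(2) is
 * the inverse of the mlockall() sketch above; it clears MAP_WIREFUTURE
 * and then unwires every page in the map.
 *
 *	if (munlockall() == -1)
 *		err(1, "munlockall");
 */
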
11834a40e3d4SJohn Dyson #ifndef _SYS_SYSPROTO_H_
1184df8bae1dSRodney W. Grimes struct munlock_args {
1185651bb817SAlexander Langer 	const void *addr;
1186df8bae1dSRodney W. Grimes 	size_t len;
1187df8bae1dSRodney W. Grimes };
1188d2d3e875SBruce Evans #endif
1189df8bae1dSRodney W. Grimes int
119069cdfcefSEdward Tomasz Napierala sys_munlock(struct thread *td, struct munlock_args *uap)
1191df8bae1dSRodney W. Grimes {
119269cdfcefSEdward Tomasz Napierala 
1193496ab053SKonstantin Belousov 	return (kern_munlock(td, (uintptr_t)uap->addr, uap->len));
119469cdfcefSEdward Tomasz Napierala }
119569cdfcefSEdward Tomasz Napierala 
119669cdfcefSEdward Tomasz Napierala int
1197496ab053SKonstantin Belousov kern_munlock(struct thread *td, uintptr_t addr0, size_t size)
119869cdfcefSEdward Tomasz Napierala {
1199496ab053SKonstantin Belousov 	vm_offset_t addr, end, last, start;
1200fc2b1679SJeremie Le Hen #ifdef RACCT
1201c92b5069SJeremie Le Hen 	vm_map_t map;
1202fc2b1679SJeremie Le Hen #endif
1203df8bae1dSRodney W. Grimes 	int error;
1204df8bae1dSRodney W. Grimes 
1205acd3428bSRobert Watson 	error = priv_check(td, PRIV_VM_MUNLOCK);
120647934cefSDon Lewis 	if (error)
120747934cefSDon Lewis 		return (error);
1208496ab053SKonstantin Belousov 	addr = addr0;
1209bb734798SDon Lewis 	last = addr + size;
121016929939SDon Lewis 	start = trunc_page(addr);
1211bb734798SDon Lewis 	end = round_page(last);
1212bb734798SDon Lewis 	if (last < addr || end < addr)
1213df8bae1dSRodney W. Grimes 		return (EINVAL);
121416929939SDon Lewis 	error = vm_map_unwire(&td->td_proc->p_vmspace->vm_map, start, end,
121516929939SDon Lewis 	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
1216afcc55f3SEdward Tomasz Napierala #ifdef RACCT
12174b5c9cf6SEdward Tomasz Napierala 	if (racct_enable && error == KERN_SUCCESS) {
12181ba5ad42SEdward Tomasz Napierala 		PROC_LOCK(td->td_proc);
1219c92b5069SJeremie Le Hen 		map = &td->td_proc->p_vmspace->vm_map;
1220c92b5069SJeremie Le Hen 		racct_set(td->td_proc, RACCT_MEMLOCK,
1221c92b5069SJeremie Le Hen 		    ptoa(pmap_wired_count(map->pmap)));
12221ba5ad42SEdward Tomasz Napierala 		PROC_UNLOCK(td->td_proc);
12231ba5ad42SEdward Tomasz Napierala 	}
1224afcc55f3SEdward Tomasz Napierala #endif
1225df8bae1dSRodney W. Grimes 	return (error == KERN_SUCCESS ? 0 : ENOMEM);
1226df8bae1dSRodney W. Grimes }
1227df8bae1dSRodney W. Grimes 
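/*
 * Illustrative sketch (userland, not compiled here; reuses buf from
 * the mlock() sketch above): munlock(2) unwires the page-aligned
 * superset of the range, matching the trunc_page()/round_page()
 * computation above.
 *
 *	if (munlock(buf, sizeof(buf)) == -1)
 *		err(1, "munlock");
 */
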
1228df8bae1dSRodney W. Grimes /*
1229c8daea13SAlexander Kabaev  * vm_mmap_vnode()
1230c8daea13SAlexander Kabaev  *
1231c8daea13SAlexander Kabaev  * Helper function for vm_mmap.  Perform sanity checks specific to mmap
1232c8daea13SAlexander Kabaev  * operations on vnodes.
1233c8daea13SAlexander Kabaev  */
1234c8daea13SAlexander Kabaev int
1235c8daea13SAlexander Kabaev vm_mmap_vnode(struct thread *td, vm_size_t objsize,
1236c8daea13SAlexander Kabaev     vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
123784110e7eSKonstantin Belousov     struct vnode *vp, vm_ooffset_t *foffp, vm_object_t *objp,
123884110e7eSKonstantin Belousov     boolean_t *writecounted)
1239c8daea13SAlexander Kabaev {
1240c8daea13SAlexander Kabaev 	struct vattr va;
1241c8daea13SAlexander Kabaev 	vm_object_t obj;
1242bd0e1bebSMark Johnston 	vm_ooffset_t foff;
12430359a12eSAttilio Rao 	struct ucred *cred;
124478022527SKonstantin Belousov 	int error, flags;
124578022527SKonstantin Belousov 	bool writex;
1246c8daea13SAlexander Kabaev 
12470359a12eSAttilio Rao 	cred = td->td_ucred;
124878022527SKonstantin Belousov 	writex = (*maxprotp & VM_PROT_WRITE) != 0 &&
124978022527SKonstantin Belousov 	    (*flagsp & MAP_SHARED) != 0;
125078022527SKonstantin Belousov 	if ((error = vget(vp, LK_SHARED, td)) != 0)
1251c8daea13SAlexander Kabaev 		return (error);
12520df42647SRobert Watson 	AUDIT_ARG_VNODE1(vp);
125364345f0bSJohn Baldwin 	foff = *foffp;
1254c8daea13SAlexander Kabaev 	flags = *flagsp;
12558516dd18SPoul-Henning Kamp 	obj = vp->v_object;
1256c8daea13SAlexander Kabaev 	if (vp->v_type == VREG) {
1257c8daea13SAlexander Kabaev 		/*
1258c8daea13SAlexander Kabaev 		 * Get the proper underlying object
1259c8daea13SAlexander Kabaev 		 */
12608516dd18SPoul-Henning Kamp 		if (obj == NULL) {
1261c8daea13SAlexander Kabaev 			error = EINVAL;
1262c8daea13SAlexander Kabaev 			goto done;
1263c8daea13SAlexander Kabaev 		}
1264e5f299ffSKonstantin Belousov 		if (obj->type == OBJT_VNODE && obj->handle != vp) {
1265c8daea13SAlexander Kabaev 			vput(vp);
1266c8daea13SAlexander Kabaev 			vp = (struct vnode *)obj->handle;
126784110e7eSKonstantin Belousov 			/*
126884110e7eSKonstantin Belousov 			 * Filesystems that bypass (e.g. nullfs) obey the
126953f5f8a0SKonstantin Belousov 			 * MP-safety of the underlying fs.  Tmpfs never bypasses.
127084110e7eSKonstantin Belousov 			 */
127178022527SKonstantin Belousov 			error = vget(vp, LK_SHARED, td);
12725050aa86SKonstantin Belousov 			if (error != 0)
127384110e7eSKonstantin Belousov 				return (error);
127484110e7eSKonstantin Belousov 		}
127578022527SKonstantin Belousov 		if (writex) {
127684110e7eSKonstantin Belousov 			*writecounted = TRUE;
127784110e7eSKonstantin Belousov 			vnode_pager_update_writecount(obj, 0, objsize);
127884110e7eSKonstantin Belousov 		}
1279c8daea13SAlexander Kabaev 	} else {
1280c8daea13SAlexander Kabaev 		error = EINVAL;
1281c8daea13SAlexander Kabaev 		goto done;
1282c8daea13SAlexander Kabaev 	}
12830359a12eSAttilio Rao 	if ((error = VOP_GETATTR(vp, &va, cred)))
1284c8daea13SAlexander Kabaev 		goto done;
1285c92163dcSChristian S.J. Peron #ifdef MAC
12867077c426SJohn Baldwin 	/* This relies on VM_PROT_* matching PROT_*. */
12877077c426SJohn Baldwin 	error = mac_vnode_check_mmap(cred, vp, (int)prot, flags);
1288c92163dcSChristian S.J. Peron 	if (error != 0)
1289c92163dcSChristian S.J. Peron 		goto done;
1290c92163dcSChristian S.J. Peron #endif
1291c8daea13SAlexander Kabaev 	if ((flags & MAP_SHARED) != 0) {
1292c8daea13SAlexander Kabaev 		if ((va.va_flags & (SF_SNAPSHOT|IMMUTABLE|APPEND)) != 0) {
12937077c426SJohn Baldwin 			if (prot & VM_PROT_WRITE) {
1294c8daea13SAlexander Kabaev 				error = EPERM;
1295c8daea13SAlexander Kabaev 				goto done;
1296c8daea13SAlexander Kabaev 			}
1297c8daea13SAlexander Kabaev 			*maxprotp &= ~VM_PROT_WRITE;
1298c8daea13SAlexander Kabaev 		}
1299c8daea13SAlexander Kabaev 	}
1300c8daea13SAlexander Kabaev 	/*
1301c8daea13SAlexander Kabaev 	 * If it is a regular file without any references,
1302c8daea13SAlexander Kabaev 	 * we do not need to sync it.
1303c8daea13SAlexander Kabaev 	 * Adjust the object size to be the size of the actual file.
1304c8daea13SAlexander Kabaev 	 */
1305c8daea13SAlexander Kabaev 	objsize = round_page(va.va_size);
1306c8daea13SAlexander Kabaev 	if (va.va_nlink == 0)
1307c8daea13SAlexander Kabaev 		flags |= MAP_NOSYNC;
13083d653db0SAlan Cox 	if (obj->type == OBJT_VNODE) {
1309e5f299ffSKonstantin Belousov 		obj = vm_pager_allocate(OBJT_VNODE, vp, objsize, prot, foff,
1310e5f299ffSKonstantin Belousov 		    cred);
1311c8daea13SAlexander Kabaev 		if (obj == NULL) {
131264345f0bSJohn Baldwin 			error = ENOMEM;
1313c8daea13SAlexander Kabaev 			goto done;
1314c8daea13SAlexander Kabaev 		}
13153d653db0SAlan Cox 	} else {
13163d653db0SAlan Cox 		KASSERT(obj->type == OBJT_DEFAULT || obj->type == OBJT_SWAP,
13173d653db0SAlan Cox 		    ("wrong object type"));
13183d653db0SAlan Cox 		VM_OBJECT_WLOCK(obj);
13193d653db0SAlan Cox 		vm_object_reference_locked(obj);
13203d653db0SAlan Cox #if VM_NRESERVLEVEL > 0
13213d653db0SAlan Cox 		vm_object_color(obj, 0);
13223d653db0SAlan Cox #endif
13233d653db0SAlan Cox 		VM_OBJECT_WUNLOCK(obj);
13243d653db0SAlan Cox 	}
1325c8daea13SAlexander Kabaev 	*objp = obj;
1326c8daea13SAlexander Kabaev 	*flagsp = flags;
132764345f0bSJohn Baldwin 
13280359a12eSAttilio Rao 	vfs_mark_atime(vp, cred);
13291e309003SDiomidis Spinellis 
1330c8daea13SAlexander Kabaev done:
1331bafa6cfcSKonstantin Belousov 	if (error != 0 && *writecounted) {
1332bafa6cfcSKonstantin Belousov 		*writecounted = FALSE;
1333bafa6cfcSKonstantin Belousov 		vnode_pager_update_writecount(obj, objsize, 0);
1334bafa6cfcSKonstantin Belousov 	}
1335c8daea13SAlexander Kabaev 	vput(vp);
1336c8daea13SAlexander Kabaev 	return (error);
1337c8daea13SAlexander Kabaev }
1338c8daea13SAlexander Kabaev 
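/*
 * Illustrative sketch (userland, not compiled here; the descriptor fd
 * and length len are assumed): a shared, writable file mapping takes
 * the writex path above, so the vnode's writecount stays bumped for
 * the life of the mapping.
 *
 *	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 */
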
1339c8daea13SAlexander Kabaev /*
134098df9218SJohn Baldwin  * vm_mmap_cdev()
134198df9218SJohn Baldwin  *
134298df9218SJohn Baldwin  * Helper function for vm_mmap.  Perform sanity checks specific to mmap
134398df9218SJohn Baldwin  * operations on cdevs.
134498df9218SJohn Baldwin  */
134598df9218SJohn Baldwin int
13467077c426SJohn Baldwin vm_mmap_cdev(struct thread *td, vm_size_t objsize, vm_prot_t prot,
13477077c426SJohn Baldwin     vm_prot_t *maxprotp, int *flagsp, struct cdev *cdev, struct cdevsw *dsw,
13487077c426SJohn Baldwin     vm_ooffset_t *foff, vm_object_t *objp)
134998df9218SJohn Baldwin {
135098df9218SJohn Baldwin 	vm_object_t obj;
13517077c426SJohn Baldwin 	int error, flags;
135298df9218SJohn Baldwin 
135398df9218SJohn Baldwin 	flags = *flagsp;
135498df9218SJohn Baldwin 
135591a35e78SKonstantin Belousov 	if (dsw->d_flags & D_MMAP_ANON) {
13567077c426SJohn Baldwin 		*objp = NULL;
13577077c426SJohn Baldwin 		*foff = 0;
135898df9218SJohn Baldwin 		*maxprotp = VM_PROT_ALL;
135998df9218SJohn Baldwin 		*flagsp |= MAP_ANON;
136098df9218SJohn Baldwin 		return (0);
136198df9218SJohn Baldwin 	}
136298df9218SJohn Baldwin 	/*
136364345f0bSJohn Baldwin 	 * cdevs do not provide private mappings of any kind.
136498df9218SJohn Baldwin 	 */
136598df9218SJohn Baldwin 	if ((*maxprotp & VM_PROT_WRITE) == 0 &&
13667077c426SJohn Baldwin 	    (prot & VM_PROT_WRITE) != 0)
136798df9218SJohn Baldwin 		return (EACCES);
13687077c426SJohn Baldwin 	if (flags & (MAP_PRIVATE|MAP_COPY))
136998df9218SJohn Baldwin 		return (EINVAL);
137098df9218SJohn Baldwin 	/*
137198df9218SJohn Baldwin 	 * Force device mappings to be shared.
137298df9218SJohn Baldwin 	 */
137398df9218SJohn Baldwin 	flags |= MAP_SHARED;
137498df9218SJohn Baldwin #ifdef MAC_XXX
13757077c426SJohn Baldwin 	error = mac_cdev_check_mmap(td->td_ucred, cdev, (int)prot);
13767077c426SJohn Baldwin 	if (error != 0)
137798df9218SJohn Baldwin 		return (error);
137898df9218SJohn Baldwin #endif
137964345f0bSJohn Baldwin 	/*
138064345f0bSJohn Baldwin 	 * First, try d_mmap_single().  If that is not implemented
138164345f0bSJohn Baldwin 	 * (returns ENODEV), fall back to using the device pager.
138264345f0bSJohn Baldwin 	 * Note that d_mmap_single() must return a reference to the
138364345f0bSJohn Baldwin 	 * object (it must bump the reference count of the object
138464345f0bSJohn Baldwin 	 * it returns).
138564345f0bSJohn Baldwin 	 *
138664345f0bSJohn Baldwin 	 * XXX assumes VM_PROT_* == PROT_*
138764345f0bSJohn Baldwin 	 */
138864345f0bSJohn Baldwin 	error = dsw->d_mmap_single(cdev, foff, objsize, objp, (int)prot);
138964345f0bSJohn Baldwin 	if (error != ENODEV)
139064345f0bSJohn Baldwin 		return (error);
13913364c323SKonstantin Belousov 	obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff,
13923364c323SKonstantin Belousov 	    td->td_ucred);
139398df9218SJohn Baldwin 	if (obj == NULL)
139498df9218SJohn Baldwin 		return (EINVAL);
139598df9218SJohn Baldwin 	*objp = obj;
139698df9218SJohn Baldwin 	*flagsp = flags;
139798df9218SJohn Baldwin 	return (0);
139898df9218SJohn Baldwin }
139998df9218SJohn Baldwin 
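/*
 * Illustrative sketch (hypothetical "foo" driver, not part of this
 * file): a cdev that implements d_mmap_single() hands back its own
 * referenced object, so the device-pager fallback above never runs.
 *
 *	static int
 *	foo_mmap_single(struct cdev *cdev, vm_ooffset_t *offset,
 *	    vm_size_t size, struct vm_object **objp, int nprot)
 *	{
 *		struct foo_softc *sc = cdev->si_drv1;
 *
 *		if (*offset + size > sc->obj_size)
 *			return (EINVAL);
 *		vm_object_reference(sc->obj);
 *		*objp = sc->obj;
 *		return (0);
 *	}
 */
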
140098df9218SJohn Baldwin /*
1401d2c60af8SMatthew Dillon  * vm_mmap()
1402d2c60af8SMatthew Dillon  *
14037077c426SJohn Baldwin  * Internal version of mmap used by exec, sys5 shared memory, and
14047077c426SJohn Baldwin  * various device drivers.  Handle is either a vnode pointer, a
14057077c426SJohn Baldwin  * character device, or NULL for MAP_ANON.
1406df8bae1dSRodney W. Grimes  */
1407df8bae1dSRodney W. Grimes int
1408b9dcd593SBruce Evans vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
1409b9dcd593SBruce Evans 	vm_prot_t maxprot, int flags,
141098df9218SJohn Baldwin 	objtype_t handle_type, void *handle,
1411b9dcd593SBruce Evans 	vm_ooffset_t foff)
1412df8bae1dSRodney W. Grimes {
14137077c426SJohn Baldwin 	vm_object_t object;
1414b40ce416SJulian Elischer 	struct thread *td = curthread;
14157077c426SJohn Baldwin 	int error;
141684110e7eSKonstantin Belousov 	boolean_t writecounted;
1417df8bae1dSRodney W. Grimes 
1418df8bae1dSRodney W. Grimes 	if (size == 0)
14197077c426SJohn Baldwin 		return (EINVAL);
1420df8bae1dSRodney W. Grimes 
1421749474f2SPeter Wemm 	size = round_page(size);
1422010ba384SMark Johnston 	object = NULL;
14237077c426SJohn Baldwin 	writecounted = FALSE;
14247077c426SJohn Baldwin 
14257077c426SJohn Baldwin 	/*
14267077c426SJohn Baldwin 	 * Lookup/allocate object.
14277077c426SJohn Baldwin 	 */
14287077c426SJohn Baldwin 	switch (handle_type) {
14297077c426SJohn Baldwin 	case OBJT_DEVICE: {
14307077c426SJohn Baldwin 		struct cdevsw *dsw;
14317077c426SJohn Baldwin 		struct cdev *cdev;
14327077c426SJohn Baldwin 		int ref;
14337077c426SJohn Baldwin 
14347077c426SJohn Baldwin 		cdev = handle;
14357077c426SJohn Baldwin 		dsw = dev_refthread(cdev, &ref);
14367077c426SJohn Baldwin 		if (dsw == NULL)
14377077c426SJohn Baldwin 			return (ENXIO);
14387077c426SJohn Baldwin 		error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, cdev,
14397077c426SJohn Baldwin 		    dsw, &foff, &object);
14407077c426SJohn Baldwin 		dev_relthread(cdev, ref);
14417077c426SJohn Baldwin 		break;
14427077c426SJohn Baldwin 	}
14437077c426SJohn Baldwin 	case OBJT_VNODE:
14447077c426SJohn Baldwin 		error = vm_mmap_vnode(td, size, prot, &maxprot, &flags,
14457077c426SJohn Baldwin 		    handle, &foff, &object, &writecounted);
14467077c426SJohn Baldwin 		break;
14477077c426SJohn Baldwin 	case OBJT_DEFAULT:
14487077c426SJohn Baldwin 		if (handle == NULL) {
14497077c426SJohn Baldwin 			error = 0;
14507077c426SJohn Baldwin 			break;
14517077c426SJohn Baldwin 		}
14527077c426SJohn Baldwin 		/* FALLTHROUGH */
14537077c426SJohn Baldwin 	default:
14547077c426SJohn Baldwin 		error = EINVAL;
14557077c426SJohn Baldwin 		break;
14567077c426SJohn Baldwin 	}
14577077c426SJohn Baldwin 	if (error)
14587077c426SJohn Baldwin 		return (error);
14597077c426SJohn Baldwin 
14607077c426SJohn Baldwin 	error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object,
14617077c426SJohn Baldwin 	    foff, writecounted, td);
14627077c426SJohn Baldwin 	if (error != 0 && object != NULL) {
14637077c426SJohn Baldwin 		/*
14647077c426SJohn Baldwin 		 * If this mapping was accounted for in the vnode's
14657077c426SJohn Baldwin 		 * writecount, then undo that now.
14667077c426SJohn Baldwin 		 */
14677077c426SJohn Baldwin 		if (writecounted)
14687077c426SJohn Baldwin 			vnode_pager_release_writecount(object, 0, size);
14697077c426SJohn Baldwin 		vm_object_deallocate(object);
14707077c426SJohn Baldwin 	}
14717077c426SJohn Baldwin 	return (error);
14727077c426SJohn Baldwin }
14737077c426SJohn Baldwin 
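/*
 * Illustrative sketch (in-kernel; map, addr, size, and cdev are
 * assumed): a driver mapping a device into a target map goes through
 * the OBJT_DEVICE case above.
 *
 *	error = vm_mmap(map, &addr, size, VM_PROT_RW, VM_PROT_RW,
 *	    MAP_SHARED, OBJT_DEVICE, cdev, 0);
 */
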
14747077c426SJohn Baldwin /*
14757077c426SJohn Baldwin  * Internal version of mmap that maps a specific VM object into a
14767077c426SJohn Baldwin  * map.  Called by mmap for MAP_ANON, vm_mmap, shm_mmap, and vn_mmap.
14777077c426SJohn Baldwin  */
14787077c426SJohn Baldwin int
14797077c426SJohn Baldwin vm_mmap_object(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
14807077c426SJohn Baldwin     vm_prot_t maxprot, int flags, vm_object_t object, vm_ooffset_t foff,
14817077c426SJohn Baldwin     boolean_t writecounted, struct thread *td)
14827077c426SJohn Baldwin {
14836a97a3f7SKonstantin Belousov 	boolean_t curmap, fitit;
14846a97a3f7SKonstantin Belousov 	vm_offset_t max_addr;
14857077c426SJohn Baldwin 	int docow, error, findspace, rv;
1486df8bae1dSRodney W. Grimes 
14876a97a3f7SKonstantin Belousov 	curmap = map == &td->td_proc->p_vmspace->vm_map;
14886a97a3f7SKonstantin Belousov 	if (curmap) {
14892554f86aSMateusz Guzik 		RACCT_PROC_LOCK(td->td_proc);
14902554f86aSMateusz Guzik 		if (map->size + size > lim_cur(td, RLIMIT_VMEM)) {
14912554f86aSMateusz Guzik 			RACCT_PROC_UNLOCK(td->td_proc);
1492070f64feSMatthew Dillon 			return (ENOMEM);
1493070f64feSMatthew Dillon 		}
1494a6492969SAlan Cox 		if (racct_set(td->td_proc, RACCT_VMEM, map->size + size)) {
14952554f86aSMateusz Guzik 			RACCT_PROC_UNLOCK(td->td_proc);
14961ba5ad42SEdward Tomasz Napierala 			return (ENOMEM);
14971ba5ad42SEdward Tomasz Napierala 		}
14987e19eda4SAndrey Zonov 		if (!old_mlock && map->flags & MAP_WIREFUTURE) {
14993ac7d297SAndrey Zonov 			if (ptoa(pmap_wired_count(map->pmap)) + size >
15002554f86aSMateusz Guzik 			    lim_cur(td, RLIMIT_MEMLOCK)) {
15017e19eda4SAndrey Zonov 				racct_set_force(td->td_proc, RACCT_VMEM,
15027e19eda4SAndrey Zonov 				    map->size);
15032554f86aSMateusz Guzik 				RACCT_PROC_UNLOCK(td->td_proc);
15047e19eda4SAndrey Zonov 				return (ENOMEM);
15057e19eda4SAndrey Zonov 			}
15067e19eda4SAndrey Zonov 			error = racct_set(td->td_proc, RACCT_MEMLOCK,
15073ac7d297SAndrey Zonov 			    ptoa(pmap_wired_count(map->pmap)) + size);
15087e19eda4SAndrey Zonov 			if (error != 0) {
15097e19eda4SAndrey Zonov 				racct_set_force(td->td_proc, RACCT_VMEM,
15107e19eda4SAndrey Zonov 				    map->size);
15112554f86aSMateusz Guzik 				RACCT_PROC_UNLOCK(td->td_proc);
15127e19eda4SAndrey Zonov 				return (error);
15137e19eda4SAndrey Zonov 			}
15147e19eda4SAndrey Zonov 		}
15152554f86aSMateusz Guzik 		RACCT_PROC_UNLOCK(td->td_proc);
1516a6492969SAlan Cox 	}
1517070f64feSMatthew Dillon 
1518df8bae1dSRodney W. Grimes 	/*
1519bc9ad247SDavid Greenman 	 * We currently can only deal with page aligned file offsets.
15207077c426SJohn Baldwin 	 * The mmap() system call already enforces this by subtracting
15217077c426SJohn Baldwin 	 * the page offset from the file offset, but checking here
15227077c426SJohn Baldwin 	 * catches errors in device drivers (e.g. d_single_mmap()
15237077c426SJohn Baldwin 	 * callbacks) and other internal mapping requests (such as in
15247077c426SJohn Baldwin 	 * exec).
1525bc9ad247SDavid Greenman 	 */
1526bc9ad247SDavid Greenman 	if (foff & PAGE_MASK)
1527bc9ad247SDavid Greenman 		return (EINVAL);
1528bc9ad247SDavid Greenman 
152906cb7259SDavid Greenman 	if ((flags & MAP_FIXED) == 0) {
153006cb7259SDavid Greenman 		fitit = TRUE;
153106cb7259SDavid Greenman 		*addr = round_page(*addr);
153206cb7259SDavid Greenman 	} else {
153306cb7259SDavid Greenman 		if (*addr != trunc_page(*addr))
153406cb7259SDavid Greenman 			return (EINVAL);
153506cb7259SDavid Greenman 		fitit = FALSE;
153606cb7259SDavid Greenman 	}
153784110e7eSKonstantin Belousov 
15385f55e841SDavid Greenman 	if (flags & MAP_ANON) {
15397077c426SJohn Baldwin 		if (object != NULL || foff != 0)
15407077c426SJohn Baldwin 			return (EINVAL);
1541c8daea13SAlexander Kabaev 		docow = 0;
154274ffb9afSAlan Cox 	} else if (flags & MAP_PREFAULT_READ)
154374ffb9afSAlan Cox 		docow = MAP_PREFAULT;
154474ffb9afSAlan Cox 	else
15454738fa09SAlan Cox 		docow = MAP_PREFAULT_PARTIAL;
1546df8bae1dSRodney W. Grimes 
15474f79d873SMatthew Dillon 	if ((flags & (MAP_ANON|MAP_SHARED)) == 0)
15484738fa09SAlan Cox 		docow |= MAP_COPY_ON_WRITE;
15494f79d873SMatthew Dillon 	if (flags & MAP_NOSYNC)
15504f79d873SMatthew Dillon 		docow |= MAP_DISABLE_SYNCER;
15519730a5daSPaul Saab 	if (flags & MAP_NOCORE)
15529730a5daSPaul Saab 		docow |= MAP_DISABLE_COREDUMP;
15538211bd45SKonstantin Belousov 	/* Shared memory is also shared with children. */
15548211bd45SKonstantin Belousov 	if (flags & MAP_SHARED)
15558211bd45SKonstantin Belousov 		docow |= MAP_INHERIT_SHARE;
155684110e7eSKonstantin Belousov 	if (writecounted)
155784110e7eSKonstantin Belousov 		docow |= MAP_VN_WRITECOUNT;
15584648ba0aSKonstantin Belousov 	if (flags & MAP_STACK) {
15594648ba0aSKonstantin Belousov 		if (object != NULL)
15604648ba0aSKonstantin Belousov 			return (EINVAL);
15614648ba0aSKonstantin Belousov 		docow |= MAP_STACK_GROWS_DOWN;
15624648ba0aSKonstantin Belousov 	}
156311c42bccSKonstantin Belousov 	if ((flags & MAP_EXCL) != 0)
156411c42bccSKonstantin Belousov 		docow |= MAP_CHECK_EXCL;
156519bd0d9cSKonstantin Belousov 	if ((flags & MAP_GUARD) != 0)
156619bd0d9cSKonstantin Belousov 		docow |= MAP_CREATE_GUARD;
15675850152dSJohn Dyson 
15684648ba0aSKonstantin Belousov 	if (fitit) {
15695aa60b6fSJohn Baldwin 		if ((flags & MAP_ALIGNMENT_MASK) == MAP_ALIGNED_SUPER)
15705aa60b6fSJohn Baldwin 			findspace = VMFS_SUPER_SPACE;
15715aa60b6fSJohn Baldwin 		else if ((flags & MAP_ALIGNMENT_MASK) != 0)
15725aa60b6fSJohn Baldwin 			findspace = VMFS_ALIGNED_SPACE(flags >>
15735aa60b6fSJohn Baldwin 			    MAP_ALIGNMENT_SHIFT);
15742267af78SJulian Elischer 		else
15755aa60b6fSJohn Baldwin 			findspace = VMFS_OPTIMAL_SPACE;
15766a97a3f7SKonstantin Belousov 		max_addr = 0;
1577edb572a3SJohn Baldwin #ifdef MAP_32BIT
15786a97a3f7SKonstantin Belousov 		if ((flags & MAP_32BIT) != 0)
15796a97a3f7SKonstantin Belousov 			max_addr = MAP_32BIT_MAX_ADDR;
1580edb572a3SJohn Baldwin #endif
15816a97a3f7SKonstantin Belousov 		if (curmap) {
15826a97a3f7SKonstantin Belousov 			rv = vm_map_find_min(map, object, foff, addr, size,
15836a97a3f7SKonstantin Belousov 			    round_page((vm_offset_t)td->td_proc->p_vmspace->
15846a97a3f7SKonstantin Belousov 			    vm_daddr + lim_max(td, RLIMIT_DATA)), max_addr,
15856a97a3f7SKonstantin Belousov 			    findspace, prot, maxprot, docow);
15866a97a3f7SKonstantin Belousov 		} else {
15876a97a3f7SKonstantin Belousov 			rv = vm_map_find(map, object, foff, addr, size,
15886a97a3f7SKonstantin Belousov 			    max_addr, findspace, prot, maxprot, docow);
15896a97a3f7SKonstantin Belousov 		}
15904648ba0aSKonstantin Belousov 	} else {
1591b8ca4ef2SAlan Cox 		rv = vm_map_fixed(map, object, foff, *addr, size,
1592bd7e5f99SJohn Dyson 		    prot, maxprot, docow);
15934648ba0aSKonstantin Belousov 	}
1594bd7e5f99SJohn Dyson 
1595f9230ad6SAlan Cox 	if (rv == KERN_SUCCESS) {
15967fb0c17eSDavid Greenman 		/*
1597f9230ad6SAlan Cox 		 * If the process has requested that all future mappings
1598f9230ad6SAlan Cox 		 * be wired, then heed this.
1599f9230ad6SAlan Cox 		 */
160054a3a114SMark Johnston 		if ((map->flags & MAP_WIREFUTURE) != 0) {
160154a3a114SMark Johnston 			vm_map_lock(map);
160254a3a114SMark Johnston 			if ((map->flags & MAP_WIREFUTURE) != 0)
16038cd6a80dSMark Johnston 				(void)vm_map_wire_locked(map, *addr,
160454a3a114SMark Johnston 				    *addr + size, VM_MAP_WIRE_USER |
160554a3a114SMark Johnston 				    ((flags & MAP_STACK) ? VM_MAP_WIRE_HOLESOK :
160654a3a114SMark Johnston 				    VM_MAP_WIRE_NOHOLES));
160754a3a114SMark Johnston 			vm_map_unlock(map);
16081472f4f4SKonstantin Belousov 		}
1609df8bae1dSRodney W. Grimes 	}
16102e32165cSKonstantin Belousov 	return (vm_mmap_to_errno(rv));
16112e32165cSKonstantin Belousov }
16122e32165cSKonstantin Belousov 
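/*
 * Illustrative sketch (userland, not compiled here): a MAP_ALIGNED()
 * request lands in the VMFS_ALIGNED_SPACE branch above, while a plain
 * anonymous mapping takes VMFS_OPTIMAL_SPACE.
 *
 *	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *	    MAP_ANON | MAP_ALIGNED(21), -1, 0);
 */
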
1613f9230ad6SAlan Cox /*
1614f9230ad6SAlan Cox  * Translate a Mach VM return code to zero on success or the appropriate errno
1615f9230ad6SAlan Cox  * on failure.
1616f9230ad6SAlan Cox  */
16172e32165cSKonstantin Belousov int
16182e32165cSKonstantin Belousov vm_mmap_to_errno(int rv)
16192e32165cSKonstantin Belousov {
16202e32165cSKonstantin Belousov 
1621df8bae1dSRodney W. Grimes 	switch (rv) {
1622df8bae1dSRodney W. Grimes 	case KERN_SUCCESS:
1623df8bae1dSRodney W. Grimes 		return (0);
1624df8bae1dSRodney W. Grimes 	case KERN_INVALID_ADDRESS:
1625df8bae1dSRodney W. Grimes 	case KERN_NO_SPACE:
1626df8bae1dSRodney W. Grimes 		return (ENOMEM);
1627df8bae1dSRodney W. Grimes 	case KERN_PROTECTION_FAILURE:
1628df8bae1dSRodney W. Grimes 		return (EACCES);
1629df8bae1dSRodney W. Grimes 	default:
1630df8bae1dSRodney W. Grimes 		return (EINVAL);
1631df8bae1dSRodney W. Grimes 	}
1632df8bae1dSRodney W. Grimes }
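
/*
 * Illustrative sketch (in-kernel, mirrors vm_mmap_object() above): the
 * idiomatic pattern around Mach-style VM entry points in this file.
 *
 *	rv = vm_map_wire(map, start, end,
 *	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
 *	return (vm_mmap_to_errno(rv));
 */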
1633