/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_hwpmc_hooks.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/capsicum.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/procctl.h>
#include <sys/racct.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/vmmeter.h>

#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vnode_pager.h>

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

int old_mlock = 0;
SYSCTL_INT(_vm, OID_AUTO, old_mlock, CTLFLAG_RWTUN, &old_mlock, 0,
    "Do not apply RLIMIT_MEMLOCK on mlockall");

#ifdef MAP_32BIT
#define	MAP_32BIT_MAX_ADDR	((vm_offset_t)1 << 31)
#endif

#ifndef _SYS_SYSPROTO_H_
struct sbrk_args {
	int incr;
};
#endif

int
sys_sbrk(struct thread *td, struct sbrk_args *uap)
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#ifndef _SYS_SYSPROTO_H_
struct sstk_args {
	int incr;
};
#endif

int
sys_sstk(struct thread *td, struct sstk_args *uap)
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43)
#ifndef _SYS_SYSPROTO_H_
struct getpagesize_args {
	int dummy;
};
#endif

int
ogetpagesize(struct thread *td, struct getpagesize_args *uap)
{

	td->td_retval[0] = PAGE_SIZE;
	return (0);
}
#endif				/* COMPAT_43 */

/*
 * Memory Map (mmap) system call.  Note that the file offset
 * and address are allowed to be NOT page aligned, though if
 * the MAP_FIXED flag is set, both must have the same remainder
 * modulo the PAGE_SIZE (POSIX 1003.1b).  If the address is not
 * page-aligned, the actual mapping starts at trunc_page(addr)
 * and the return value is adjusted up by the page offset.
 *
 * Generally speaking, only character devices which are themselves
 * memory-based, such as a video framebuffer, can be mmap'd.  Otherwise
 * there would be no cache coherency between a descriptor and a VM mapping
 * both to the same character device.
 */
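
/*
 * A worked example of the adjustment described above (illustrative
 * only, assuming a 4K PAGE_SIZE): a userland request such as
 *
 *	p = mmap(NULL, 100, PROT_READ, MAP_PRIVATE, fd, 300);
 *
 * has pageoff = 300, so pos is truncated to 0 and size grows to 400
 * before being rounded up to 4096.  The kernel maps the whole page at
 * some address addr and returns addr + 300 to the caller, so p
 * addresses file offset 300 as requested.
 */
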
#ifndef _SYS_SYSPROTO_H_
struct mmap_args {
	void *addr;
	size_t len;
	int prot;
	int flags;
	int fd;
	long pad;
	off_t pos;
};
#endif

int
sys_mmap(struct thread *td, struct mmap_args *uap)
{

	return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, uap->prot,
	    uap->flags, uap->fd, uap->pos));
}

int
kern_mmap(struct thread *td, uintptr_t addr0, size_t size, int prot, int flags,
    int fd, off_t pos)
{
	struct vmspace *vms;
	struct file *fp;
	vm_offset_t addr;
	vm_size_t pageoff;
	vm_prot_t cap_maxprot;
	int align, error;
	cap_rights_t rights;

	vms = td->td_proc->p_vmspace;
	fp = NULL;
	AUDIT_ARG_FD(fd);
	addr = addr0;

	/*
	 * Ignore old flags that used to be defined but did not do anything.
	 */
	flags &= ~(MAP_RESERVED0020 | MAP_RESERVED0040);

	/*
	 * Enforce the constraints.
	 * A mapping of length 0 is only allowed for old binaries.
	 * An anonymous mapping must specify -1 as the file descriptor
	 * and a zero position for new code.  Be nice to ancient a.out
	 * binaries and correct pos for anonymous mappings, since old
	 * ld.so sometimes issues anonymous map requests with non-zero
	 * pos.
	 */
	if (!SV_CURPROC_FLAG(SV_AOUT)) {
		if ((size == 0 && curproc->p_osrel >= P_OSREL_MAP_ANON) ||
		    ((flags & MAP_ANON) != 0 && (fd != -1 || pos != 0)))
			return (EINVAL);
	} else {
		if ((flags & MAP_ANON) != 0)
			pos = 0;
	}

	if (flags & MAP_STACK) {
		if ((fd != -1) ||
		    ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE)))
			return (EINVAL);
		flags |= MAP_ANON;
		pos = 0;
	}
	if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | MAP_HASSEMAPHORE |
	    MAP_STACK | MAP_NOSYNC | MAP_ANON | MAP_EXCL | MAP_NOCORE |
	    MAP_PREFAULT_READ | MAP_GUARD |
#ifdef MAP_32BIT
	    MAP_32BIT |
#endif
	    MAP_ALIGNMENT_MASK)) != 0)
		return (EINVAL);
	if ((flags & (MAP_EXCL | MAP_FIXED)) == MAP_EXCL)
		return (EINVAL);
	if ((flags & (MAP_SHARED | MAP_PRIVATE)) == (MAP_SHARED | MAP_PRIVATE))
		return (EINVAL);
	if (prot != PROT_NONE &&
	    (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) != 0)
		return (EINVAL);
	if ((flags & MAP_GUARD) != 0 && (prot != PROT_NONE || fd != -1 ||
	    pos != 0 || (flags & (MAP_SHARED | MAP_PRIVATE | MAP_PREFAULT |
	    MAP_PREFAULT_READ | MAP_ANON | MAP_STACK)) != 0))
		return (EINVAL);

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;

	/* Adjust size for rounding (on both ends). */
	size += pageoff;			/* low end... */
	size = (vm_size_t) round_page(size);	/* hi end */

	/* Ensure alignment is at least a page and fits in a pointer. */
	align = flags & MAP_ALIGNMENT_MASK;
	if (align != 0 && align != MAP_ALIGNED_SUPER &&
	    (align >> MAP_ALIGNMENT_SHIFT >= sizeof(void *) * NBBY ||
	    align >> MAP_ALIGNMENT_SHIFT < PAGE_SHIFT))
		return (EINVAL);

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		/* Address range must be all in user VM space. */
		if (addr < vm_map_min(&vms->vm_map) ||
		    addr + size > vm_map_max(&vms->vm_map))
			return (EINVAL);
		if (addr + size < addr)
			return (EINVAL);
#ifdef MAP_32BIT
		if (flags & MAP_32BIT && addr + size > MAP_32BIT_MAX_ADDR)
			return (EINVAL);
	} else if (flags & MAP_32BIT) {
		/*
		 * For MAP_32BIT, override the hint if it is too high and
		 * do not bother moving the mapping past the heap (since
		 * the heap is usually above 2GB).
		 */
		if (addr + size > MAP_32BIT_MAX_ADDR)
			addr = 0;
#endif
	} else {
		/*
		 * XXX for non-fixed mappings where no hint is provided or
		 * the hint would fall in the potential heap space,
		 * place it after the end of the largest possible heap.
		 *
		 * There should really be a pmap call to determine a reasonable
		 * location.
		 */
		if (addr == 0 ||
		    (addr >= round_page((vm_offset_t)vms->vm_taddr) &&
		    addr < round_page((vm_offset_t)vms->vm_daddr +
		    lim_max(td, RLIMIT_DATA))))
			addr = round_page((vm_offset_t)vms->vm_daddr +
			    lim_max(td, RLIMIT_DATA));
	}
	if (size == 0) {
		/*
		 * Return success without mapping anything for old
		 * binaries that request a page-aligned mapping of
		 * length 0.  For modern binaries, this function
		 * returns an error earlier.
		 */
		error = 0;
	} else if ((flags & MAP_GUARD) != 0) {
		error = vm_mmap_object(&vms->vm_map, &addr, size, VM_PROT_NONE,
		    VM_PROT_NONE, flags, NULL, pos, FALSE, td);
	} else if ((flags & MAP_ANON) != 0) {
		/*
		 * Mapping blank space is trivial.
		 *
		 * This relies on VM_PROT_* matching PROT_*.
		 */
		error = vm_mmap_object(&vms->vm_map, &addr, size, prot,
		    VM_PROT_ALL, flags, NULL, pos, FALSE, td);
	} else {
		/*
		 * Mapping file, get fp for validation and don't let the
		 * descriptor disappear on us if we block. Check capability
		 * rights, but also return the maximum rights to be combined
		 * with maxprot later.
		 */
		cap_rights_init(&rights, CAP_MMAP);
		if (prot & PROT_READ)
			cap_rights_set(&rights, CAP_MMAP_R);
		if ((flags & MAP_SHARED) != 0) {
			if (prot & PROT_WRITE)
				cap_rights_set(&rights, CAP_MMAP_W);
		}
		if (prot & PROT_EXEC)
			cap_rights_set(&rights, CAP_MMAP_X);
		error = fget_mmap(td, fd, &rights, &cap_maxprot, &fp);
		if (error != 0)
			goto done;
		if ((flags & (MAP_SHARED | MAP_PRIVATE)) == 0 &&
		    td->td_proc->p_osrel >= P_OSREL_MAP_FSTRICT) {
			error = EINVAL;
			goto done;
		}

		/* This relies on VM_PROT_* matching PROT_*. */
		error = fo_mmap(fp, &vms->vm_map, &addr, size, prot,
		    cap_maxprot, flags, pos, td);
	}

	if (error == 0)
		td->td_retval[0] = (register_t) (addr + pageoff);
done:
	if (fp)
		fdrop(fp, td);

	return (error);
}
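
/*
 * An illustrative userland sketch of the MAP_GUARD path above (not
 * part of the original file): a guard region takes no protection, no
 * descriptor, and no offset, and any other flag combination is
 * rejected with EINVAL:
 *
 *	void *g = mmap(NULL, 4096, PROT_NONE, MAP_GUARD, -1, 0);
 *	if (g == MAP_FAILED)
 *		err(1, "mmap");
 *
 * Accesses to the guard region fault, and automatic address-space
 * placement avoids it until it is unmapped or replaced with
 * MAP_FIXED.
 */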

#if defined(COMPAT_FREEBSD6)
int
freebsd6_mmap(struct thread *td, struct freebsd6_mmap_args *uap)
{

	return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, uap->prot,
	    uap->flags, uap->fd, uap->pos));
}
#endif

#ifdef COMPAT_43
#ifndef _SYS_SYSPROTO_H_
struct ommap_args {
	caddr_t addr;
	int len;
	int prot;
	int flags;
	int fd;
	long pos;
};
#endif
int
ommap(struct thread *td, struct ommap_args *uap)
{
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC | PROT_WRITE,
		PROT_READ,
		PROT_EXEC | PROT_READ,
		PROT_WRITE | PROT_READ,
		PROT_EXEC | PROT_WRITE | PROT_READ,
	};
	int flags, prot;

#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100

	prot = cvtbsdprot[uap->prot & 0x7];
#ifdef COMPAT_FREEBSD32
#if defined(__amd64__)
	if (i386_read_exec && SV_PROC_FLAG(td->td_proc, SV_ILP32) &&
	    prot != 0)
		prot |= PROT_EXEC;
#endif
#endif
	flags = 0;
	if (uap->flags & OMAP_ANON)
		flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		flags |= MAP_SHARED;
	else
		flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		flags |= MAP_FIXED;
	return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, prot, flags,
	    uap->fd, uap->pos));
}
#endif				/* COMPAT_43 */

#ifndef _SYS_SYSPROTO_H_
struct msync_args {
	void *addr;
	size_t len;
	int flags;
};
#endif
int
sys_msync(struct thread *td, struct msync_args *uap)
{

	return (kern_msync(td, (uintptr_t)uap->addr, uap->len, uap->flags));
}

int
kern_msync(struct thread *td, uintptr_t addr0, size_t size, int flags)
{
	vm_offset_t addr;
	vm_size_t pageoff;
	vm_map_t map;
	int rv;

	addr = addr0;
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	map = &td->td_proc->p_vmspace->vm_map;

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_sync(map, addr, addr + size, (flags & MS_ASYNC) == 0,
	    (flags & MS_INVALIDATE) != 0);
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
		return (ENOMEM);
	case KERN_INVALID_ARGUMENT:
		return (EBUSY);
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
}
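
/*
 * An illustrative userland sketch of the flag rule enforced above
 * (not part of the original file): MS_ASYNC and MS_INVALIDATE are
 * mutually exclusive.
 *
 *	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
 *	    fd, 0);
 *	p[0] = 'x';
 *	if (msync(p, len, MS_SYNC) == -1)	// synchronous write-back
 *		err(1, "msync");
 *	// msync(p, len, MS_ASYNC | MS_INVALIDATE) fails with EINVAL.
 */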

#ifndef _SYS_SYSPROTO_H_
struct munmap_args {
	void *addr;
	size_t len;
};
#endif
int
sys_munmap(struct thread *td, struct munmap_args *uap)
{

	return (kern_munmap(td, (uintptr_t)uap->addr, uap->len));
}

int
kern_munmap(struct thread *td, uintptr_t addr0, size_t size)
{
#ifdef HWPMC_HOOKS
	struct pmckern_map_out pkm;
	vm_map_entry_t entry;
	bool pmc_handled;
#endif
	vm_offset_t addr;
	vm_size_t pageoff;
	vm_map_t map;

	if (size == 0)
		return (EINVAL);

	addr = addr0;
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	if (addr < vm_map_min(map) || addr + size > vm_map_max(map))
		return (EINVAL);
	vm_map_lock(map);
#ifdef HWPMC_HOOKS
	pmc_handled = false;
	if (PMC_HOOK_INSTALLED(PMC_FN_MUNMAP)) {
		pmc_handled = true;
		/*
		 * Inform hwpmc if the address range being unmapped contains
		 * an executable region.
		 */
		pkm.pm_address = (uintptr_t) NULL;
		if (vm_map_lookup_entry(map, addr, &entry)) {
			for (; entry->start < addr + size;
			    entry = entry->next) {
				if (vm_map_check_protection(map, entry->start,
					entry->end, VM_PROT_EXECUTE) == TRUE) {
					pkm.pm_address = (uintptr_t) addr;
					pkm.pm_size = (size_t) size;
					break;
				}
			}
		}
	}
#endif
	vm_map_delete(map, addr, addr + size);

#ifdef HWPMC_HOOKS
	if (__predict_false(pmc_handled)) {
		/* downgrade the lock to prevent a LOR with the pmc-sx lock */
		vm_map_lock_downgrade(map);
		if (pkm.pm_address != (uintptr_t) NULL)
			PMC_CALL_HOOK(td, PMC_FN_MUNMAP, (void *) &pkm);
		vm_map_unlock_read(map);
	} else
#endif
		vm_map_unlock(map);

	/* vm_map_delete returns nothing but KERN_SUCCESS anyway */
	return (0);
}
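
/*
 * An illustrative sketch (not part of the original file): because
 * addr and size are rounded to page boundaries above, unmapping a
 * single byte releases the whole page containing it, while a zero
 * length is rejected outright.
 *
 *	munmap(p, 1);		// releases the entire page holding p
 *	munmap(p, 0);		// fails with EINVAL
 */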

#ifndef _SYS_SYSPROTO_H_
struct mprotect_args {
	const void *addr;
	size_t len;
	int prot;
};
#endif
int
sys_mprotect(struct thread *td, struct mprotect_args *uap)
{

	return (kern_mprotect(td, (uintptr_t)uap->addr, uap->len, uap->prot));
}

int
kern_mprotect(struct thread *td, uintptr_t addr0, size_t size, int prot)
{
	vm_offset_t addr;
	vm_size_t pageoff;

	addr = addr0;
	prot = (prot & VM_PROT_ALL);
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_protect(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, prot, FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	case KERN_RESOURCE_SHORTAGE:
		return (ENOMEM);
	}
	return (EINVAL);
}
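
/*
 * An illustrative sketch (not part of the original file): raising
 * the protection of a shared mapping beyond its maximum protection
 * trips the KERN_PROTECTION_FAILURE case above.
 *
 *	int fd = open("data", O_RDONLY);
 *	char *p = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
 *	mprotect(p, len, PROT_READ);			// OK
 *	mprotect(p, len, PROT_READ | PROT_WRITE);	// EACCES: write
 *							// exceeds maxprot
 */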

#ifndef _SYS_SYSPROTO_H_
struct minherit_args {
	void *addr;
	size_t len;
	int inherit;
};
#endif
int
sys_minherit(struct thread *td, struct minherit_args *uap)
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_inherit_t inherit;

	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	inherit = uap->inherit;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_inherit(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}
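
/*
 * An illustrative sketch (not part of the original file): marking a
 * region INHERIT_NONE keeps it out of any child created by fork().
 *
 *	char *secret = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *	    MAP_ANON | MAP_PRIVATE, -1, 0);
 *	minherit(secret, len, INHERIT_NONE);
 *	if (fork() == 0) {
 *		// dereferencing secret faults here: the region is
 *		// not mapped in the child
 *	}
 */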

#ifndef _SYS_SYSPROTO_H_
struct madvise_args {
	void *addr;
	size_t len;
	int behav;
};
#endif

int
sys_madvise(struct thread *td, struct madvise_args *uap)
{

	return (kern_madvise(td, (uintptr_t)uap->addr, uap->len, uap->behav));
}

int
kern_madvise(struct thread *td, uintptr_t addr0, size_t len, int behav)
{
	vm_map_t map;
	vm_offset_t addr, end, start;
	int flags;

	/*
	 * Check for our special case, advising the swap pager we are
	 * "immortal."
	 */
	if (behav == MADV_PROTECT) {
		flags = PPROT_SET;
		return (kern_procctl(td, P_PID, td->td_proc->p_pid,
		    PROC_SPROTECT, &flags));
	}

	/*
	 * Check for illegal behavior
	 */
	if (behav < 0 || behav > MADV_CORE)
		return (EINVAL);
	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	addr = addr0;
	if (addr < vm_map_min(map) || addr + len > vm_map_max(map))
		return (EINVAL);
	if ((addr + len) < addr)
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	start = trunc_page(addr);
	end = round_page(addr + len);

	if (vm_map_madvise(map, start, end, behav))
		return (EINVAL);
	return (0);
}
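
/*
 * An illustrative sketch (not part of the original file): as the
 * special case above shows, MADV_PROTECT is rewritten into a
 * procctl(PROC_SPROTECT) call before any address checks, so the two
 * privileged forms below are equivalent; addr and len are ignored.
 *
 *	madvise(NULL, 0, MADV_PROTECT);
 *
 *	int f = PPROT_SET;
 *	procctl(P_PID, getpid(), PROC_SPROTECT, &f);
 */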

#ifndef _SYS_SYSPROTO_H_
struct mincore_args {
	const void *addr;
	size_t len;
	char *vec;
};
#endif

int
sys_mincore(struct thread *td, struct mincore_args *uap)
{

	return (kern_mincore(td, (uintptr_t)uap->addr, uap->len, uap->vec));
}

int
kern_mincore(struct thread *td, uintptr_t addr0, size_t len, char *vec)
{
	vm_offset_t addr, first_addr;
	vm_offset_t end, cend;
	pmap_t pmap;
	vm_map_t map;
	int error = 0;
	int vecindex, lastvecindex;
	vm_map_entry_t current;
	vm_map_entry_t entry;
	vm_object_t object;
	vm_paddr_t locked_pa;
	vm_page_t m;
	vm_pindex_t pindex;
	int mincoreinfo;
	unsigned int timestamp;
	boolean_t locked;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page(addr0);
	end = addr + (vm_size_t)round_page(len);
	map = &td->td_proc->p_vmspace->vm_map;
	if (end > vm_map_max(map) || end < addr)
		return (ENOMEM);

	pmap = vmspace_pmap(td->td_proc->p_vmspace);

	vm_map_lock_read(map);
RestartScan:
	timestamp = map->timestamp;

	if (!vm_map_lookup_entry(map, addr, &entry)) {
		vm_map_unlock_read(map);
		return (ENOMEM);
	}

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current process's address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for (current = entry; current->start < end; current = current->next) {

		/*
		 * check for contiguity
		 */
		if (current->end < end && current->next->start > current->end) {
			vm_map_unlock_read(map);
			return (ENOMEM);
		}

		/*
		 * ignore submaps (for now) or null objects
		 */
		if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) ||
			current->object.vm_object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		/*
		 * scan this entry one page at a time
		 */
		while (addr < cend) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 */
			object = NULL;
			locked_pa = 0;
		retry:
			m = NULL;
			mincoreinfo = pmap_mincore(pmap, addr, &locked_pa);
			if (locked_pa != 0) {
				/*
				 * The page is mapped by this process but not
				 * both accessed and modified.  It is also
				 * managed.  Acquire the object lock so that
				 * other mappings might be examined.
				 */
				m = PHYS_TO_VM_PAGE(locked_pa);
				if (m->object != object) {
					if (object != NULL)
						VM_OBJECT_WUNLOCK(object);
					object = m->object;
					locked = VM_OBJECT_TRYWLOCK(object);
					vm_page_unlock(m);
					if (!locked) {
						VM_OBJECT_WLOCK(object);
						vm_page_lock(m);
						goto retry;
					}
				} else
					vm_page_unlock(m);
				KASSERT(m->valid == VM_PAGE_BITS_ALL,
				    ("mincore: page %p is mapped but invalid",
				    m));
			} else if (mincoreinfo == 0) {
				/*
				 * The page is not mapped by this process.  If
				 * the object implements managed pages, then
				 * determine if the page is resident so that
				 * the mappings might be examined.
				 */
				if (current->object.vm_object != object) {
					if (object != NULL)
						VM_OBJECT_WUNLOCK(object);
					object = current->object.vm_object;
					VM_OBJECT_WLOCK(object);
				}
				if (object->type == OBJT_DEFAULT ||
				    object->type == OBJT_SWAP ||
				    object->type == OBJT_VNODE) {
					pindex = OFF_TO_IDX(current->offset +
					    (addr - current->start));
					m = vm_page_lookup(object, pindex);
					if (m != NULL && m->valid == 0)
						m = NULL;
					if (m != NULL)
						mincoreinfo = MINCORE_INCORE;
				}
			}
			if (m != NULL) {
				/* Examine other mappings to the page. */
				if (m->dirty == 0 && pmap_is_modified(m))
					vm_page_dirty(m);
				if (m->dirty != 0)
					mincoreinfo |= MINCORE_MODIFIED_OTHER;
				/*
				 * The first test for PGA_REFERENCED is an
				 * optimization.  The second test is
				 * required because a concurrent pmap
				 * operation could clear the last reference
				 * and set PGA_REFERENCED before the call to
				 * pmap_is_referenced().
				 */
				if ((m->aflags & PGA_REFERENCED) != 0 ||
				    pmap_is_referenced(m) ||
				    (m->aflags & PGA_REFERENCED) != 0)
					mincoreinfo |= MINCORE_REFERENCED_OTHER;
			}
			if (object != NULL)
				VM_OBJECT_WUNLOCK(object);

			/*
			 * subyte may page fault.  In case it needs to modify
			 * the map, we release the lock.
			 */
			vm_map_unlock_read(map);

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = atop(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make sure that
			 * the byte vector is zeroed for those skipped entries.
			 */
			while ((lastvecindex + 1) < vecindex) {
				++lastvecindex;
				error = subyte(vec + lastvecindex, 0);
				if (error) {
					error = EFAULT;
					goto done2;
				}
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte(vec + vecindex, mincoreinfo);
			if (error) {
				error = EFAULT;
				goto done2;
			}

			/*
			 * If the map has changed, due to the subyte, the previous
			 * output may be invalid.
			 */
			vm_map_lock_read(map);
			if (timestamp != map->timestamp)
				goto RestartScan;

			lastvecindex = vecindex;
			addr += PAGE_SIZE;
		}
	}

	/*
	 * subyte may page fault.  In case it needs to modify
	 * the map, we release the lock.
	 */
	vm_map_unlock_read(map);

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = atop(end - first_addr);
	while ((lastvecindex + 1) < vecindex) {
		++lastvecindex;
		error = subyte(vec + lastvecindex, 0);
		if (error) {
			error = EFAULT;
			goto done2;
		}
	}

	/*
	 * If the map has changed, due to the subyte, the previous
	 * output may be invalid.
	 */
	vm_map_lock_read(map);
	if (timestamp != map->timestamp)
		goto RestartScan;
	vm_map_unlock_read(map);
done2:
	return (error);
}
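
/*
 * An illustrative userland sketch (not part of the original file):
 * vec receives one status byte per page; MINCORE_INCORE marks
 * resident pages, and the *_OTHER bits report references and
 * modifications by other mappings, as collected in the loop above.
 *
 *	size_t npages = (len + getpagesize() - 1) / getpagesize();
 *	char *vec = malloc(npages);
 *	if (mincore(p, len, vec) == 0 && (vec[0] & MINCORE_INCORE))
 *		printf("first page is resident\n");
 */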

#ifndef _SYS_SYSPROTO_H_
struct mlock_args {
	const void *addr;
	size_t len;
};
#endif
int
sys_mlock(struct thread *td, struct mlock_args *uap)
{

	return (kern_mlock(td->td_proc, td->td_ucred,
	    __DECONST(uintptr_t, uap->addr), uap->len));
}

int
kern_mlock(struct proc *proc, struct ucred *cred, uintptr_t addr0, size_t len)
{
	vm_offset_t addr, end, last, start;
	vm_size_t npages, size;
	vm_map_t map;
	unsigned long nsize;
	int error;

	error = priv_check_cred(cred, PRIV_VM_MLOCK, 0);
	if (error)
		return (error);
	addr = addr0;
	size = len;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	npages = atop(end - start);
	if (npages > vm_page_max_wired)
		return (ENOMEM);
	map = &proc->p_vmspace->vm_map;
	PROC_LOCK(proc);
	nsize = ptoa(npages + pmap_wired_count(map->pmap));
	if (nsize > lim_cur_proc(proc, RLIMIT_MEMLOCK)) {
		PROC_UNLOCK(proc);
		return (ENOMEM);
	}
	PROC_UNLOCK(proc);
	if (npages + vm_cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);
#ifdef RACCT
	if (racct_enable) {
		PROC_LOCK(proc);
		error = racct_set(proc, RACCT_MEMLOCK, nsize);
		PROC_UNLOCK(proc);
		if (error != 0)
			return (ENOMEM);
	}
#endif
	error = vm_map_wire(map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (racct_enable && error != KERN_SUCCESS) {
		PROC_LOCK(proc);
		racct_set(proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}
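
/*
 * An illustrative sketch (not part of the original file): wiring a
 * buffer so it cannot be paged out, e.g. before storing key
 * material.  The request is checked above against RLIMIT_MEMLOCK
 * and the global vm_page_max_wired cap.
 *
 *	char *key = malloc(klen);
 *	if (mlock(key, klen) == -1)
 *		err(1, "mlock");	// ENOMEM if over the limits
 *	...
 *	munlock(key, klen);
 */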

#ifndef _SYS_SYSPROTO_H_
struct mlockall_args {
	int	how;
};
#endif

int
sys_mlockall(struct thread *td, struct mlockall_args *uap)
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = priv_check(td, PRIV_VM_MLOCK);
	if (error)
		return (error);

	if ((uap->how == 0) || ((uap->how & ~(MCL_CURRENT|MCL_FUTURE)) != 0))
		return (EINVAL);

	/*
	 * If wiring all pages in the process would cause it to exceed
	 * a hard resource limit, return ENOMEM.
	 */
	if (!old_mlock && uap->how & MCL_CURRENT) {
		PROC_LOCK(td->td_proc);
		if (map->size > lim_cur(td, RLIMIT_MEMLOCK)) {
			PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		PROC_UNLOCK(td->td_proc);
	}
#ifdef RACCT
	if (racct_enable) {
		PROC_LOCK(td->td_proc);
		error = racct_set(td->td_proc, RACCT_MEMLOCK, map->size);
		PROC_UNLOCK(td->td_proc);
		if (error != 0)
			return (ENOMEM);
	}
#endif

	if (uap->how & MCL_FUTURE) {
		vm_map_lock(map);
		vm_map_modflags(map, MAP_WIREFUTURE, 0);
		vm_map_unlock(map);
		error = 0;
	}

	if (uap->how & MCL_CURRENT) {
		/*
		 * P1003.1-2001 mandates that all currently mapped pages
		 * will be memory resident and locked (wired) upon return
		 * from mlockall(). vm_map_wire() will wire pages, by
		 * calling vm_fault_wire() for each page in the region.
		 */
		error = vm_map_wire(map, vm_map_min(map), vm_map_max(map),
		    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
		error = (error == KERN_SUCCESS ? 0 : EAGAIN);
	}
#ifdef RACCT
	if (racct_enable && error != KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}
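
/*
 * An illustrative sketch (not part of the original file): a
 * real-time process typically wires both its current and future
 * mappings, then releases them with munlockall() when latency no
 * longer matters.
 *
 *	if (mlockall(MCL_CURRENT | MCL_FUTURE) == -1)
 *		err(1, "mlockall");	// EINVAL for how == 0 or for
 *					// unknown bits, per the
 *					// check above
 *	...
 *	munlockall();
 */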

#ifndef _SYS_SYSPROTO_H_
struct munlockall_args {
	register_t dummy;
};
#endif

int
sys_munlockall(struct thread *td, struct munlockall_args *uap)
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);

	/* Clear the MAP_WIREFUTURE flag from this vm_map. */
	vm_map_lock(map);
	vm_map_modflags(map, 0, MAP_WIREFUTURE);
	vm_map_unlock(map);

	/* Forcibly unwire all pages. */
	error = vm_map_unwire(map, vm_map_min(map), vm_map_max(map),
	    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
#ifdef RACCT
	if (racct_enable && error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK, 0);
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct munlock_args {
	const void *addr;
	size_t len;
};
#endif
int
sys_munlock(struct thread *td, struct munlock_args *uap)
{

	return (kern_munlock(td, (uintptr_t)uap->addr, uap->len));
}

int
kern_munlock(struct thread *td, uintptr_t addr0, size_t size)
{
	vm_offset_t addr, end, last, start;
#ifdef RACCT
	vm_map_t map;
#endif
	int error;

	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);
	addr = addr0;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	error = vm_map_unwire(&td->td_proc->p_vmspace->vm_map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (racct_enable && error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		map = &td->td_proc->p_vmspace->vm_map;
		racct_set(td->td_proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(td->td_proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}
1181df8bae1dSRodney W. Grimes 
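/*
 * Worked example of the rounding in kern_munlock() above (assuming
 * 4K pages): a request to unlock [0x1234, 0x1234 + 0x100) is widened
 * to whole pages,
 *
 *	start = trunc_page(0x1234) = 0x1000
 *	last  = 0x1234 + 0x100    = 0x1334
 *	end   = round_page(0x1334) = 0x2000
 *
 * and the "last < addr || end < addr" test rejects a length that
 * would wrap past the top of the address space.
 */
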
1182df8bae1dSRodney W. Grimes /*
1183c8daea13SAlexander Kabaev  * vm_mmap_vnode()
1184c8daea13SAlexander Kabaev  *
1185c8daea13SAlexander Kabaev  * Helper function for vm_mmap().  Performs the sanity checks specific to
1186c8daea13SAlexander Kabaev  * mmap operations on vnodes.
1187c8daea13SAlexander Kabaev  */
1188c8daea13SAlexander Kabaev int
1189c8daea13SAlexander Kabaev vm_mmap_vnode(struct thread *td, vm_size_t objsize,
1190c8daea13SAlexander Kabaev     vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
119184110e7eSKonstantin Belousov     struct vnode *vp, vm_ooffset_t *foffp, vm_object_t *objp,
119284110e7eSKonstantin Belousov     boolean_t *writecounted)
1193c8daea13SAlexander Kabaev {
1194c8daea13SAlexander Kabaev 	struct vattr va;
1195c8daea13SAlexander Kabaev 	vm_object_t obj;
1196bd0e1bebSMark Johnston 	vm_ooffset_t foff;
11970359a12eSAttilio Rao 	struct ucred *cred;
11985050aa86SKonstantin Belousov 	int error, flags, locktype;
1199c8daea13SAlexander Kabaev 
12000359a12eSAttilio Rao 	cred = td->td_ucred;
120184110e7eSKonstantin Belousov 	if ((*maxprotp & VM_PROT_WRITE) && (*flagsp & MAP_SHARED))
120284110e7eSKonstantin Belousov 		locktype = LK_EXCLUSIVE;
120384110e7eSKonstantin Belousov 	else
120484110e7eSKonstantin Belousov 		locktype = LK_SHARED;
12055050aa86SKonstantin Belousov 	if ((error = vget(vp, locktype, td)) != 0)
1206c8daea13SAlexander Kabaev 		return (error);
12070df42647SRobert Watson 	AUDIT_ARG_VNODE1(vp);
120864345f0bSJohn Baldwin 	foff = *foffp;
1209c8daea13SAlexander Kabaev 	flags = *flagsp;
12108516dd18SPoul-Henning Kamp 	obj = vp->v_object;
1211c8daea13SAlexander Kabaev 	if (vp->v_type == VREG) {
1212c8daea13SAlexander Kabaev 		/*
1213c8daea13SAlexander Kabaev 		 * Get the proper underlying object
1214c8daea13SAlexander Kabaev 		 */
12158516dd18SPoul-Henning Kamp 		if (obj == NULL) {
1216c8daea13SAlexander Kabaev 			error = EINVAL;
1217c8daea13SAlexander Kabaev 			goto done;
1218c8daea13SAlexander Kabaev 		}
1219e5f299ffSKonstantin Belousov 		if (obj->type == OBJT_VNODE && obj->handle != vp) {
1220c8daea13SAlexander Kabaev 			vput(vp);
1221c8daea13SAlexander Kabaev 			vp = (struct vnode *)obj->handle;
122284110e7eSKonstantin Belousov 			/*
122384110e7eSKonstantin Belousov 			 * Bypass filesystems obey the MP-safety of the
122453f5f8a0SKonstantin Belousov 			 * underlying fs.  Tmpfs never bypasses.
122584110e7eSKonstantin Belousov 			 */
122684110e7eSKonstantin Belousov 			error = vget(vp, locktype, td);
12275050aa86SKonstantin Belousov 			if (error != 0)
122884110e7eSKonstantin Belousov 				return (error);
122984110e7eSKonstantin Belousov 		}
123084110e7eSKonstantin Belousov 		if (locktype == LK_EXCLUSIVE) {
123184110e7eSKonstantin Belousov 			*writecounted = TRUE;
123284110e7eSKonstantin Belousov 			vnode_pager_update_writecount(obj, 0, objsize);
123384110e7eSKonstantin Belousov 		}
1234c8daea13SAlexander Kabaev 	} else {
1235c8daea13SAlexander Kabaev 		error = EINVAL;
1236c8daea13SAlexander Kabaev 		goto done;
1237c8daea13SAlexander Kabaev 	}
12380359a12eSAttilio Rao 	if ((error = VOP_GETATTR(vp, &va, cred)))
1239c8daea13SAlexander Kabaev 		goto done;
1240c92163dcSChristian S.J. Peron #ifdef MAC
12417077c426SJohn Baldwin 	/* This relies on VM_PROT_* matching PROT_*. */
12427077c426SJohn Baldwin 	error = mac_vnode_check_mmap(cred, vp, (int)prot, flags);
1243c92163dcSChristian S.J. Peron 	if (error != 0)
1244c92163dcSChristian S.J. Peron 		goto done;
1245c92163dcSChristian S.J. Peron #endif
1246c8daea13SAlexander Kabaev 	if ((flags & MAP_SHARED) != 0) {
1247c8daea13SAlexander Kabaev 		if ((va.va_flags & (SF_SNAPSHOT|IMMUTABLE|APPEND)) != 0) {
12487077c426SJohn Baldwin 			if (prot & VM_PROT_WRITE) {
1249c8daea13SAlexander Kabaev 				error = EPERM;
1250c8daea13SAlexander Kabaev 				goto done;
1251c8daea13SAlexander Kabaev 			}
1252c8daea13SAlexander Kabaev 			*maxprotp &= ~VM_PROT_WRITE;
1253c8daea13SAlexander Kabaev 		}
1254c8daea13SAlexander Kabaev 	}
1255c8daea13SAlexander Kabaev 	/*
1256c8daea13SAlexander Kabaev 	 * If the regular file has no remaining links (it has been
1257c8daea13SAlexander Kabaev 	 * unlinked), there is no need to sync it.
1258c8daea13SAlexander Kabaev 	 * Adjust the object size to be the size of the actual file.
1259c8daea13SAlexander Kabaev 	 */
1260c8daea13SAlexander Kabaev 	objsize = round_page(va.va_size);
1261c8daea13SAlexander Kabaev 	if (va.va_nlink == 0)
1262c8daea13SAlexander Kabaev 		flags |= MAP_NOSYNC;
12633d653db0SAlan Cox 	if (obj->type == OBJT_VNODE) {
1264e5f299ffSKonstantin Belousov 		obj = vm_pager_allocate(OBJT_VNODE, vp, objsize, prot, foff,
1265e5f299ffSKonstantin Belousov 		    cred);
1266c8daea13SAlexander Kabaev 		if (obj == NULL) {
126764345f0bSJohn Baldwin 			error = ENOMEM;
1268c8daea13SAlexander Kabaev 			goto done;
1269c8daea13SAlexander Kabaev 		}
12703d653db0SAlan Cox 	} else {
12713d653db0SAlan Cox 		KASSERT(obj->type == OBJT_DEFAULT || obj->type == OBJT_SWAP,
12723d653db0SAlan Cox 		    ("wrong object type"));
12733d653db0SAlan Cox 		VM_OBJECT_WLOCK(obj);
12743d653db0SAlan Cox 		vm_object_reference_locked(obj);
12753d653db0SAlan Cox #if VM_NRESERVLEVEL > 0
12763d653db0SAlan Cox 		vm_object_color(obj, 0);
12773d653db0SAlan Cox #endif
12783d653db0SAlan Cox 		VM_OBJECT_WUNLOCK(obj);
12793d653db0SAlan Cox 	}
1280c8daea13SAlexander Kabaev 	*objp = obj;
1281c8daea13SAlexander Kabaev 	*flagsp = flags;
128264345f0bSJohn Baldwin 
12830359a12eSAttilio Rao 	vfs_mark_atime(vp, cred);
12841e309003SDiomidis Spinellis 
1285c8daea13SAlexander Kabaev done:
1286bafa6cfcSKonstantin Belousov 	if (error != 0 && *writecounted) {
1287bafa6cfcSKonstantin Belousov 		*writecounted = FALSE;
1288bafa6cfcSKonstantin Belousov 		vnode_pager_update_writecount(obj, objsize, 0);
1289bafa6cfcSKonstantin Belousov 	}
1290c8daea13SAlexander Kabaev 	vput(vp);
1291c8daea13SAlexander Kabaev 	return (error);
1292c8daea13SAlexander Kabaev }
1293c8daea13SAlexander Kabaev 
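/*
 * Hedged caller-side sketch (it mirrors the cleanup done in vm_mmap()
 * below): the writecounted flag that vm_mmap_vnode() may set must be
 * undone by the caller if the subsequent mapping step fails:
 *
 *	boolean_t wc = FALSE;
 *
 *	error = vm_mmap_vnode(td, size, prot, &maxprot, &flags, vp,
 *	    &foff, &obj, &wc);
 *	if (error == 0) {
 *		error = vm_mmap_object(map, &addr, size, prot, maxprot,
 *		    flags, obj, foff, wc, td);
 *		if (error != 0 && wc)
 *			vnode_pager_release_writecount(obj, 0, size);
 *	}
 */
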
1294c8daea13SAlexander Kabaev /*
129598df9218SJohn Baldwin  * vm_mmap_cdev()
129698df9218SJohn Baldwin  *
129798df9218SJohn Baldwin  * Helper function for vm_mmap().  Performs the sanity checks specific to
129898df9218SJohn Baldwin  * mmap operations on cdevs.
129998df9218SJohn Baldwin  */
130098df9218SJohn Baldwin int
13017077c426SJohn Baldwin vm_mmap_cdev(struct thread *td, vm_size_t objsize, vm_prot_t prot,
13027077c426SJohn Baldwin     vm_prot_t *maxprotp, int *flagsp, struct cdev *cdev, struct cdevsw *dsw,
13037077c426SJohn Baldwin     vm_ooffset_t *foff, vm_object_t *objp)
130498df9218SJohn Baldwin {
130598df9218SJohn Baldwin 	vm_object_t obj;
13067077c426SJohn Baldwin 	int error, flags;
130798df9218SJohn Baldwin 
130898df9218SJohn Baldwin 	flags = *flagsp;
130998df9218SJohn Baldwin 
131091a35e78SKonstantin Belousov 	if (dsw->d_flags & D_MMAP_ANON) {
13117077c426SJohn Baldwin 		*objp = NULL;
13127077c426SJohn Baldwin 		*foff = 0;
131398df9218SJohn Baldwin 		*maxprotp = VM_PROT_ALL;
131498df9218SJohn Baldwin 		*flagsp |= MAP_ANON;
131598df9218SJohn Baldwin 		return (0);
131698df9218SJohn Baldwin 	}
131798df9218SJohn Baldwin 	/*
131864345f0bSJohn Baldwin 	 * cdevs do not provide private mappings of any kind.
131998df9218SJohn Baldwin 	 */
132098df9218SJohn Baldwin 	if ((*maxprotp & VM_PROT_WRITE) == 0 &&
13217077c426SJohn Baldwin 	    (prot & VM_PROT_WRITE) != 0)
132298df9218SJohn Baldwin 		return (EACCES);
13237077c426SJohn Baldwin 	if (flags & (MAP_PRIVATE|MAP_COPY))
132498df9218SJohn Baldwin 		return (EINVAL);
132598df9218SJohn Baldwin 	/*
132698df9218SJohn Baldwin 	 * Force device mappings to be shared.
132798df9218SJohn Baldwin 	 */
132898df9218SJohn Baldwin 	flags |= MAP_SHARED;
132998df9218SJohn Baldwin #ifdef MAC_XXX
13307077c426SJohn Baldwin 	error = mac_cdev_check_mmap(td->td_ucred, cdev, (int)prot);
13317077c426SJohn Baldwin 	if (error != 0)
133298df9218SJohn Baldwin 		return (error);
133398df9218SJohn Baldwin #endif
133464345f0bSJohn Baldwin 	/*
133564345f0bSJohn Baldwin 	 * First, try d_mmap_single().  If that is not implemented
133664345f0bSJohn Baldwin 	 * (returns ENODEV), fall back to using the device pager.
133764345f0bSJohn Baldwin 	 * Note that d_mmap_single() must return a referenced
133864345f0bSJohn Baldwin 	 * object; it is responsible for bumping the reference
133964345f0bSJohn Baldwin 	 * count of the object it returns.
134064345f0bSJohn Baldwin 	 *
134164345f0bSJohn Baldwin 	 * XXX assumes VM_PROT_* == PROT_*
134264345f0bSJohn Baldwin 	 */
134364345f0bSJohn Baldwin 	error = dsw->d_mmap_single(cdev, foff, objsize, objp, (int)prot);
134464345f0bSJohn Baldwin 	if (error != ENODEV)
134564345f0bSJohn Baldwin 		return (error);
13463364c323SKonstantin Belousov 	obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff,
13473364c323SKonstantin Belousov 	    td->td_ucred);
134898df9218SJohn Baldwin 	if (obj == NULL)
134998df9218SJohn Baldwin 		return (EINVAL);
135098df9218SJohn Baldwin 	*objp = obj;
135198df9218SJohn Baldwin 	*flagsp = flags;
135298df9218SJohn Baldwin 	return (0);
135398df9218SJohn Baldwin }
135498df9218SJohn Baldwin 
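/*
 * Sketch of the d_mmap_single() contract relied upon above (the
 * driver and its softc fields here are hypothetical): return ENODEV
 * to fall back to the device pager, or 0 with *objp pointing to an
 * object whose reference count has already been bumped:
 *
 *	static int
 *	foo_mmap_single(struct cdev *cdev, vm_ooffset_t *offset,
 *	    vm_size_t size, vm_object_t *objp, int nprot)
 *	{
 *		struct foo_softc *sc = cdev->si_drv1;
 *
 *		if (*offset + size > sc->mem_size)
 *			return (EINVAL);
 *		vm_object_reference(sc->mem_obj);
 *		*objp = sc->mem_obj;
 *		return (0);
 *	}
 */
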
135598df9218SJohn Baldwin /*
1356d2c60af8SMatthew Dillon  * vm_mmap()
1357d2c60af8SMatthew Dillon  *
13587077c426SJohn Baldwin  * Internal version of mmap used by exec, System V shared memory, and
13597077c426SJohn Baldwin  * various device drivers.  Handle is either a vnode pointer, a
13607077c426SJohn Baldwin  * character device, or NULL for MAP_ANON.
1361df8bae1dSRodney W. Grimes  */
1362df8bae1dSRodney W. Grimes int
1363b9dcd593SBruce Evans vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
1364b9dcd593SBruce Evans 	vm_prot_t maxprot, int flags,
136598df9218SJohn Baldwin 	objtype_t handle_type, void *handle,
1366b9dcd593SBruce Evans 	vm_ooffset_t foff)
1367df8bae1dSRodney W. Grimes {
13687077c426SJohn Baldwin 	vm_object_t object;
1369b40ce416SJulian Elischer 	struct thread *td = curthread;
13707077c426SJohn Baldwin 	int error;
137184110e7eSKonstantin Belousov 	boolean_t writecounted;
1372df8bae1dSRodney W. Grimes 
1373df8bae1dSRodney W. Grimes 	if (size == 0)
13747077c426SJohn Baldwin 		return (EINVAL);
1375df8bae1dSRodney W. Grimes 
1376749474f2SPeter Wemm 	size = round_page(size);
1377010ba384SMark Johnston 	object = NULL;
13787077c426SJohn Baldwin 	writecounted = FALSE;
13797077c426SJohn Baldwin 
13807077c426SJohn Baldwin 	/*
13817077c426SJohn Baldwin 	 * Lookup/allocate object.
13827077c426SJohn Baldwin 	 */
13837077c426SJohn Baldwin 	switch (handle_type) {
13847077c426SJohn Baldwin 	case OBJT_DEVICE: {
13857077c426SJohn Baldwin 		struct cdevsw *dsw;
13867077c426SJohn Baldwin 		struct cdev *cdev;
13877077c426SJohn Baldwin 		int ref;
13887077c426SJohn Baldwin 
13897077c426SJohn Baldwin 		cdev = handle;
13907077c426SJohn Baldwin 		dsw = dev_refthread(cdev, &ref);
13917077c426SJohn Baldwin 		if (dsw == NULL)
13927077c426SJohn Baldwin 			return (ENXIO);
13937077c426SJohn Baldwin 		error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, cdev,
13947077c426SJohn Baldwin 		    dsw, &foff, &object);
13957077c426SJohn Baldwin 		dev_relthread(cdev, ref);
13967077c426SJohn Baldwin 		break;
13977077c426SJohn Baldwin 	}
13987077c426SJohn Baldwin 	case OBJT_VNODE:
13997077c426SJohn Baldwin 		error = vm_mmap_vnode(td, size, prot, &maxprot, &flags,
14007077c426SJohn Baldwin 		    handle, &foff, &object, &writecounted);
14017077c426SJohn Baldwin 		break;
14027077c426SJohn Baldwin 	case OBJT_DEFAULT:
14037077c426SJohn Baldwin 		if (handle == NULL) {
14047077c426SJohn Baldwin 			error = 0;
14057077c426SJohn Baldwin 			break;
14067077c426SJohn Baldwin 		}
14077077c426SJohn Baldwin 		/* FALLTHROUGH */
14087077c426SJohn Baldwin 	default:
14097077c426SJohn Baldwin 		error = EINVAL;
14107077c426SJohn Baldwin 		break;
14117077c426SJohn Baldwin 	}
14127077c426SJohn Baldwin 	if (error)
14137077c426SJohn Baldwin 		return (error);
14147077c426SJohn Baldwin 
14157077c426SJohn Baldwin 	error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object,
14167077c426SJohn Baldwin 	    foff, writecounted, td);
14177077c426SJohn Baldwin 	if (error != 0 && object != NULL) {
14187077c426SJohn Baldwin 		/*
14197077c426SJohn Baldwin 		 * If this mapping was accounted for in the vnode's
14207077c426SJohn Baldwin 		 * writecount, then undo that now.
14217077c426SJohn Baldwin 		 */
14227077c426SJohn Baldwin 		if (writecounted)
14237077c426SJohn Baldwin 			vnode_pager_release_writecount(object, 0, size);
14247077c426SJohn Baldwin 		vm_object_deallocate(object);
14257077c426SJohn Baldwin 	}
14267077c426SJohn Baldwin 	return (error);
14277077c426SJohn Baldwin }
14287077c426SJohn Baldwin 
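/*
 * Assumed caller sketch (names are illustrative, not from this file):
 * exec-like code maps a vnode-backed text segment at a fixed address
 * through this interface:
 *
 *	vm_offset_t va = text_addr;
 *
 *	error = vm_mmap(&vmspace->vm_map, &va, text_size,
 *	    VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_ALL,
 *	    MAP_FIXED | MAP_PRIVATE, OBJT_VNODE, vp, text_off);
 */
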
14297077c426SJohn Baldwin /*
14307077c426SJohn Baldwin  * Internal version of mmap that maps a specific VM object into a
14317077c426SJohn Baldwin  * map.  Called by mmap for MAP_ANON, vm_mmap, shm_mmap, and vn_mmap.
14327077c426SJohn Baldwin  */
14337077c426SJohn Baldwin int
14347077c426SJohn Baldwin vm_mmap_object(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
14357077c426SJohn Baldwin     vm_prot_t maxprot, int flags, vm_object_t object, vm_ooffset_t foff,
14367077c426SJohn Baldwin     boolean_t writecounted, struct thread *td)
14377077c426SJohn Baldwin {
14386a97a3f7SKonstantin Belousov 	boolean_t curmap, fitit;
14396a97a3f7SKonstantin Belousov 	vm_offset_t max_addr;
14407077c426SJohn Baldwin 	int docow, error, findspace, rv;
1441df8bae1dSRodney W. Grimes 
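	/*
	 * Resource limits (RLIMIT_VMEM and, with MAP_WIREFUTURE in
	 * effect, RLIMIT_MEMLOCK) and racct accounting are enforced
	 * only when mapping into the calling process's own address
	 * space.
	 */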
14426a97a3f7SKonstantin Belousov 	curmap = map == &td->td_proc->p_vmspace->vm_map;
14436a97a3f7SKonstantin Belousov 	if (curmap) {
144491d5354aSJohn Baldwin 		PROC_LOCK(td->td_proc);
1445f6f6d240SMateusz Guzik 		if (map->size + size > lim_cur_proc(td->td_proc, RLIMIT_VMEM)) {
144691d5354aSJohn Baldwin 			PROC_UNLOCK(td->td_proc);
1447070f64feSMatthew Dillon 			return (ENOMEM);
1448070f64feSMatthew Dillon 		}
1449a6492969SAlan Cox 		if (racct_set(td->td_proc, RACCT_VMEM, map->size + size)) {
14501ba5ad42SEdward Tomasz Napierala 			PROC_UNLOCK(td->td_proc);
14511ba5ad42SEdward Tomasz Napierala 			return (ENOMEM);
14521ba5ad42SEdward Tomasz Napierala 		}
14537e19eda4SAndrey Zonov 		if (!old_mlock && map->flags & MAP_WIREFUTURE) {
14543ac7d297SAndrey Zonov 			if (ptoa(pmap_wired_count(map->pmap)) + size >
1455f6f6d240SMateusz Guzik 			    lim_cur_proc(td->td_proc, RLIMIT_MEMLOCK)) {
14567e19eda4SAndrey Zonov 				racct_set_force(td->td_proc, RACCT_VMEM,
14577e19eda4SAndrey Zonov 				    map->size);
14587e19eda4SAndrey Zonov 				PROC_UNLOCK(td->td_proc);
14597e19eda4SAndrey Zonov 				return (ENOMEM);
14607e19eda4SAndrey Zonov 			}
14617e19eda4SAndrey Zonov 			error = racct_set(td->td_proc, RACCT_MEMLOCK,
14623ac7d297SAndrey Zonov 			    ptoa(pmap_wired_count(map->pmap)) + size);
14637e19eda4SAndrey Zonov 			if (error != 0) {
14647e19eda4SAndrey Zonov 				racct_set_force(td->td_proc, RACCT_VMEM,
14657e19eda4SAndrey Zonov 				    map->size);
14667e19eda4SAndrey Zonov 				PROC_UNLOCK(td->td_proc);
14677e19eda4SAndrey Zonov 				return (error);
14687e19eda4SAndrey Zonov 			}
14697e19eda4SAndrey Zonov 		}
147091d5354aSJohn Baldwin 		PROC_UNLOCK(td->td_proc);
1471a6492969SAlan Cox 	}
1472070f64feSMatthew Dillon 
1473df8bae1dSRodney W. Grimes 	/*
1474bc9ad247SDavid Greenman 	 * We can currently only deal with page-aligned file offsets.
14757077c426SJohn Baldwin 	 * The mmap() system call already enforces this by subtracting
14767077c426SJohn Baldwin 	 * the page offset from the file offset, but checking here
14777077c426SJohn Baldwin 	 * catches errors in device drivers (e.g. d_single_mmap()
14787077c426SJohn Baldwin 	 * catches errors in device drivers (e.g. d_mmap_single()
14797077c426SJohn Baldwin 	 * exec).
1480bc9ad247SDavid Greenman 	 */
1481bc9ad247SDavid Greenman 	if (foff & PAGE_MASK)
1482bc9ad247SDavid Greenman 		return (EINVAL);
1483bc9ad247SDavid Greenman 
148406cb7259SDavid Greenman 	if ((flags & MAP_FIXED) == 0) {
148506cb7259SDavid Greenman 		fitit = TRUE;
148606cb7259SDavid Greenman 		*addr = round_page(*addr);
148706cb7259SDavid Greenman 	} else {
148806cb7259SDavid Greenman 		if (*addr != trunc_page(*addr))
148906cb7259SDavid Greenman 			return (EINVAL);
149006cb7259SDavid Greenman 		fitit = FALSE;
149106cb7259SDavid Greenman 	}
149284110e7eSKonstantin Belousov 
14935f55e841SDavid Greenman 	if (flags & MAP_ANON) {
14947077c426SJohn Baldwin 		if (object != NULL || foff != 0)
14957077c426SJohn Baldwin 			return (EINVAL);
1496c8daea13SAlexander Kabaev 		docow = 0;
149774ffb9afSAlan Cox 	} else if (flags & MAP_PREFAULT_READ)
149874ffb9afSAlan Cox 		docow = MAP_PREFAULT;
149974ffb9afSAlan Cox 	else
15004738fa09SAlan Cox 		docow = MAP_PREFAULT_PARTIAL;
1501df8bae1dSRodney W. Grimes 
15024f79d873SMatthew Dillon 	if ((flags & (MAP_ANON|MAP_SHARED)) == 0)
15034738fa09SAlan Cox 		docow |= MAP_COPY_ON_WRITE;
15044f79d873SMatthew Dillon 	if (flags & MAP_NOSYNC)
15054f79d873SMatthew Dillon 		docow |= MAP_DISABLE_SYNCER;
15069730a5daSPaul Saab 	if (flags & MAP_NOCORE)
15079730a5daSPaul Saab 		docow |= MAP_DISABLE_COREDUMP;
15088211bd45SKonstantin Belousov 	/* Shared memory is also shared with children. */
15098211bd45SKonstantin Belousov 	if (flags & MAP_SHARED)
15108211bd45SKonstantin Belousov 		docow |= MAP_INHERIT_SHARE;
151184110e7eSKonstantin Belousov 	if (writecounted)
151284110e7eSKonstantin Belousov 		docow |= MAP_VN_WRITECOUNT;
15134648ba0aSKonstantin Belousov 	if (flags & MAP_STACK) {
15144648ba0aSKonstantin Belousov 		if (object != NULL)
15154648ba0aSKonstantin Belousov 			return (EINVAL);
15164648ba0aSKonstantin Belousov 		docow |= MAP_STACK_GROWS_DOWN;
15174648ba0aSKonstantin Belousov 	}
151811c42bccSKonstantin Belousov 	if ((flags & MAP_EXCL) != 0)
151911c42bccSKonstantin Belousov 		docow |= MAP_CHECK_EXCL;
152019bd0d9cSKonstantin Belousov 	if ((flags & MAP_GUARD) != 0)
152119bd0d9cSKonstantin Belousov 		docow |= MAP_CREATE_GUARD;
15225850152dSJohn Dyson 
15234648ba0aSKonstantin Belousov 	if (fitit) {
15245aa60b6fSJohn Baldwin 		if ((flags & MAP_ALIGNMENT_MASK) == MAP_ALIGNED_SUPER)
15255aa60b6fSJohn Baldwin 			findspace = VMFS_SUPER_SPACE;
15265aa60b6fSJohn Baldwin 		else if ((flags & MAP_ALIGNMENT_MASK) != 0)
15275aa60b6fSJohn Baldwin 			findspace = VMFS_ALIGNED_SPACE(flags >>
15285aa60b6fSJohn Baldwin 			    MAP_ALIGNMENT_SHIFT);
15292267af78SJulian Elischer 		else
15305aa60b6fSJohn Baldwin 			findspace = VMFS_OPTIMAL_SPACE;
15316a97a3f7SKonstantin Belousov 		max_addr = 0;
1532edb572a3SJohn Baldwin #ifdef MAP_32BIT
15336a97a3f7SKonstantin Belousov 		if ((flags & MAP_32BIT) != 0)
15346a97a3f7SKonstantin Belousov 			max_addr = MAP_32BIT_MAX_ADDR;
1535edb572a3SJohn Baldwin #endif
15366a97a3f7SKonstantin Belousov 		if (curmap) {
15376a97a3f7SKonstantin Belousov 			rv = vm_map_find_min(map, object, foff, addr, size,
15386a97a3f7SKonstantin Belousov 			    round_page((vm_offset_t)td->td_proc->p_vmspace->
15396a97a3f7SKonstantin Belousov 			    vm_daddr + lim_max(td, RLIMIT_DATA)), max_addr,
15406a97a3f7SKonstantin Belousov 			    findspace, prot, maxprot, docow);
15416a97a3f7SKonstantin Belousov 		} else {
15426a97a3f7SKonstantin Belousov 			rv = vm_map_find(map, object, foff, addr, size,
15436a97a3f7SKonstantin Belousov 			    max_addr, findspace, prot, maxprot, docow);
15446a97a3f7SKonstantin Belousov 		}
15454648ba0aSKonstantin Belousov 	} else {
1546b8ca4ef2SAlan Cox 		rv = vm_map_fixed(map, object, foff, *addr, size,
1547bd7e5f99SJohn Dyson 		    prot, maxprot, docow);
15484648ba0aSKonstantin Belousov 	}
1549bd7e5f99SJohn Dyson 
1550f9230ad6SAlan Cox 	if (rv == KERN_SUCCESS) {
15517fb0c17eSDavid Greenman 		/*
1552f9230ad6SAlan Cox 		 * If the process has requested that all future mappings
1553f9230ad6SAlan Cox 		 * be wired, then heed this.
1554f9230ad6SAlan Cox 		 */
15551472f4f4SKonstantin Belousov 		if (map->flags & MAP_WIREFUTURE) {
1556f9230ad6SAlan Cox 			vm_map_wire(map, *addr, *addr + size,
15571472f4f4SKonstantin Belousov 			    VM_MAP_WIRE_USER | ((flags & MAP_STACK) ?
15581472f4f4SKonstantin Belousov 			    VM_MAP_WIRE_HOLESOK : VM_MAP_WIRE_NOHOLES));
15591472f4f4SKonstantin Belousov 		}
1560df8bae1dSRodney W. Grimes 	}
15612e32165cSKonstantin Belousov 	return (vm_mmap_to_errno(rv));
15622e32165cSKonstantin Belousov }
15632e32165cSKonstantin Belousov 
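/*
 * Minimal sketch of the MAP_ANON path through vm_mmap_object() (a
 * hypothetical in-kernel caller): a NULL object and zero offset are
 * required, and without MAP_FIXED the fitit placement path is taken:
 *
 *	vm_offset_t va = 0;
 *
 *	error = vm_mmap_object(map, &va, size,
 *	    VM_PROT_READ | VM_PROT_WRITE, VM_PROT_ALL,
 *	    MAP_ANON | MAP_SHARED, NULL, 0, FALSE, td);
 */
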
1564f9230ad6SAlan Cox /*
1565f9230ad6SAlan Cox  * Translate a Mach VM return code to zero on success or the appropriate errno
1566f9230ad6SAlan Cox  * on failure.
1567f9230ad6SAlan Cox  */
15682e32165cSKonstantin Belousov int
15692e32165cSKonstantin Belousov vm_mmap_to_errno(int rv)
15702e32165cSKonstantin Belousov {
15712e32165cSKonstantin Belousov 
1572df8bae1dSRodney W. Grimes 	switch (rv) {
1573df8bae1dSRodney W. Grimes 	case KERN_SUCCESS:
1574df8bae1dSRodney W. Grimes 		return (0);
1575df8bae1dSRodney W. Grimes 	case KERN_INVALID_ADDRESS:
1576df8bae1dSRodney W. Grimes 	case KERN_NO_SPACE:
1577df8bae1dSRodney W. Grimes 		return (ENOMEM);
1578df8bae1dSRodney W. Grimes 	case KERN_PROTECTION_FAILURE:
1579df8bae1dSRodney W. Grimes 		return (EACCES);
1580df8bae1dSRodney W. Grimes 	default:
1581df8bae1dSRodney W. Grimes 		return (EINVAL);
1582df8bae1dSRodney W. Grimes 	}
1583df8bae1dSRodney W. Grimes }
1584
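
/*
 * Illustrative use (assumed caller): any Mach-style VM status can be
 * funneled through this helper before returning to userland, e.g.:
 *
 *	rv = vm_map_remove(map, start, end);
 *	return (vm_mmap_to_errno(rv));
 */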